{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 313052, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.388714973870156e-05, "grad_norm": 0.3715410828590393, "learning_rate": 9.999999974822838e-05, "loss": 1.0516, "step": 10 }, { "epoch": 0.00012777429947740312, "grad_norm": 0.5814082026481628, "learning_rate": 9.99999989929135e-05, "loss": 1.1442, "step": 20 }, { "epoch": 0.00019166144921610467, "grad_norm": 0.5080631375312805, "learning_rate": 9.999999773405537e-05, "loss": 0.786, "step": 30 }, { "epoch": 0.00025554859895480624, "grad_norm": 0.9986467361450195, "learning_rate": 9.999999636441773e-05, "loss": 0.9332, "step": 40 }, { "epoch": 0.00031943574869350776, "grad_norm": 0.9622060060501099, "learning_rate": 9.99999941991818e-05, "loss": 1.0711, "step": 50 }, { "epoch": 0.00038332289843220934, "grad_norm": 0.7242945432662964, "learning_rate": 9.999999153040267e-05, "loss": 1.0533, "step": 60 }, { "epoch": 0.0004472100481709109, "grad_norm": 0.6160323619842529, "learning_rate": 9.999998835808037e-05, "loss": 1.0065, "step": 70 }, { "epoch": 0.0005110971979096125, "grad_norm": 1.1578069925308228, "learning_rate": 9.999998468221492e-05, "loss": 1.0088, "step": 80 }, { "epoch": 0.0005749843476483141, "grad_norm": 0.7251982092857361, "learning_rate": 9.999998050280638e-05, "loss": 0.9858, "step": 90 }, { "epoch": 0.0006388714973870155, "grad_norm": 0.9950217604637146, "learning_rate": 9.999997581985477e-05, "loss": 1.0171, "step": 100 }, { "epoch": 0.0007027586471257171, "grad_norm": 1.055307388305664, "learning_rate": 9.999997063336015e-05, "loss": 0.993, "step": 110 }, { "epoch": 0.0007666457968644187, "grad_norm": 0.8479915857315063, "learning_rate": 9.999996494332258e-05, "loss": 0.9431, "step": 120 }, { "epoch": 0.0008305329466031203, "grad_norm": 0.8658767938613892, "learning_rate": 9.999995874974209e-05, "loss": 0.8251, "step": 130 }, { "epoch": 0.0008944200963418218, "grad_norm": 2.8936755657196045, "learning_rate": 9.999995205261878e-05, "loss": 1.2926, "step": 140 }, { "epoch": 0.0009583072460805234, "grad_norm": 0.5815268158912659, "learning_rate": 9.999994485195268e-05, "loss": 1.0419, "step": 150 }, { "epoch": 0.001022194395819225, "grad_norm": 1.1052826642990112, "learning_rate": 9.999993714774389e-05, "loss": 1.1736, "step": 160 }, { "epoch": 0.0010860815455579266, "grad_norm": 1.4575506448745728, "learning_rate": 9.999992893999246e-05, "loss": 1.129, "step": 170 }, { "epoch": 0.0011499686952966281, "grad_norm": 0.6741073131561279, "learning_rate": 9.99999202286985e-05, "loss": 1.0822, "step": 180 }, { "epoch": 0.0012138558450353297, "grad_norm": 0.7011894583702087, "learning_rate": 9.99999110138621e-05, "loss": 0.8165, "step": 190 }, { "epoch": 0.001277742994774031, "grad_norm": 0.812254011631012, "learning_rate": 9.999990129548333e-05, "loss": 0.901, "step": 200 }, { "epoch": 0.0013416301445127326, "grad_norm": 2.268691301345825, "learning_rate": 9.99998910735623e-05, "loss": 1.0642, "step": 210 }, { "epoch": 0.0014055172942514342, "grad_norm": 1.1439971923828125, "learning_rate": 9.999988034809911e-05, "loss": 1.0481, "step": 220 }, { "epoch": 0.0014694044439901358, "grad_norm": 0.953459620475769, "learning_rate": 9.999986911909385e-05, "loss": 0.9192, "step": 230 }, { "epoch": 0.0015332915937288374, "grad_norm": 1.2826868295669556, "learning_rate": 9.999985738654666e-05, "loss": 0.9682, "step": 240 }, { "epoch": 0.001597178743467539, "grad_norm": 0.9371192455291748, "learning_rate": 9.999984515045768e-05, "loss": 1.0604, "step": 250 }, { "epoch": 0.0016610658932062405, "grad_norm": 1.085310459136963, "learning_rate": 9.999983241082698e-05, "loss": 0.9281, "step": 260 }, { "epoch": 0.001724953042944942, "grad_norm": 0.906535267829895, "learning_rate": 9.99998191676547e-05, "loss": 0.9776, "step": 270 }, { "epoch": 0.0017888401926836437, "grad_norm": 1.267155647277832, "learning_rate": 9.9999805420941e-05, "loss": 1.0792, "step": 280 }, { "epoch": 0.0018527273424223452, "grad_norm": 0.6740846037864685, "learning_rate": 9.9999791170686e-05, "loss": 0.8078, "step": 290 }, { "epoch": 0.0019166144921610468, "grad_norm": 1.0244088172912598, "learning_rate": 9.999977641688985e-05, "loss": 0.8445, "step": 300 }, { "epoch": 0.001980501641899748, "grad_norm": 1.1094986200332642, "learning_rate": 9.99997611595527e-05, "loss": 0.9578, "step": 310 }, { "epoch": 0.00204438879163845, "grad_norm": 1.1098616123199463, "learning_rate": 9.99997453986747e-05, "loss": 0.968, "step": 320 }, { "epoch": 0.0021082759413771513, "grad_norm": 0.9596286416053772, "learning_rate": 9.9999729134256e-05, "loss": 1.0052, "step": 330 }, { "epoch": 0.002172163091115853, "grad_norm": 1.476741075515747, "learning_rate": 9.999971236629676e-05, "loss": 1.0505, "step": 340 }, { "epoch": 0.0022360502408545545, "grad_norm": 1.221603274345398, "learning_rate": 9.999969509479718e-05, "loss": 0.7985, "step": 350 }, { "epoch": 0.0022999373905932563, "grad_norm": 1.42596435546875, "learning_rate": 9.99996773197574e-05, "loss": 1.0444, "step": 360 }, { "epoch": 0.0023638245403319576, "grad_norm": 2.004958152770996, "learning_rate": 9.999965904117762e-05, "loss": 0.9776, "step": 370 }, { "epoch": 0.0024277116900706594, "grad_norm": 0.6965352892875671, "learning_rate": 9.999964025905801e-05, "loss": 0.8532, "step": 380 }, { "epoch": 0.0024915988398093608, "grad_norm": 0.995827853679657, "learning_rate": 9.999962097339879e-05, "loss": 1.1039, "step": 390 }, { "epoch": 0.002555485989548062, "grad_norm": 4.0594635009765625, "learning_rate": 9.999960118420011e-05, "loss": 0.954, "step": 400 }, { "epoch": 0.002619373139286764, "grad_norm": 1.0161226987838745, "learning_rate": 9.99995808914622e-05, "loss": 0.9513, "step": 410 }, { "epoch": 0.0026832602890254653, "grad_norm": 1.1862547397613525, "learning_rate": 9.999956009518525e-05, "loss": 0.8693, "step": 420 }, { "epoch": 0.002747147438764167, "grad_norm": 1.0898480415344238, "learning_rate": 9.999953879536947e-05, "loss": 1.0837, "step": 430 }, { "epoch": 0.0028110345885028684, "grad_norm": 0.5398057103157043, "learning_rate": 9.999951699201509e-05, "loss": 0.8603, "step": 440 }, { "epoch": 0.00287492173824157, "grad_norm": 0.825309157371521, "learning_rate": 9.999949468512231e-05, "loss": 1.1857, "step": 450 }, { "epoch": 0.0029388088879802716, "grad_norm": 1.3066387176513672, "learning_rate": 9.999947187469137e-05, "loss": 0.918, "step": 460 }, { "epoch": 0.0030026960377189734, "grad_norm": 1.3756886720657349, "learning_rate": 9.999944856072248e-05, "loss": 0.9851, "step": 470 }, { "epoch": 0.0030665831874576747, "grad_norm": 1.39496648311615, "learning_rate": 9.99994247432159e-05, "loss": 0.9551, "step": 480 }, { "epoch": 0.0031304703371963765, "grad_norm": 1.3940093517303467, "learning_rate": 9.999940042217184e-05, "loss": 0.9794, "step": 490 }, { "epoch": 0.003194357486935078, "grad_norm": 0.708533763885498, "learning_rate": 9.999937559759059e-05, "loss": 0.8929, "step": 500 }, { "epoch": 0.0032582446366737792, "grad_norm": 1.2305490970611572, "learning_rate": 9.999935026947235e-05, "loss": 0.8819, "step": 510 }, { "epoch": 0.003322131786412481, "grad_norm": 1.3446779251098633, "learning_rate": 9.99993244378174e-05, "loss": 1.5002, "step": 520 }, { "epoch": 0.0033860189361511824, "grad_norm": 1.0329649448394775, "learning_rate": 9.9999298102626e-05, "loss": 0.9671, "step": 530 }, { "epoch": 0.003449906085889884, "grad_norm": 1.0138870477676392, "learning_rate": 9.99992712638984e-05, "loss": 0.9673, "step": 540 }, { "epoch": 0.0035137932356285855, "grad_norm": 0.76459139585495, "learning_rate": 9.999924392163491e-05, "loss": 1.1123, "step": 550 }, { "epoch": 0.0035776803853672873, "grad_norm": 1.9452675580978394, "learning_rate": 9.999921607583576e-05, "loss": 0.8708, "step": 560 }, { "epoch": 0.0036415675351059887, "grad_norm": 0.7392802834510803, "learning_rate": 9.999918772650126e-05, "loss": 1.0164, "step": 570 }, { "epoch": 0.0037054546848446905, "grad_norm": 1.3913438320159912, "learning_rate": 9.999915887363167e-05, "loss": 1.0721, "step": 580 }, { "epoch": 0.003769341834583392, "grad_norm": 0.5592684745788574, "learning_rate": 9.99991295172273e-05, "loss": 1.0308, "step": 590 }, { "epoch": 0.0038332289843220936, "grad_norm": 1.1413140296936035, "learning_rate": 9.999909965728845e-05, "loss": 0.8317, "step": 600 }, { "epoch": 0.003897116134060795, "grad_norm": 1.4501404762268066, "learning_rate": 9.99990692938154e-05, "loss": 0.8625, "step": 610 }, { "epoch": 0.003961003283799496, "grad_norm": 0.7926478981971741, "learning_rate": 9.999903842680846e-05, "loss": 1.0851, "step": 620 }, { "epoch": 0.004024890433538198, "grad_norm": 2.0299394130706787, "learning_rate": 9.999900705626797e-05, "loss": 0.8804, "step": 630 }, { "epoch": 0.0040887775832769, "grad_norm": 0.8396856188774109, "learning_rate": 9.99989751821942e-05, "loss": 0.9552, "step": 640 }, { "epoch": 0.004152664733015601, "grad_norm": 1.275235891342163, "learning_rate": 9.999894280458752e-05, "loss": 0.9358, "step": 650 }, { "epoch": 0.004216551882754303, "grad_norm": 0.8721204400062561, "learning_rate": 9.999890992344821e-05, "loss": 0.8874, "step": 660 }, { "epoch": 0.004280439032493004, "grad_norm": 0.6353357434272766, "learning_rate": 9.999887653877663e-05, "loss": 1.1176, "step": 670 }, { "epoch": 0.004344326182231706, "grad_norm": 1.0310698747634888, "learning_rate": 9.999884265057311e-05, "loss": 0.9272, "step": 680 }, { "epoch": 0.004408213331970407, "grad_norm": 0.8742356896400452, "learning_rate": 9.999880825883798e-05, "loss": 0.9773, "step": 690 }, { "epoch": 0.004472100481709109, "grad_norm": 0.9229012131690979, "learning_rate": 9.99987733635716e-05, "loss": 1.0118, "step": 700 }, { "epoch": 0.004535987631447811, "grad_norm": 1.0641270875930786, "learning_rate": 9.999873796477433e-05, "loss": 0.9236, "step": 710 }, { "epoch": 0.0045998747811865125, "grad_norm": 1.2784768342971802, "learning_rate": 9.99987020624465e-05, "loss": 1.3926, "step": 720 }, { "epoch": 0.004663761930925213, "grad_norm": 0.919906497001648, "learning_rate": 9.999866565658848e-05, "loss": 0.9255, "step": 730 }, { "epoch": 0.004727649080663915, "grad_norm": 1.3947570323944092, "learning_rate": 9.999862874720065e-05, "loss": 0.9953, "step": 740 }, { "epoch": 0.004791536230402617, "grad_norm": 1.0191991329193115, "learning_rate": 9.999859133428338e-05, "loss": 0.9042, "step": 750 }, { "epoch": 0.004855423380141319, "grad_norm": 5.101995944976807, "learning_rate": 9.999855341783703e-05, "loss": 1.0483, "step": 760 }, { "epoch": 0.00491931052988002, "grad_norm": 1.350167155265808, "learning_rate": 9.9998514997862e-05, "loss": 1.0955, "step": 770 }, { "epoch": 0.0049831976796187215, "grad_norm": 2.278700351715088, "learning_rate": 9.999847607435866e-05, "loss": 0.9322, "step": 780 }, { "epoch": 0.005047084829357423, "grad_norm": 0.9818449020385742, "learning_rate": 9.999843664732743e-05, "loss": 0.8905, "step": 790 }, { "epoch": 0.005110971979096124, "grad_norm": 1.0081336498260498, "learning_rate": 9.999839671676865e-05, "loss": 1.0194, "step": 800 }, { "epoch": 0.005174859128834826, "grad_norm": 1.2605959177017212, "learning_rate": 9.999835628268279e-05, "loss": 0.9553, "step": 810 }, { "epoch": 0.005238746278573528, "grad_norm": 0.9298542737960815, "learning_rate": 9.999831534507022e-05, "loss": 0.7657, "step": 820 }, { "epoch": 0.00530263342831223, "grad_norm": 1.2965129613876343, "learning_rate": 9.999827390393136e-05, "loss": 0.7605, "step": 830 }, { "epoch": 0.0053665205780509305, "grad_norm": 0.6737092137336731, "learning_rate": 9.999823195926663e-05, "loss": 1.3731, "step": 840 }, { "epoch": 0.005430407727789632, "grad_norm": 1.1260855197906494, "learning_rate": 9.999818951107644e-05, "loss": 1.1665, "step": 850 }, { "epoch": 0.005494294877528334, "grad_norm": 0.9080353379249573, "learning_rate": 9.999814655936123e-05, "loss": 1.053, "step": 860 }, { "epoch": 0.005558182027267036, "grad_norm": 0.7714121341705322, "learning_rate": 9.999810310412146e-05, "loss": 1.0622, "step": 870 }, { "epoch": 0.005622069177005737, "grad_norm": 1.5367814302444458, "learning_rate": 9.99980591453575e-05, "loss": 0.832, "step": 880 }, { "epoch": 0.005685956326744439, "grad_norm": 0.8397789597511292, "learning_rate": 9.999801468306984e-05, "loss": 0.8085, "step": 890 }, { "epoch": 0.00574984347648314, "grad_norm": 1.5233057737350464, "learning_rate": 9.999796971725892e-05, "loss": 0.7896, "step": 900 }, { "epoch": 0.005813730626221841, "grad_norm": 0.987886369228363, "learning_rate": 9.99979242479252e-05, "loss": 1.2048, "step": 910 }, { "epoch": 0.005877617775960543, "grad_norm": 0.8574057817459106, "learning_rate": 9.999787827506911e-05, "loss": 1.1049, "step": 920 }, { "epoch": 0.005941504925699245, "grad_norm": 0.8249441385269165, "learning_rate": 9.999783179869114e-05, "loss": 0.8109, "step": 930 }, { "epoch": 0.006005392075437947, "grad_norm": 1.311522364616394, "learning_rate": 9.999778481879175e-05, "loss": 1.0632, "step": 940 }, { "epoch": 0.006069279225176648, "grad_norm": 0.7848984599113464, "learning_rate": 9.999773733537141e-05, "loss": 0.8783, "step": 950 }, { "epoch": 0.006133166374915349, "grad_norm": 1.3800158500671387, "learning_rate": 9.999768934843062e-05, "loss": 1.0681, "step": 960 }, { "epoch": 0.006197053524654051, "grad_norm": 2.259437322616577, "learning_rate": 9.999764085796981e-05, "loss": 1.0596, "step": 970 }, { "epoch": 0.006260940674392753, "grad_norm": 0.876724123954773, "learning_rate": 9.999759186398951e-05, "loss": 0.9664, "step": 980 }, { "epoch": 0.006324827824131454, "grad_norm": 0.6863592267036438, "learning_rate": 9.999754236649023e-05, "loss": 0.9189, "step": 990 }, { "epoch": 0.006388714973870156, "grad_norm": 1.7007731199264526, "learning_rate": 9.999749236547242e-05, "loss": 1.3958, "step": 1000 }, { "epoch": 0.0064526021236088575, "grad_norm": 1.7878336906433105, "learning_rate": 9.999744186093662e-05, "loss": 0.689, "step": 1010 }, { "epoch": 0.0065164892733475584, "grad_norm": 0.8811324238777161, "learning_rate": 9.999739085288333e-05, "loss": 1.0409, "step": 1020 }, { "epoch": 0.00658037642308626, "grad_norm": 0.7681977152824402, "learning_rate": 9.999733934131305e-05, "loss": 0.6836, "step": 1030 }, { "epoch": 0.006644263572824962, "grad_norm": 0.9528589844703674, "learning_rate": 9.999728732622631e-05, "loss": 0.8524, "step": 1040 }, { "epoch": 0.006708150722563664, "grad_norm": 0.8264364004135132, "learning_rate": 9.999723480762365e-05, "loss": 1.2183, "step": 1050 }, { "epoch": 0.006772037872302365, "grad_norm": 0.740313708782196, "learning_rate": 9.999718178550556e-05, "loss": 1.0717, "step": 1060 }, { "epoch": 0.0068359250220410665, "grad_norm": 1.0919981002807617, "learning_rate": 9.99971282598726e-05, "loss": 0.8355, "step": 1070 }, { "epoch": 0.006899812171779768, "grad_norm": 1.0758978128433228, "learning_rate": 9.999707423072531e-05, "loss": 0.853, "step": 1080 }, { "epoch": 0.00696369932151847, "grad_norm": 1.2707561254501343, "learning_rate": 9.999701969806424e-05, "loss": 1.0517, "step": 1090 }, { "epoch": 0.007027586471257171, "grad_norm": 0.8416491150856018, "learning_rate": 9.99969646618899e-05, "loss": 0.9035, "step": 1100 }, { "epoch": 0.007091473620995873, "grad_norm": 1.4568763971328735, "learning_rate": 9.99969091222029e-05, "loss": 0.8714, "step": 1110 }, { "epoch": 0.007155360770734575, "grad_norm": 1.4576863050460815, "learning_rate": 9.999685307900376e-05, "loss": 0.8144, "step": 1120 }, { "epoch": 0.007219247920473276, "grad_norm": 1.0689126253128052, "learning_rate": 9.999679653229304e-05, "loss": 0.925, "step": 1130 }, { "epoch": 0.007283135070211977, "grad_norm": 1.1141548156738281, "learning_rate": 9.999673948207134e-05, "loss": 1.1567, "step": 1140 }, { "epoch": 0.007347022219950679, "grad_norm": 0.8566306829452515, "learning_rate": 9.999668192833922e-05, "loss": 0.9069, "step": 1150 }, { "epoch": 0.007410909369689381, "grad_norm": 0.7586050629615784, "learning_rate": 9.999662387109728e-05, "loss": 0.9713, "step": 1160 }, { "epoch": 0.007474796519428082, "grad_norm": 0.6867004036903381, "learning_rate": 9.999656531034604e-05, "loss": 0.9686, "step": 1170 }, { "epoch": 0.007538683669166784, "grad_norm": 0.9020546078681946, "learning_rate": 9.999650624608617e-05, "loss": 0.8857, "step": 1180 }, { "epoch": 0.007602570818905485, "grad_norm": 0.6556907892227173, "learning_rate": 9.999644667831822e-05, "loss": 0.7392, "step": 1190 }, { "epoch": 0.007666457968644187, "grad_norm": 0.8906095027923584, "learning_rate": 9.99963866070428e-05, "loss": 0.9946, "step": 1200 }, { "epoch": 0.007730345118382888, "grad_norm": 0.776619017124176, "learning_rate": 9.99963260322605e-05, "loss": 0.8289, "step": 1210 }, { "epoch": 0.00779423226812159, "grad_norm": 0.7643131613731384, "learning_rate": 9.999626495397197e-05, "loss": 1.1158, "step": 1220 }, { "epoch": 0.007858119417860292, "grad_norm": 0.6919121742248535, "learning_rate": 9.999620337217778e-05, "loss": 0.713, "step": 1230 }, { "epoch": 0.007922006567598993, "grad_norm": 0.9334784150123596, "learning_rate": 9.999614128687857e-05, "loss": 1.1716, "step": 1240 }, { "epoch": 0.007985893717337695, "grad_norm": 1.4314568042755127, "learning_rate": 9.999607869807496e-05, "loss": 1.1029, "step": 1250 }, { "epoch": 0.008049780867076396, "grad_norm": 1.5527832508087158, "learning_rate": 9.99960156057676e-05, "loss": 0.9211, "step": 1260 }, { "epoch": 0.008113668016815097, "grad_norm": 0.7879507541656494, "learning_rate": 9.999595200995711e-05, "loss": 1.0019, "step": 1270 }, { "epoch": 0.0081775551665538, "grad_norm": 1.07510244846344, "learning_rate": 9.999588791064412e-05, "loss": 0.9774, "step": 1280 }, { "epoch": 0.0082414423162925, "grad_norm": 0.6843255162239075, "learning_rate": 9.999582330782928e-05, "loss": 0.9584, "step": 1290 }, { "epoch": 0.008305329466031202, "grad_norm": 1.3522765636444092, "learning_rate": 9.999575820151326e-05, "loss": 0.8153, "step": 1300 }, { "epoch": 0.008369216615769904, "grad_norm": 0.546192467212677, "learning_rate": 9.99956925916967e-05, "loss": 1.19, "step": 1310 }, { "epoch": 0.008433103765508605, "grad_norm": 0.7880367636680603, "learning_rate": 9.999562647838026e-05, "loss": 1.017, "step": 1320 }, { "epoch": 0.008496990915247308, "grad_norm": 0.9877641201019287, "learning_rate": 9.999555986156461e-05, "loss": 1.1224, "step": 1330 }, { "epoch": 0.008560878064986009, "grad_norm": 1.541818618774414, "learning_rate": 9.999549274125042e-05, "loss": 0.8995, "step": 1340 }, { "epoch": 0.00862476521472471, "grad_norm": 0.7599831223487854, "learning_rate": 9.999542511743836e-05, "loss": 1.0069, "step": 1350 }, { "epoch": 0.008688652364463412, "grad_norm": 1.1491132974624634, "learning_rate": 9.999535699012912e-05, "loss": 0.882, "step": 1360 }, { "epoch": 0.008752539514202113, "grad_norm": 0.8400082588195801, "learning_rate": 9.999528835932339e-05, "loss": 0.9932, "step": 1370 }, { "epoch": 0.008816426663940814, "grad_norm": 0.897087037563324, "learning_rate": 9.999521922502185e-05, "loss": 0.8435, "step": 1380 }, { "epoch": 0.008880313813679517, "grad_norm": 1.469058632850647, "learning_rate": 9.99951495872252e-05, "loss": 1.1746, "step": 1390 }, { "epoch": 0.008944200963418218, "grad_norm": 0.7353557348251343, "learning_rate": 9.999507944593413e-05, "loss": 0.8944, "step": 1400 }, { "epoch": 0.009008088113156919, "grad_norm": 0.8597003817558289, "learning_rate": 9.999500880114938e-05, "loss": 1.1237, "step": 1410 }, { "epoch": 0.009071975262895621, "grad_norm": 1.2774052619934082, "learning_rate": 9.999493765287164e-05, "loss": 1.0234, "step": 1420 }, { "epoch": 0.009135862412634322, "grad_norm": 1.0299676656723022, "learning_rate": 9.99948660011016e-05, "loss": 1.2595, "step": 1430 }, { "epoch": 0.009199749562373025, "grad_norm": 0.6526196002960205, "learning_rate": 9.999479384584003e-05, "loss": 0.9003, "step": 1440 }, { "epoch": 0.009263636712111726, "grad_norm": 0.9184065461158752, "learning_rate": 9.999472118708763e-05, "loss": 0.8782, "step": 1450 }, { "epoch": 0.009327523861850427, "grad_norm": 1.0141165256500244, "learning_rate": 9.999464802484513e-05, "loss": 0.8616, "step": 1460 }, { "epoch": 0.00939141101158913, "grad_norm": 1.9449567794799805, "learning_rate": 9.999457435911328e-05, "loss": 0.9921, "step": 1470 }, { "epoch": 0.00945529816132783, "grad_norm": 0.9585944414138794, "learning_rate": 9.99945001898928e-05, "loss": 0.9861, "step": 1480 }, { "epoch": 0.009519185311066531, "grad_norm": 1.201170802116394, "learning_rate": 9.999442551718448e-05, "loss": 1.3192, "step": 1490 }, { "epoch": 0.009583072460805234, "grad_norm": 0.8674155473709106, "learning_rate": 9.999435034098901e-05, "loss": 0.8345, "step": 1500 }, { "epoch": 0.009646959610543935, "grad_norm": 1.0349905490875244, "learning_rate": 9.999427466130721e-05, "loss": 1.1643, "step": 1510 }, { "epoch": 0.009710846760282638, "grad_norm": 0.8286603689193726, "learning_rate": 9.99941984781398e-05, "loss": 1.0253, "step": 1520 }, { "epoch": 0.009774733910021339, "grad_norm": 1.2230565547943115, "learning_rate": 9.999412179148756e-05, "loss": 1.1343, "step": 1530 }, { "epoch": 0.00983862105976004, "grad_norm": 0.7413927912712097, "learning_rate": 9.999404460135126e-05, "loss": 0.9257, "step": 1540 }, { "epoch": 0.009902508209498742, "grad_norm": 1.2482092380523682, "learning_rate": 9.999396690773169e-05, "loss": 1.2573, "step": 1550 }, { "epoch": 0.009966395359237443, "grad_norm": 1.8260524272918701, "learning_rate": 9.99938887106296e-05, "loss": 0.9978, "step": 1560 }, { "epoch": 0.010030282508976144, "grad_norm": 0.7294577956199646, "learning_rate": 9.999381001004582e-05, "loss": 0.8249, "step": 1570 }, { "epoch": 0.010094169658714847, "grad_norm": 0.8026980757713318, "learning_rate": 9.999373080598112e-05, "loss": 0.9687, "step": 1580 }, { "epoch": 0.010158056808453548, "grad_norm": 0.9354428052902222, "learning_rate": 9.99936510984363e-05, "loss": 1.0309, "step": 1590 }, { "epoch": 0.010221943958192248, "grad_norm": 1.3766313791275024, "learning_rate": 9.999357088741216e-05, "loss": 1.0006, "step": 1600 }, { "epoch": 0.010285831107930951, "grad_norm": 0.6556980609893799, "learning_rate": 9.999349017290951e-05, "loss": 0.9616, "step": 1610 }, { "epoch": 0.010349718257669652, "grad_norm": 0.6386595368385315, "learning_rate": 9.999340895492917e-05, "loss": 0.9685, "step": 1620 }, { "epoch": 0.010413605407408355, "grad_norm": 0.859089195728302, "learning_rate": 9.999332723347194e-05, "loss": 1.1593, "step": 1630 }, { "epoch": 0.010477492557147056, "grad_norm": 0.8099786043167114, "learning_rate": 9.999324500853866e-05, "loss": 1.1586, "step": 1640 }, { "epoch": 0.010541379706885757, "grad_norm": 0.8301547169685364, "learning_rate": 9.999316228013016e-05, "loss": 0.9436, "step": 1650 }, { "epoch": 0.01060526685662446, "grad_norm": 1.2509781122207642, "learning_rate": 9.999307904824725e-05, "loss": 0.8458, "step": 1660 }, { "epoch": 0.01066915400636316, "grad_norm": 1.0006517171859741, "learning_rate": 9.99929953128908e-05, "loss": 0.9221, "step": 1670 }, { "epoch": 0.010733041156101861, "grad_norm": 0.8921092748641968, "learning_rate": 9.999291107406163e-05, "loss": 0.918, "step": 1680 }, { "epoch": 0.010796928305840564, "grad_norm": 0.8920373916625977, "learning_rate": 9.999282633176059e-05, "loss": 1.059, "step": 1690 }, { "epoch": 0.010860815455579265, "grad_norm": 0.7163852453231812, "learning_rate": 9.999274108598854e-05, "loss": 1.1965, "step": 1700 }, { "epoch": 0.010924702605317966, "grad_norm": 0.7184985876083374, "learning_rate": 9.999265533674635e-05, "loss": 1.1157, "step": 1710 }, { "epoch": 0.010988589755056668, "grad_norm": 1.6878572702407837, "learning_rate": 9.999256908403485e-05, "loss": 0.7872, "step": 1720 }, { "epoch": 0.01105247690479537, "grad_norm": 2.4965457916259766, "learning_rate": 9.999248232785494e-05, "loss": 0.7284, "step": 1730 }, { "epoch": 0.011116364054534072, "grad_norm": 0.6647805571556091, "learning_rate": 9.999239506820749e-05, "loss": 1.0634, "step": 1740 }, { "epoch": 0.011180251204272773, "grad_norm": 1.112949252128601, "learning_rate": 9.999230730509337e-05, "loss": 1.0865, "step": 1750 }, { "epoch": 0.011244138354011474, "grad_norm": 0.7501624822616577, "learning_rate": 9.999221903851346e-05, "loss": 1.0212, "step": 1760 }, { "epoch": 0.011308025503750176, "grad_norm": 0.6178969144821167, "learning_rate": 9.999213026846865e-05, "loss": 0.9825, "step": 1770 }, { "epoch": 0.011371912653488877, "grad_norm": 0.7546608448028564, "learning_rate": 9.999204099495984e-05, "loss": 0.8365, "step": 1780 }, { "epoch": 0.011435799803227578, "grad_norm": 0.6355531811714172, "learning_rate": 9.999195121798795e-05, "loss": 1.1684, "step": 1790 }, { "epoch": 0.01149968695296628, "grad_norm": 1.0356401205062866, "learning_rate": 9.999186093755385e-05, "loss": 1.0752, "step": 1800 }, { "epoch": 0.011563574102704982, "grad_norm": 0.9333721399307251, "learning_rate": 9.999177015365844e-05, "loss": 0.9288, "step": 1810 }, { "epoch": 0.011627461252443683, "grad_norm": 0.9251835942268372, "learning_rate": 9.999167886630269e-05, "loss": 0.748, "step": 1820 }, { "epoch": 0.011691348402182385, "grad_norm": 1.0885391235351562, "learning_rate": 9.999158707548745e-05, "loss": 1.1773, "step": 1830 }, { "epoch": 0.011755235551921086, "grad_norm": 1.2786647081375122, "learning_rate": 9.99914947812137e-05, "loss": 1.1812, "step": 1840 }, { "epoch": 0.011819122701659789, "grad_norm": 0.6569780111312866, "learning_rate": 9.999140198348236e-05, "loss": 0.989, "step": 1850 }, { "epoch": 0.01188300985139849, "grad_norm": 1.241723656654358, "learning_rate": 9.999130868229434e-05, "loss": 1.0771, "step": 1860 }, { "epoch": 0.01194689700113719, "grad_norm": 1.3552509546279907, "learning_rate": 9.999121487765058e-05, "loss": 1.0246, "step": 1870 }, { "epoch": 0.012010784150875893, "grad_norm": 0.6326724290847778, "learning_rate": 9.999112056955205e-05, "loss": 0.9514, "step": 1880 }, { "epoch": 0.012074671300614594, "grad_norm": 2.2786476612091064, "learning_rate": 9.99910257579997e-05, "loss": 0.6857, "step": 1890 }, { "epoch": 0.012138558450353295, "grad_norm": 1.0282983779907227, "learning_rate": 9.999093044299446e-05, "loss": 0.7788, "step": 1900 }, { "epoch": 0.012202445600091998, "grad_norm": 1.1858989000320435, "learning_rate": 9.999083462453728e-05, "loss": 0.9619, "step": 1910 }, { "epoch": 0.012266332749830699, "grad_norm": 0.6922428011894226, "learning_rate": 9.999073830262918e-05, "loss": 1.1683, "step": 1920 }, { "epoch": 0.0123302198995694, "grad_norm": 0.6754278540611267, "learning_rate": 9.999064147727109e-05, "loss": 0.7358, "step": 1930 }, { "epoch": 0.012394107049308102, "grad_norm": 0.7409210205078125, "learning_rate": 9.999054414846398e-05, "loss": 1.1866, "step": 1940 }, { "epoch": 0.012457994199046803, "grad_norm": 0.8322914242744446, "learning_rate": 9.999044631620887e-05, "loss": 0.7945, "step": 1950 }, { "epoch": 0.012521881348785506, "grad_norm": 1.1325633525848389, "learning_rate": 9.999034798050668e-05, "loss": 0.9324, "step": 1960 }, { "epoch": 0.012585768498524207, "grad_norm": 0.9204065203666687, "learning_rate": 9.999024914135846e-05, "loss": 0.8747, "step": 1970 }, { "epoch": 0.012649655648262908, "grad_norm": 1.3020517826080322, "learning_rate": 9.999014979876517e-05, "loss": 1.0649, "step": 1980 }, { "epoch": 0.01271354279800161, "grad_norm": 1.0476547479629517, "learning_rate": 9.999004995272785e-05, "loss": 1.0729, "step": 1990 }, { "epoch": 0.012777429947740311, "grad_norm": 0.8980121612548828, "learning_rate": 9.998994960324746e-05, "loss": 0.8566, "step": 2000 }, { "epoch": 0.012841317097479012, "grad_norm": 2.678067684173584, "learning_rate": 9.998984875032503e-05, "loss": 1.01, "step": 2010 }, { "epoch": 0.012905204247217715, "grad_norm": 1.1093647480010986, "learning_rate": 9.998974739396159e-05, "loss": 0.886, "step": 2020 }, { "epoch": 0.012969091396956416, "grad_norm": 0.5292948484420776, "learning_rate": 9.998964553415813e-05, "loss": 0.9973, "step": 2030 }, { "epoch": 0.013032978546695117, "grad_norm": 1.0876336097717285, "learning_rate": 9.998954317091568e-05, "loss": 1.09, "step": 2040 }, { "epoch": 0.01309686569643382, "grad_norm": 1.7153469324111938, "learning_rate": 9.998944030423531e-05, "loss": 0.8236, "step": 2050 }, { "epoch": 0.01316075284617252, "grad_norm": 0.5473589897155762, "learning_rate": 9.998933693411802e-05, "loss": 0.8271, "step": 2060 }, { "epoch": 0.013224639995911223, "grad_norm": 0.930847704410553, "learning_rate": 9.998923306056487e-05, "loss": 1.0062, "step": 2070 }, { "epoch": 0.013288527145649924, "grad_norm": 1.016547679901123, "learning_rate": 9.998912868357688e-05, "loss": 0.9092, "step": 2080 }, { "epoch": 0.013352414295388625, "grad_norm": 0.8655534386634827, "learning_rate": 9.99890238031551e-05, "loss": 0.8901, "step": 2090 }, { "epoch": 0.013416301445127328, "grad_norm": 0.7575225830078125, "learning_rate": 9.998891841930064e-05, "loss": 1.1021, "step": 2100 }, { "epoch": 0.013480188594866029, "grad_norm": 0.8108758330345154, "learning_rate": 9.998881253201452e-05, "loss": 1.0897, "step": 2110 }, { "epoch": 0.01354407574460473, "grad_norm": 1.2894190549850464, "learning_rate": 9.998870614129781e-05, "loss": 1.1317, "step": 2120 }, { "epoch": 0.013607962894343432, "grad_norm": 1.173697590827942, "learning_rate": 9.998859924715157e-05, "loss": 0.7373, "step": 2130 }, { "epoch": 0.013671850044082133, "grad_norm": 0.7047708034515381, "learning_rate": 9.998849184957689e-05, "loss": 0.7464, "step": 2140 }, { "epoch": 0.013735737193820836, "grad_norm": 0.7167409062385559, "learning_rate": 9.998838394857486e-05, "loss": 0.9529, "step": 2150 }, { "epoch": 0.013799624343559537, "grad_norm": 0.8524914383888245, "learning_rate": 9.998827554414656e-05, "loss": 1.0282, "step": 2160 }, { "epoch": 0.013863511493298238, "grad_norm": 0.7894335389137268, "learning_rate": 9.998816663629307e-05, "loss": 1.0432, "step": 2170 }, { "epoch": 0.01392739864303694, "grad_norm": 0.7883844971656799, "learning_rate": 9.99880572250155e-05, "loss": 1.1062, "step": 2180 }, { "epoch": 0.013991285792775641, "grad_norm": 1.115862250328064, "learning_rate": 9.998794731031494e-05, "loss": 1.0593, "step": 2190 }, { "epoch": 0.014055172942514342, "grad_norm": 0.5956576466560364, "learning_rate": 9.998783689219251e-05, "loss": 0.8832, "step": 2200 }, { "epoch": 0.014119060092253045, "grad_norm": 1.0389795303344727, "learning_rate": 9.998773708545755e-05, "loss": 0.9955, "step": 2210 }, { "epoch": 0.014182947241991746, "grad_norm": 1.3539459705352783, "learning_rate": 9.998762571083662e-05, "loss": 0.9878, "step": 2220 }, { "epoch": 0.014246834391730447, "grad_norm": 1.031422734260559, "learning_rate": 9.998751383279706e-05, "loss": 0.9666, "step": 2230 }, { "epoch": 0.01431072154146915, "grad_norm": 0.5059804320335388, "learning_rate": 9.998740145134e-05, "loss": 0.7655, "step": 2240 }, { "epoch": 0.01437460869120785, "grad_norm": 0.7834402322769165, "learning_rate": 9.998728856646656e-05, "loss": 1.0262, "step": 2250 }, { "epoch": 0.014438495840946553, "grad_norm": 0.7399794459342957, "learning_rate": 9.998717517817786e-05, "loss": 0.982, "step": 2260 }, { "epoch": 0.014502382990685254, "grad_norm": 0.7037153840065002, "learning_rate": 9.998706128647508e-05, "loss": 0.7902, "step": 2270 }, { "epoch": 0.014566270140423955, "grad_norm": 0.8694613575935364, "learning_rate": 9.998694689135934e-05, "loss": 1.0659, "step": 2280 }, { "epoch": 0.014630157290162657, "grad_norm": 1.4297699928283691, "learning_rate": 9.99868319928318e-05, "loss": 1.0498, "step": 2290 }, { "epoch": 0.014694044439901358, "grad_norm": 1.0179654359817505, "learning_rate": 9.998671659089361e-05, "loss": 0.9041, "step": 2300 }, { "epoch": 0.01475793158964006, "grad_norm": 0.9118665456771851, "learning_rate": 9.998660068554596e-05, "loss": 1.0452, "step": 2310 }, { "epoch": 0.014821818739378762, "grad_norm": 1.0615768432617188, "learning_rate": 9.998649594031891e-05, "loss": 0.9364, "step": 2320 }, { "epoch": 0.014885705889117463, "grad_norm": 1.8446980714797974, "learning_rate": 9.998637907849646e-05, "loss": 1.0038, "step": 2330 }, { "epoch": 0.014949593038856164, "grad_norm": 1.1372798681259155, "learning_rate": 9.998626171326792e-05, "loss": 1.0814, "step": 2340 }, { "epoch": 0.015013480188594866, "grad_norm": 1.2520413398742676, "learning_rate": 9.998614384463449e-05, "loss": 0.9373, "step": 2350 }, { "epoch": 0.015077367338333567, "grad_norm": 0.7592064738273621, "learning_rate": 9.998602547259734e-05, "loss": 1.0605, "step": 2360 }, { "epoch": 0.01514125448807227, "grad_norm": 0.8538485169410706, "learning_rate": 9.998590659715766e-05, "loss": 0.8727, "step": 2370 }, { "epoch": 0.01520514163781097, "grad_norm": 0.7715469002723694, "learning_rate": 9.998578721831666e-05, "loss": 1.0918, "step": 2380 }, { "epoch": 0.015269028787549672, "grad_norm": 1.0266464948654175, "learning_rate": 9.998566733607554e-05, "loss": 1.0816, "step": 2390 }, { "epoch": 0.015332915937288374, "grad_norm": 0.564927339553833, "learning_rate": 9.998554695043552e-05, "loss": 0.8394, "step": 2400 }, { "epoch": 0.015396803087027075, "grad_norm": 1.2067440748214722, "learning_rate": 9.998542606139779e-05, "loss": 1.1371, "step": 2410 }, { "epoch": 0.015460690236765776, "grad_norm": 1.1786682605743408, "learning_rate": 9.998530466896357e-05, "loss": 0.9845, "step": 2420 }, { "epoch": 0.015524577386504479, "grad_norm": 3.820138454437256, "learning_rate": 9.99851827731341e-05, "loss": 0.9517, "step": 2430 }, { "epoch": 0.01558846453624318, "grad_norm": 0.8492526412010193, "learning_rate": 9.998506037391058e-05, "loss": 0.989, "step": 2440 }, { "epoch": 0.01565235168598188, "grad_norm": 1.1744376420974731, "learning_rate": 9.998493747129428e-05, "loss": 0.8713, "step": 2450 }, { "epoch": 0.015716238835720583, "grad_norm": 1.1239817142486572, "learning_rate": 9.99848140652864e-05, "loss": 0.801, "step": 2460 }, { "epoch": 0.015780125985459286, "grad_norm": 0.8037886023521423, "learning_rate": 9.99846901558882e-05, "loss": 0.8116, "step": 2470 }, { "epoch": 0.015844013135197985, "grad_norm": 0.9169192314147949, "learning_rate": 9.998456574310094e-05, "loss": 1.0343, "step": 2480 }, { "epoch": 0.015907900284936688, "grad_norm": 0.7503566145896912, "learning_rate": 9.998444082692585e-05, "loss": 1.0077, "step": 2490 }, { "epoch": 0.01597178743467539, "grad_norm": 1.1476398706436157, "learning_rate": 9.99843154073642e-05, "loss": 0.8427, "step": 2500 }, { "epoch": 0.01603567458441409, "grad_norm": 0.7474212646484375, "learning_rate": 9.998418948441726e-05, "loss": 0.7488, "step": 2510 }, { "epoch": 0.016099561734152792, "grad_norm": 0.9779971837997437, "learning_rate": 9.998406305808627e-05, "loss": 0.8778, "step": 2520 }, { "epoch": 0.016163448883891495, "grad_norm": 1.0902825593948364, "learning_rate": 9.998393612837254e-05, "loss": 1.2649, "step": 2530 }, { "epoch": 0.016227336033630194, "grad_norm": 0.9004558324813843, "learning_rate": 9.998380869527732e-05, "loss": 0.7976, "step": 2540 }, { "epoch": 0.016291223183368897, "grad_norm": 0.8847173452377319, "learning_rate": 9.998368075880192e-05, "loss": 0.9168, "step": 2550 }, { "epoch": 0.0163551103331076, "grad_norm": 1.1703412532806396, "learning_rate": 9.99835523189476e-05, "loss": 0.9307, "step": 2560 }, { "epoch": 0.0164189974828463, "grad_norm": 0.7630004286766052, "learning_rate": 9.998342337571565e-05, "loss": 0.8969, "step": 2570 }, { "epoch": 0.016482884632585, "grad_norm": 0.9424830079078674, "learning_rate": 9.998329392910741e-05, "loss": 1.0097, "step": 2580 }, { "epoch": 0.016546771782323704, "grad_norm": 0.891345739364624, "learning_rate": 9.998316397912415e-05, "loss": 0.9626, "step": 2590 }, { "epoch": 0.016610658932062403, "grad_norm": 0.7180986404418945, "learning_rate": 9.998303352576719e-05, "loss": 0.9426, "step": 2600 }, { "epoch": 0.016674546081801106, "grad_norm": 1.2385119199752808, "learning_rate": 9.998290256903784e-05, "loss": 0.7992, "step": 2610 }, { "epoch": 0.01673843323153981, "grad_norm": 0.9304938316345215, "learning_rate": 9.998277110893741e-05, "loss": 1.1183, "step": 2620 }, { "epoch": 0.01680232038127851, "grad_norm": 4.244834899902344, "learning_rate": 9.998263914546724e-05, "loss": 1.1446, "step": 2630 }, { "epoch": 0.01686620753101721, "grad_norm": 1.0744621753692627, "learning_rate": 9.998250667862868e-05, "loss": 0.7592, "step": 2640 }, { "epoch": 0.016930094680755913, "grad_norm": 1.1547142267227173, "learning_rate": 9.9982373708423e-05, "loss": 0.9098, "step": 2650 }, { "epoch": 0.016993981830494616, "grad_norm": 0.8676884770393372, "learning_rate": 9.998224023485159e-05, "loss": 1.1234, "step": 2660 }, { "epoch": 0.017057868980233315, "grad_norm": 1.3594059944152832, "learning_rate": 9.998210625791578e-05, "loss": 0.7285, "step": 2670 }, { "epoch": 0.017121756129972018, "grad_norm": 0.9443914294242859, "learning_rate": 9.998197177761692e-05, "loss": 1.0057, "step": 2680 }, { "epoch": 0.01718564327971072, "grad_norm": 0.7387935519218445, "learning_rate": 9.998183679395636e-05, "loss": 0.8873, "step": 2690 }, { "epoch": 0.01724953042944942, "grad_norm": 0.9435983896255493, "learning_rate": 9.998170130693545e-05, "loss": 0.8891, "step": 2700 }, { "epoch": 0.017313417579188122, "grad_norm": 1.0034334659576416, "learning_rate": 9.998156531655557e-05, "loss": 1.0039, "step": 2710 }, { "epoch": 0.017377304728926825, "grad_norm": 1.2125136852264404, "learning_rate": 9.99814288228181e-05, "loss": 0.7617, "step": 2720 }, { "epoch": 0.017441191878665524, "grad_norm": 0.9862277507781982, "learning_rate": 9.998129182572442e-05, "loss": 0.8764, "step": 2730 }, { "epoch": 0.017505079028404227, "grad_norm": 1.1421021223068237, "learning_rate": 9.998115432527586e-05, "loss": 0.9241, "step": 2740 }, { "epoch": 0.01756896617814293, "grad_norm": 0.8746705651283264, "learning_rate": 9.998101632147385e-05, "loss": 0.9238, "step": 2750 }, { "epoch": 0.01763285332788163, "grad_norm": 0.6663450002670288, "learning_rate": 9.998087781431977e-05, "loss": 1.0525, "step": 2760 }, { "epoch": 0.01769674047762033, "grad_norm": 1.4795788526535034, "learning_rate": 9.9980738803815e-05, "loss": 0.9025, "step": 2770 }, { "epoch": 0.017760627627359034, "grad_norm": 0.7279462218284607, "learning_rate": 9.998059928996095e-05, "loss": 1.1858, "step": 2780 }, { "epoch": 0.017824514777097733, "grad_norm": 0.7917711138725281, "learning_rate": 9.998045927275903e-05, "loss": 0.9119, "step": 2790 }, { "epoch": 0.017888401926836436, "grad_norm": 1.2472501993179321, "learning_rate": 9.998031875221065e-05, "loss": 0.839, "step": 2800 }, { "epoch": 0.01795228907657514, "grad_norm": 0.9328956604003906, "learning_rate": 9.998017772831723e-05, "loss": 0.9749, "step": 2810 }, { "epoch": 0.018016176226313838, "grad_norm": 0.732351541519165, "learning_rate": 9.998003620108017e-05, "loss": 1.0359, "step": 2820 }, { "epoch": 0.01808006337605254, "grad_norm": 0.8829627633094788, "learning_rate": 9.99798941705009e-05, "loss": 0.9921, "step": 2830 }, { "epoch": 0.018143950525791243, "grad_norm": 0.7300599813461304, "learning_rate": 9.997975163658086e-05, "loss": 0.9041, "step": 2840 }, { "epoch": 0.018207837675529946, "grad_norm": 1.0057677030563354, "learning_rate": 9.997960859932148e-05, "loss": 1.1656, "step": 2850 }, { "epoch": 0.018271724825268645, "grad_norm": 0.6405202746391296, "learning_rate": 9.997946505872421e-05, "loss": 0.7273, "step": 2860 }, { "epoch": 0.018335611975007347, "grad_norm": 1.383867621421814, "learning_rate": 9.997932101479049e-05, "loss": 1.0818, "step": 2870 }, { "epoch": 0.01839949912474605, "grad_norm": 2.046144723892212, "learning_rate": 9.997917646752175e-05, "loss": 1.0075, "step": 2880 }, { "epoch": 0.01846338627448475, "grad_norm": 0.6531755924224854, "learning_rate": 9.99790314169195e-05, "loss": 0.978, "step": 2890 }, { "epoch": 0.018527273424223452, "grad_norm": 0.8605973720550537, "learning_rate": 9.997888586298514e-05, "loss": 1.0424, "step": 2900 }, { "epoch": 0.018591160573962155, "grad_norm": 1.2451750040054321, "learning_rate": 9.997873980572017e-05, "loss": 0.9909, "step": 2910 }, { "epoch": 0.018655047723700854, "grad_norm": 1.1829801797866821, "learning_rate": 9.997859324512604e-05, "loss": 0.8175, "step": 2920 }, { "epoch": 0.018718934873439556, "grad_norm": 1.987342357635498, "learning_rate": 9.997844618120424e-05, "loss": 1.1086, "step": 2930 }, { "epoch": 0.01878282202317826, "grad_norm": 1.5796905755996704, "learning_rate": 9.997829861395627e-05, "loss": 0.9863, "step": 2940 }, { "epoch": 0.018846709172916958, "grad_norm": 0.5378701686859131, "learning_rate": 9.997815054338357e-05, "loss": 0.7471, "step": 2950 }, { "epoch": 0.01891059632265566, "grad_norm": 1.4551935195922852, "learning_rate": 9.997800196948768e-05, "loss": 1.1466, "step": 2960 }, { "epoch": 0.018974483472394364, "grad_norm": 0.5287359356880188, "learning_rate": 9.997785289227007e-05, "loss": 0.8842, "step": 2970 }, { "epoch": 0.019038370622133063, "grad_norm": 0.6062310338020325, "learning_rate": 9.997770331173221e-05, "loss": 0.8015, "step": 2980 }, { "epoch": 0.019102257771871765, "grad_norm": 0.9560365676879883, "learning_rate": 9.997755322787568e-05, "loss": 1.1405, "step": 2990 }, { "epoch": 0.019166144921610468, "grad_norm": 0.7935013175010681, "learning_rate": 9.997740264070194e-05, "loss": 0.8133, "step": 3000 }, { "epoch": 0.019230032071349167, "grad_norm": 0.8417540788650513, "learning_rate": 9.997725155021253e-05, "loss": 0.8547, "step": 3010 }, { "epoch": 0.01929391922108787, "grad_norm": 0.5501998662948608, "learning_rate": 9.997709995640894e-05, "loss": 0.9299, "step": 3020 }, { "epoch": 0.019357806370826573, "grad_norm": 0.821506917476654, "learning_rate": 9.997694785929273e-05, "loss": 0.9835, "step": 3030 }, { "epoch": 0.019421693520565275, "grad_norm": 0.817926287651062, "learning_rate": 9.997679525886541e-05, "loss": 1.2224, "step": 3040 }, { "epoch": 0.019485580670303974, "grad_norm": 2.5229651927948, "learning_rate": 9.997664215512854e-05, "loss": 1.0535, "step": 3050 }, { "epoch": 0.019549467820042677, "grad_norm": 0.8168900609016418, "learning_rate": 9.997648854808364e-05, "loss": 1.0088, "step": 3060 }, { "epoch": 0.01961335496978138, "grad_norm": 0.522985577583313, "learning_rate": 9.997633443773226e-05, "loss": 0.9106, "step": 3070 }, { "epoch": 0.01967724211952008, "grad_norm": 0.5633349418640137, "learning_rate": 9.997617982407595e-05, "loss": 0.9174, "step": 3080 }, { "epoch": 0.01974112926925878, "grad_norm": 2.293459892272949, "learning_rate": 9.997602470711628e-05, "loss": 0.8805, "step": 3090 }, { "epoch": 0.019805016418997484, "grad_norm": 0.6353404521942139, "learning_rate": 9.997586908685481e-05, "loss": 0.9407, "step": 3100 }, { "epoch": 0.019868903568736183, "grad_norm": 0.6325660943984985, "learning_rate": 9.997571296329312e-05, "loss": 0.6832, "step": 3110 }, { "epoch": 0.019932790718474886, "grad_norm": 0.7705810070037842, "learning_rate": 9.997555633643274e-05, "loss": 0.7368, "step": 3120 }, { "epoch": 0.01999667786821359, "grad_norm": 0.601768434047699, "learning_rate": 9.997539920627527e-05, "loss": 1.0854, "step": 3130 }, { "epoch": 0.020060565017952288, "grad_norm": 1.055450439453125, "learning_rate": 9.997524157282231e-05, "loss": 1.1712, "step": 3140 }, { "epoch": 0.02012445216769099, "grad_norm": 0.5919578671455383, "learning_rate": 9.997508343607542e-05, "loss": 0.9698, "step": 3150 }, { "epoch": 0.020188339317429693, "grad_norm": 1.1966851949691772, "learning_rate": 9.997492479603623e-05, "loss": 0.9131, "step": 3160 }, { "epoch": 0.020252226467168392, "grad_norm": 0.5245844721794128, "learning_rate": 9.997476565270629e-05, "loss": 0.9533, "step": 3170 }, { "epoch": 0.020316113616907095, "grad_norm": 0.6640262603759766, "learning_rate": 9.997460600608723e-05, "loss": 0.954, "step": 3180 }, { "epoch": 0.020380000766645798, "grad_norm": 1.1632764339447021, "learning_rate": 9.997444585618066e-05, "loss": 0.9693, "step": 3190 }, { "epoch": 0.020443887916384497, "grad_norm": 0.8746532797813416, "learning_rate": 9.997428520298817e-05, "loss": 0.9353, "step": 3200 }, { "epoch": 0.0205077750661232, "grad_norm": 0.7248082756996155, "learning_rate": 9.997412404651141e-05, "loss": 1.0746, "step": 3210 }, { "epoch": 0.020571662215861902, "grad_norm": 1.0290027856826782, "learning_rate": 9.997396238675198e-05, "loss": 1.013, "step": 3220 }, { "epoch": 0.0206355493656006, "grad_norm": 1.3203686475753784, "learning_rate": 9.997380022371153e-05, "loss": 0.9819, "step": 3230 }, { "epoch": 0.020699436515339304, "grad_norm": 1.1412265300750732, "learning_rate": 9.997363755739166e-05, "loss": 0.756, "step": 3240 }, { "epoch": 0.020763323665078007, "grad_norm": 1.012272596359253, "learning_rate": 9.997347438779403e-05, "loss": 0.8896, "step": 3250 }, { "epoch": 0.02082721081481671, "grad_norm": 0.6581144332885742, "learning_rate": 9.997331071492028e-05, "loss": 0.9093, "step": 3260 }, { "epoch": 0.02089109796455541, "grad_norm": 0.6292199492454529, "learning_rate": 9.997314653877206e-05, "loss": 0.8898, "step": 3270 }, { "epoch": 0.02095498511429411, "grad_norm": 0.8514048457145691, "learning_rate": 9.997298185935102e-05, "loss": 1.0219, "step": 3280 }, { "epoch": 0.021018872264032814, "grad_norm": 0.8251546621322632, "learning_rate": 9.99728166766588e-05, "loss": 1.1417, "step": 3290 }, { "epoch": 0.021082759413771513, "grad_norm": 0.7164210081100464, "learning_rate": 9.997265099069712e-05, "loss": 0.8025, "step": 3300 }, { "epoch": 0.021146646563510216, "grad_norm": 0.6162307858467102, "learning_rate": 9.997248480146759e-05, "loss": 1.1907, "step": 3310 }, { "epoch": 0.02121053371324892, "grad_norm": 0.9600428938865662, "learning_rate": 9.997231810897191e-05, "loss": 1.0066, "step": 3320 }, { "epoch": 0.021274420862987618, "grad_norm": 1.1238371133804321, "learning_rate": 9.997215091321178e-05, "loss": 0.8551, "step": 3330 }, { "epoch": 0.02133830801272632, "grad_norm": 1.7699466943740845, "learning_rate": 9.997198321418881e-05, "loss": 1.049, "step": 3340 }, { "epoch": 0.021402195162465023, "grad_norm": 1.0499175786972046, "learning_rate": 9.997181501190478e-05, "loss": 1.11, "step": 3350 }, { "epoch": 0.021466082312203722, "grad_norm": 0.9096553325653076, "learning_rate": 9.997164630636132e-05, "loss": 0.9371, "step": 3360 }, { "epoch": 0.021529969461942425, "grad_norm": 0.8059217929840088, "learning_rate": 9.997147709756014e-05, "loss": 0.8638, "step": 3370 }, { "epoch": 0.021593856611681127, "grad_norm": 0.6484128832817078, "learning_rate": 9.997130738550298e-05, "loss": 0.9817, "step": 3380 }, { "epoch": 0.021657743761419827, "grad_norm": 1.1222511529922485, "learning_rate": 9.997113717019151e-05, "loss": 0.7598, "step": 3390 }, { "epoch": 0.02172163091115853, "grad_norm": 1.0018411874771118, "learning_rate": 9.997096645162745e-05, "loss": 0.9593, "step": 3400 }, { "epoch": 0.021785518060897232, "grad_norm": 0.6298023462295532, "learning_rate": 9.997079522981254e-05, "loss": 0.8118, "step": 3410 }, { "epoch": 0.02184940521063593, "grad_norm": 0.5194735527038574, "learning_rate": 9.997062350474849e-05, "loss": 0.8344, "step": 3420 }, { "epoch": 0.021913292360374634, "grad_norm": 0.7458469271659851, "learning_rate": 9.997045127643703e-05, "loss": 1.1305, "step": 3430 }, { "epoch": 0.021977179510113336, "grad_norm": 1.092467188835144, "learning_rate": 9.997027854487988e-05, "loss": 0.7839, "step": 3440 }, { "epoch": 0.022041066659852036, "grad_norm": 0.5377646088600159, "learning_rate": 9.997010531007879e-05, "loss": 0.9457, "step": 3450 }, { "epoch": 0.02210495380959074, "grad_norm": 0.8158820271492004, "learning_rate": 9.996993157203554e-05, "loss": 1.0827, "step": 3460 }, { "epoch": 0.02216884095932944, "grad_norm": 0.9033936858177185, "learning_rate": 9.996975733075184e-05, "loss": 0.8901, "step": 3470 }, { "epoch": 0.022232728109068144, "grad_norm": 0.6493645310401917, "learning_rate": 9.996958258622944e-05, "loss": 1.0609, "step": 3480 }, { "epoch": 0.022296615258806843, "grad_norm": 1.416635274887085, "learning_rate": 9.996940733847013e-05, "loss": 0.9017, "step": 3490 }, { "epoch": 0.022360502408545545, "grad_norm": 0.9830083847045898, "learning_rate": 9.996923158747564e-05, "loss": 0.8952, "step": 3500 }, { "epoch": 0.022424389558284248, "grad_norm": 1.130096197128296, "learning_rate": 9.996905533324777e-05, "loss": 0.8779, "step": 3510 }, { "epoch": 0.022488276708022947, "grad_norm": 0.7025210857391357, "learning_rate": 9.996887857578828e-05, "loss": 1.0576, "step": 3520 }, { "epoch": 0.02255216385776165, "grad_norm": 0.7813702821731567, "learning_rate": 9.996870131509897e-05, "loss": 1.1396, "step": 3530 }, { "epoch": 0.022616051007500353, "grad_norm": 0.9451877474784851, "learning_rate": 9.996852355118158e-05, "loss": 0.8531, "step": 3540 }, { "epoch": 0.022679938157239052, "grad_norm": 0.9123436212539673, "learning_rate": 9.996834528403795e-05, "loss": 0.8832, "step": 3550 }, { "epoch": 0.022743825306977754, "grad_norm": 3.4489307403564453, "learning_rate": 9.996816651366985e-05, "loss": 0.8413, "step": 3560 }, { "epoch": 0.022807712456716457, "grad_norm": 1.0235848426818848, "learning_rate": 9.996798724007907e-05, "loss": 0.9275, "step": 3570 }, { "epoch": 0.022871599606455156, "grad_norm": 0.7772485017776489, "learning_rate": 9.996780746326743e-05, "loss": 1.0924, "step": 3580 }, { "epoch": 0.02293548675619386, "grad_norm": 0.7384485006332397, "learning_rate": 9.996762718323677e-05, "loss": 0.8231, "step": 3590 }, { "epoch": 0.02299937390593256, "grad_norm": 0.9038792848587036, "learning_rate": 9.996744639998885e-05, "loss": 0.7318, "step": 3600 }, { "epoch": 0.02306326105567126, "grad_norm": 0.7685703039169312, "learning_rate": 9.996726511352553e-05, "loss": 0.7945, "step": 3610 }, { "epoch": 0.023127148205409963, "grad_norm": 0.9612904787063599, "learning_rate": 9.996708332384862e-05, "loss": 0.7389, "step": 3620 }, { "epoch": 0.023191035355148666, "grad_norm": 0.6820782423019409, "learning_rate": 9.996690103095995e-05, "loss": 0.7208, "step": 3630 }, { "epoch": 0.023254922504887365, "grad_norm": 0.7813957333564758, "learning_rate": 9.996671823486135e-05, "loss": 1.1023, "step": 3640 }, { "epoch": 0.023318809654626068, "grad_norm": 0.41932976245880127, "learning_rate": 9.996653493555469e-05, "loss": 0.8274, "step": 3650 }, { "epoch": 0.02338269680436477, "grad_norm": 1.1898959875106812, "learning_rate": 9.996635113304178e-05, "loss": 0.862, "step": 3660 }, { "epoch": 0.023446583954103473, "grad_norm": 1.4429035186767578, "learning_rate": 9.99661668273245e-05, "loss": 0.9036, "step": 3670 }, { "epoch": 0.023510471103842173, "grad_norm": 0.9616169929504395, "learning_rate": 9.996598201840469e-05, "loss": 0.9577, "step": 3680 }, { "epoch": 0.023574358253580875, "grad_norm": 0.8261591196060181, "learning_rate": 9.99657967062842e-05, "loss": 1.0401, "step": 3690 }, { "epoch": 0.023638245403319578, "grad_norm": 0.8811150789260864, "learning_rate": 9.996561089096493e-05, "loss": 0.8111, "step": 3700 }, { "epoch": 0.023702132553058277, "grad_norm": 0.5696326494216919, "learning_rate": 9.996542457244871e-05, "loss": 0.7984, "step": 3710 }, { "epoch": 0.02376601970279698, "grad_norm": 0.9691576361656189, "learning_rate": 9.996523775073746e-05, "loss": 0.9321, "step": 3720 }, { "epoch": 0.023829906852535682, "grad_norm": 0.7957014441490173, "learning_rate": 9.996505042583303e-05, "loss": 0.9805, "step": 3730 }, { "epoch": 0.02389379400227438, "grad_norm": 0.920781135559082, "learning_rate": 9.996486259773732e-05, "loss": 0.83, "step": 3740 }, { "epoch": 0.023957681152013084, "grad_norm": 1.661712646484375, "learning_rate": 9.996467426645221e-05, "loss": 0.7208, "step": 3750 }, { "epoch": 0.024021568301751787, "grad_norm": 0.6871623396873474, "learning_rate": 9.99644854319796e-05, "loss": 0.7284, "step": 3760 }, { "epoch": 0.024085455451490486, "grad_norm": 0.9017264246940613, "learning_rate": 9.99642960943214e-05, "loss": 1.1523, "step": 3770 }, { "epoch": 0.02414934260122919, "grad_norm": 0.894895613193512, "learning_rate": 9.996410625347953e-05, "loss": 0.7732, "step": 3780 }, { "epoch": 0.02421322975096789, "grad_norm": 0.8452061414718628, "learning_rate": 9.996391590945588e-05, "loss": 0.913, "step": 3790 }, { "epoch": 0.02427711690070659, "grad_norm": 0.7204217910766602, "learning_rate": 9.996372506225235e-05, "loss": 0.8552, "step": 3800 }, { "epoch": 0.024341004050445293, "grad_norm": 2.9905290603637695, "learning_rate": 9.996353371187091e-05, "loss": 0.8771, "step": 3810 }, { "epoch": 0.024404891200183996, "grad_norm": 0.9556611180305481, "learning_rate": 9.996334185831346e-05, "loss": 0.8103, "step": 3820 }, { "epoch": 0.024468778349922695, "grad_norm": 0.775848925113678, "learning_rate": 9.996314950158192e-05, "loss": 0.8078, "step": 3830 }, { "epoch": 0.024532665499661398, "grad_norm": 0.9693676829338074, "learning_rate": 9.996295664167824e-05, "loss": 0.9336, "step": 3840 }, { "epoch": 0.0245965526494001, "grad_norm": 1.195697546005249, "learning_rate": 9.996276327860436e-05, "loss": 1.2527, "step": 3850 }, { "epoch": 0.0246604397991388, "grad_norm": 0.8424214124679565, "learning_rate": 9.996256941236223e-05, "loss": 0.966, "step": 3860 }, { "epoch": 0.024724326948877502, "grad_norm": 0.6259729266166687, "learning_rate": 9.996237504295382e-05, "loss": 0.9363, "step": 3870 }, { "epoch": 0.024788214098616205, "grad_norm": 0.7807269096374512, "learning_rate": 9.996218017038106e-05, "loss": 0.6411, "step": 3880 }, { "epoch": 0.024852101248354908, "grad_norm": 0.6563220024108887, "learning_rate": 9.996198479464591e-05, "loss": 0.8191, "step": 3890 }, { "epoch": 0.024915988398093607, "grad_norm": 0.831295371055603, "learning_rate": 9.996178891575037e-05, "loss": 0.8589, "step": 3900 }, { "epoch": 0.02497987554783231, "grad_norm": 1.157340168952942, "learning_rate": 9.996159253369638e-05, "loss": 0.9202, "step": 3910 }, { "epoch": 0.025043762697571012, "grad_norm": 0.7473374009132385, "learning_rate": 9.996139564848594e-05, "loss": 0.829, "step": 3920 }, { "epoch": 0.02510764984730971, "grad_norm": 1.1940234899520874, "learning_rate": 9.996119826012101e-05, "loss": 0.9879, "step": 3930 }, { "epoch": 0.025171536997048414, "grad_norm": 0.7762036323547363, "learning_rate": 9.99610003686036e-05, "loss": 0.9162, "step": 3940 }, { "epoch": 0.025235424146787117, "grad_norm": 1.1545424461364746, "learning_rate": 9.996080197393569e-05, "loss": 0.9567, "step": 3950 }, { "epoch": 0.025299311296525816, "grad_norm": 0.6979715824127197, "learning_rate": 9.996060307611927e-05, "loss": 0.9685, "step": 3960 }, { "epoch": 0.02536319844626452, "grad_norm": 0.9557220339775085, "learning_rate": 9.996040367515638e-05, "loss": 1.0768, "step": 3970 }, { "epoch": 0.02542708559600322, "grad_norm": 0.8868962526321411, "learning_rate": 9.996020377104898e-05, "loss": 1.0351, "step": 3980 }, { "epoch": 0.02549097274574192, "grad_norm": 0.5406913757324219, "learning_rate": 9.996000336379913e-05, "loss": 0.9042, "step": 3990 }, { "epoch": 0.025554859895480623, "grad_norm": 0.64485764503479, "learning_rate": 9.995980245340881e-05, "loss": 1.1883, "step": 4000 }, { "epoch": 0.025618747045219326, "grad_norm": 1.2904107570648193, "learning_rate": 9.995960103988005e-05, "loss": 0.936, "step": 4010 }, { "epoch": 0.025682634194958025, "grad_norm": 1.247886061668396, "learning_rate": 9.99593991232149e-05, "loss": 0.8806, "step": 4020 }, { "epoch": 0.025746521344696727, "grad_norm": 0.9545615911483765, "learning_rate": 9.995919670341538e-05, "loss": 1.1493, "step": 4030 }, { "epoch": 0.02581040849443543, "grad_norm": 1.999590277671814, "learning_rate": 9.995899378048352e-05, "loss": 0.6754, "step": 4040 }, { "epoch": 0.02587429564417413, "grad_norm": 0.7333373427391052, "learning_rate": 9.995879035442138e-05, "loss": 0.8109, "step": 4050 }, { "epoch": 0.025938182793912832, "grad_norm": 0.7739579081535339, "learning_rate": 9.995858642523099e-05, "loss": 0.8638, "step": 4060 }, { "epoch": 0.026002069943651535, "grad_norm": 1.069405198097229, "learning_rate": 9.995838199291443e-05, "loss": 0.9313, "step": 4070 }, { "epoch": 0.026065957093390234, "grad_norm": 1.366487979888916, "learning_rate": 9.995817705747372e-05, "loss": 1.0205, "step": 4080 }, { "epoch": 0.026129844243128936, "grad_norm": 1.6458861827850342, "learning_rate": 9.995797161891097e-05, "loss": 0.9609, "step": 4090 }, { "epoch": 0.02619373139286764, "grad_norm": 1.0026328563690186, "learning_rate": 9.995776567722822e-05, "loss": 1.0618, "step": 4100 }, { "epoch": 0.02625761854260634, "grad_norm": 1.0415229797363281, "learning_rate": 9.995755923242754e-05, "loss": 0.761, "step": 4110 }, { "epoch": 0.02632150569234504, "grad_norm": 1.169027328491211, "learning_rate": 9.995735228451103e-05, "loss": 0.92, "step": 4120 }, { "epoch": 0.026385392842083744, "grad_norm": 1.2535079717636108, "learning_rate": 9.995714483348076e-05, "loss": 0.8859, "step": 4130 }, { "epoch": 0.026449279991822446, "grad_norm": 0.6948879957199097, "learning_rate": 9.995693687933883e-05, "loss": 0.7189, "step": 4140 }, { "epoch": 0.026513167141561145, "grad_norm": 0.7670521140098572, "learning_rate": 9.995672842208731e-05, "loss": 1.0072, "step": 4150 }, { "epoch": 0.026577054291299848, "grad_norm": 0.8560011982917786, "learning_rate": 9.995651946172833e-05, "loss": 1.1125, "step": 4160 }, { "epoch": 0.02664094144103855, "grad_norm": 0.762663722038269, "learning_rate": 9.995630999826397e-05, "loss": 0.9922, "step": 4170 }, { "epoch": 0.02670482859077725, "grad_norm": 1.432151198387146, "learning_rate": 9.995610003169635e-05, "loss": 1.0305, "step": 4180 }, { "epoch": 0.026768715740515953, "grad_norm": 1.0463693141937256, "learning_rate": 9.99558895620276e-05, "loss": 0.9721, "step": 4190 }, { "epoch": 0.026832602890254655, "grad_norm": 0.6497074961662292, "learning_rate": 9.99556785892598e-05, "loss": 0.8886, "step": 4200 }, { "epoch": 0.026896490039993354, "grad_norm": 0.8750442266464233, "learning_rate": 9.995546711339512e-05, "loss": 1.1452, "step": 4210 }, { "epoch": 0.026960377189732057, "grad_norm": 0.5352575778961182, "learning_rate": 9.995525513443566e-05, "loss": 1.3216, "step": 4220 }, { "epoch": 0.02702426433947076, "grad_norm": 0.7286153435707092, "learning_rate": 9.995504265238357e-05, "loss": 0.9927, "step": 4230 }, { "epoch": 0.02708815148920946, "grad_norm": 1.133766770362854, "learning_rate": 9.995482966724098e-05, "loss": 0.9198, "step": 4240 }, { "epoch": 0.02715203863894816, "grad_norm": 1.060925006866455, "learning_rate": 9.995461617901004e-05, "loss": 0.984, "step": 4250 }, { "epoch": 0.027215925788686864, "grad_norm": 0.8017410039901733, "learning_rate": 9.995440218769288e-05, "loss": 0.8302, "step": 4260 }, { "epoch": 0.027279812938425563, "grad_norm": 0.6474617719650269, "learning_rate": 9.995418769329171e-05, "loss": 0.8526, "step": 4270 }, { "epoch": 0.027343700088164266, "grad_norm": 0.7051038146018982, "learning_rate": 9.995397269580862e-05, "loss": 0.6267, "step": 4280 }, { "epoch": 0.02740758723790297, "grad_norm": 0.8523268699645996, "learning_rate": 9.995375719524582e-05, "loss": 0.7513, "step": 4290 }, { "epoch": 0.02747147438764167, "grad_norm": 0.5515130162239075, "learning_rate": 9.995354119160546e-05, "loss": 0.8045, "step": 4300 }, { "epoch": 0.02753536153738037, "grad_norm": 0.6105387806892395, "learning_rate": 9.995332468488974e-05, "loss": 0.9739, "step": 4310 }, { "epoch": 0.027599248687119073, "grad_norm": 0.9270747303962708, "learning_rate": 9.99531076751008e-05, "loss": 0.8789, "step": 4320 }, { "epoch": 0.027663135836857776, "grad_norm": 0.46213430166244507, "learning_rate": 9.995289016224087e-05, "loss": 0.8914, "step": 4330 }, { "epoch": 0.027727022986596475, "grad_norm": 0.8763656616210938, "learning_rate": 9.995267214631213e-05, "loss": 0.9085, "step": 4340 }, { "epoch": 0.027790910136335178, "grad_norm": 1.6064941883087158, "learning_rate": 9.995245362731676e-05, "loss": 1.0047, "step": 4350 }, { "epoch": 0.02785479728607388, "grad_norm": 1.2199528217315674, "learning_rate": 9.995223460525696e-05, "loss": 0.749, "step": 4360 }, { "epoch": 0.02791868443581258, "grad_norm": 0.9066464304924011, "learning_rate": 9.995201508013494e-05, "loss": 1.0363, "step": 4370 }, { "epoch": 0.027982571585551282, "grad_norm": 0.8760823011398315, "learning_rate": 9.995179505195291e-05, "loss": 1.1568, "step": 4380 }, { "epoch": 0.028046458735289985, "grad_norm": 0.6646769046783447, "learning_rate": 9.99515745207131e-05, "loss": 1.3106, "step": 4390 }, { "epoch": 0.028110345885028684, "grad_norm": 0.7811892032623291, "learning_rate": 9.995135348641771e-05, "loss": 0.8003, "step": 4400 }, { "epoch": 0.028174233034767387, "grad_norm": 1.2583142518997192, "learning_rate": 9.995113194906899e-05, "loss": 0.934, "step": 4410 }, { "epoch": 0.02823812018450609, "grad_norm": 1.4330214262008667, "learning_rate": 9.995090990866915e-05, "loss": 0.8924, "step": 4420 }, { "epoch": 0.02830200733424479, "grad_norm": 0.7987727522850037, "learning_rate": 9.995068736522044e-05, "loss": 1.257, "step": 4430 }, { "epoch": 0.02836589448398349, "grad_norm": 0.90681391954422, "learning_rate": 9.995046431872507e-05, "loss": 0.9746, "step": 4440 }, { "epoch": 0.028429781633722194, "grad_norm": 1.1222659349441528, "learning_rate": 9.995024076918534e-05, "loss": 0.8702, "step": 4450 }, { "epoch": 0.028493668783460893, "grad_norm": 1.4470833539962769, "learning_rate": 9.995001671660347e-05, "loss": 0.9072, "step": 4460 }, { "epoch": 0.028557555933199596, "grad_norm": 0.9265400767326355, "learning_rate": 9.994979216098171e-05, "loss": 0.9651, "step": 4470 }, { "epoch": 0.0286214430829383, "grad_norm": 0.40936312079429626, "learning_rate": 9.994956710232232e-05, "loss": 0.9576, "step": 4480 }, { "epoch": 0.028685330232676998, "grad_norm": 0.7994583249092102, "learning_rate": 9.99493415406276e-05, "loss": 0.8773, "step": 4490 }, { "epoch": 0.0287492173824157, "grad_norm": 0.8965862989425659, "learning_rate": 9.994911547589979e-05, "loss": 0.9247, "step": 4500 }, { "epoch": 0.028813104532154403, "grad_norm": 0.5341432690620422, "learning_rate": 9.994888890814116e-05, "loss": 0.9735, "step": 4510 }, { "epoch": 0.028876991681893106, "grad_norm": 0.796406090259552, "learning_rate": 9.994866183735403e-05, "loss": 1.0474, "step": 4520 }, { "epoch": 0.028940878831631805, "grad_norm": 0.6537384986877441, "learning_rate": 9.994843426354064e-05, "loss": 0.7858, "step": 4530 }, { "epoch": 0.029004765981370507, "grad_norm": 0.7321698665618896, "learning_rate": 9.994820618670332e-05, "loss": 1.017, "step": 4540 }, { "epoch": 0.02906865313110921, "grad_norm": 0.9634839296340942, "learning_rate": 9.994797760684435e-05, "loss": 0.9671, "step": 4550 }, { "epoch": 0.02913254028084791, "grad_norm": 0.7006617784500122, "learning_rate": 9.994774852396603e-05, "loss": 1.053, "step": 4560 }, { "epoch": 0.029196427430586612, "grad_norm": 0.7608281373977661, "learning_rate": 9.994751893807068e-05, "loss": 0.7445, "step": 4570 }, { "epoch": 0.029260314580325315, "grad_norm": 1.0257230997085571, "learning_rate": 9.99472888491606e-05, "loss": 0.7304, "step": 4580 }, { "epoch": 0.029324201730064014, "grad_norm": 0.6806319355964661, "learning_rate": 9.994705825723811e-05, "loss": 1.1287, "step": 4590 }, { "epoch": 0.029388088879802717, "grad_norm": 1.2967884540557861, "learning_rate": 9.994682716230552e-05, "loss": 0.948, "step": 4600 }, { "epoch": 0.02945197602954142, "grad_norm": 1.0324482917785645, "learning_rate": 9.994659556436518e-05, "loss": 1.0642, "step": 4610 }, { "epoch": 0.02951586317928012, "grad_norm": 0.5615150928497314, "learning_rate": 9.994636346341943e-05, "loss": 0.6903, "step": 4620 }, { "epoch": 0.02957975032901882, "grad_norm": 0.6164289712905884, "learning_rate": 9.994613085947058e-05, "loss": 0.8748, "step": 4630 }, { "epoch": 0.029643637478757524, "grad_norm": 0.9414746761322021, "learning_rate": 9.994589775252097e-05, "loss": 0.9157, "step": 4640 }, { "epoch": 0.029707524628496223, "grad_norm": 0.8447662591934204, "learning_rate": 9.994566414257297e-05, "loss": 1.1894, "step": 4650 }, { "epoch": 0.029771411778234926, "grad_norm": 0.8695082664489746, "learning_rate": 9.994543002962892e-05, "loss": 1.173, "step": 4660 }, { "epoch": 0.029835298927973628, "grad_norm": 1.3696662187576294, "learning_rate": 9.994519541369119e-05, "loss": 0.8384, "step": 4670 }, { "epoch": 0.029899186077712327, "grad_norm": 0.6377172470092773, "learning_rate": 9.994496029476213e-05, "loss": 0.8018, "step": 4680 }, { "epoch": 0.02996307322745103, "grad_norm": 1.396103858947754, "learning_rate": 9.99447246728441e-05, "loss": 0.8777, "step": 4690 }, { "epoch": 0.030026960377189733, "grad_norm": 0.741669774055481, "learning_rate": 9.99444885479395e-05, "loss": 1.1431, "step": 4700 }, { "epoch": 0.030090847526928435, "grad_norm": 0.8591098189353943, "learning_rate": 9.994425192005067e-05, "loss": 1.0976, "step": 4710 }, { "epoch": 0.030154734676667135, "grad_norm": 0.6573971509933472, "learning_rate": 9.994401478918003e-05, "loss": 0.9112, "step": 4720 }, { "epoch": 0.030218621826405837, "grad_norm": 0.7204700708389282, "learning_rate": 9.994377715532996e-05, "loss": 0.8728, "step": 4730 }, { "epoch": 0.03028250897614454, "grad_norm": 1.0097802877426147, "learning_rate": 9.994353901850283e-05, "loss": 1.0269, "step": 4740 }, { "epoch": 0.03034639612588324, "grad_norm": 1.4376720190048218, "learning_rate": 9.994330037870107e-05, "loss": 0.8102, "step": 4750 }, { "epoch": 0.03041028327562194, "grad_norm": 0.7325295209884644, "learning_rate": 9.994306123592704e-05, "loss": 0.9336, "step": 4760 }, { "epoch": 0.030474170425360644, "grad_norm": 0.7449788451194763, "learning_rate": 9.994282159018323e-05, "loss": 0.8539, "step": 4770 }, { "epoch": 0.030538057575099344, "grad_norm": 0.7632824778556824, "learning_rate": 9.994258144147195e-05, "loss": 0.6919, "step": 4780 }, { "epoch": 0.030601944724838046, "grad_norm": 0.9371885657310486, "learning_rate": 9.99423407897957e-05, "loss": 0.8518, "step": 4790 }, { "epoch": 0.03066583187457675, "grad_norm": 0.9703862071037292, "learning_rate": 9.994209963515684e-05, "loss": 0.8882, "step": 4800 }, { "epoch": 0.030729719024315448, "grad_norm": 0.6255933046340942, "learning_rate": 9.994185797755787e-05, "loss": 0.9969, "step": 4810 }, { "epoch": 0.03079360617405415, "grad_norm": 2.328423261642456, "learning_rate": 9.994161581700115e-05, "loss": 0.8677, "step": 4820 }, { "epoch": 0.030857493323792853, "grad_norm": 0.8444818258285522, "learning_rate": 9.994137315348917e-05, "loss": 0.9273, "step": 4830 }, { "epoch": 0.030921380473531553, "grad_norm": 0.6778904795646667, "learning_rate": 9.994112998702434e-05, "loss": 1.136, "step": 4840 }, { "epoch": 0.030985267623270255, "grad_norm": 0.7254196405410767, "learning_rate": 9.994088631760914e-05, "loss": 0.9659, "step": 4850 }, { "epoch": 0.031049154773008958, "grad_norm": 2.3594653606414795, "learning_rate": 9.994064214524602e-05, "loss": 0.9981, "step": 4860 }, { "epoch": 0.031113041922747657, "grad_norm": 0.9346766471862793, "learning_rate": 9.994039746993742e-05, "loss": 1.2296, "step": 4870 }, { "epoch": 0.03117692907248636, "grad_norm": 1.1860244274139404, "learning_rate": 9.994015229168581e-05, "loss": 1.0124, "step": 4880 }, { "epoch": 0.031240816222225062, "grad_norm": 0.977857232093811, "learning_rate": 9.993990661049366e-05, "loss": 0.8632, "step": 4890 }, { "epoch": 0.03130470337196376, "grad_norm": 0.9144421815872192, "learning_rate": 9.993966042636345e-05, "loss": 0.9927, "step": 4900 }, { "epoch": 0.03136859052170247, "grad_norm": 1.034429669380188, "learning_rate": 9.993941373929764e-05, "loss": 0.8818, "step": 4910 }, { "epoch": 0.03143247767144117, "grad_norm": 0.4996863007545471, "learning_rate": 9.993916654929876e-05, "loss": 0.6711, "step": 4920 }, { "epoch": 0.031496364821179866, "grad_norm": 0.6924141049385071, "learning_rate": 9.993891885636925e-05, "loss": 0.9002, "step": 4930 }, { "epoch": 0.03156025197091857, "grad_norm": 0.7536648511886597, "learning_rate": 9.993867066051163e-05, "loss": 1.0268, "step": 4940 }, { "epoch": 0.03162413912065727, "grad_norm": 1.059717059135437, "learning_rate": 9.993842196172838e-05, "loss": 1.4731, "step": 4950 }, { "epoch": 0.03168802627039597, "grad_norm": 0.9447365999221802, "learning_rate": 9.993817276002203e-05, "loss": 0.8936, "step": 4960 }, { "epoch": 0.03175191342013468, "grad_norm": 2.9407436847686768, "learning_rate": 9.993792305539507e-05, "loss": 0.9535, "step": 4970 }, { "epoch": 0.031815800569873376, "grad_norm": 0.9434256553649902, "learning_rate": 9.993767284785003e-05, "loss": 0.9241, "step": 4980 }, { "epoch": 0.031879687719612075, "grad_norm": 0.5843566060066223, "learning_rate": 9.993742213738942e-05, "loss": 1.1005, "step": 4990 }, { "epoch": 0.03194357486935078, "grad_norm": 0.5183364748954773, "learning_rate": 9.993717092401577e-05, "loss": 1.0861, "step": 5000 }, { "epoch": 0.03200746201908948, "grad_norm": 0.716195821762085, "learning_rate": 9.99369192077316e-05, "loss": 1.0468, "step": 5010 }, { "epoch": 0.03207134916882818, "grad_norm": 0.6783444285392761, "learning_rate": 9.993666698853946e-05, "loss": 0.9456, "step": 5020 }, { "epoch": 0.032135236318566886, "grad_norm": 0.8905858397483826, "learning_rate": 9.99364142664419e-05, "loss": 0.9607, "step": 5030 }, { "epoch": 0.032199123468305585, "grad_norm": 1.1394882202148438, "learning_rate": 9.993616104144141e-05, "loss": 0.7845, "step": 5040 }, { "epoch": 0.032263010618044284, "grad_norm": 0.9417553544044495, "learning_rate": 9.99359073135406e-05, "loss": 0.9869, "step": 5050 }, { "epoch": 0.03232689776778299, "grad_norm": 0.6557328104972839, "learning_rate": 9.993565308274199e-05, "loss": 1.132, "step": 5060 }, { "epoch": 0.03239078491752169, "grad_norm": 1.505283236503601, "learning_rate": 9.993539834904816e-05, "loss": 0.6938, "step": 5070 }, { "epoch": 0.03245467206726039, "grad_norm": 0.7740111947059631, "learning_rate": 9.993514311246166e-05, "loss": 0.9475, "step": 5080 }, { "epoch": 0.032518559216999095, "grad_norm": 1.1379529237747192, "learning_rate": 9.993488737298509e-05, "loss": 0.7626, "step": 5090 }, { "epoch": 0.032582446366737794, "grad_norm": 0.5552259683609009, "learning_rate": 9.993463113062099e-05, "loss": 0.9058, "step": 5100 }, { "epoch": 0.03264633351647649, "grad_norm": 0.7772766351699829, "learning_rate": 9.993437438537194e-05, "loss": 1.0914, "step": 5110 }, { "epoch": 0.0327102206662152, "grad_norm": 0.7294765114784241, "learning_rate": 9.993411713724056e-05, "loss": 0.9447, "step": 5120 }, { "epoch": 0.0327741078159539, "grad_norm": 0.8332342505455017, "learning_rate": 9.993385938622942e-05, "loss": 0.7607, "step": 5130 }, { "epoch": 0.0328379949656926, "grad_norm": 0.759425163269043, "learning_rate": 9.993360113234111e-05, "loss": 0.8551, "step": 5140 }, { "epoch": 0.032901882115431304, "grad_norm": 0.8883112668991089, "learning_rate": 9.993334237557825e-05, "loss": 0.815, "step": 5150 }, { "epoch": 0.03296576926517, "grad_norm": 0.5959163308143616, "learning_rate": 9.993308311594343e-05, "loss": 1.0528, "step": 5160 }, { "epoch": 0.0330296564149087, "grad_norm": 1.0523767471313477, "learning_rate": 9.993282335343925e-05, "loss": 1.0073, "step": 5170 }, { "epoch": 0.03309354356464741, "grad_norm": 0.8208662271499634, "learning_rate": 9.993256308806835e-05, "loss": 0.8802, "step": 5180 }, { "epoch": 0.03315743071438611, "grad_norm": 0.7097920775413513, "learning_rate": 9.993230231983334e-05, "loss": 1.0191, "step": 5190 }, { "epoch": 0.03322131786412481, "grad_norm": 0.7505048513412476, "learning_rate": 9.993204104873686e-05, "loss": 1.0811, "step": 5200 }, { "epoch": 0.03328520501386351, "grad_norm": 0.9009354710578918, "learning_rate": 9.993177927478152e-05, "loss": 0.9172, "step": 5210 }, { "epoch": 0.03334909216360221, "grad_norm": 0.681164562702179, "learning_rate": 9.993151699796996e-05, "loss": 0.8789, "step": 5220 }, { "epoch": 0.03341297931334091, "grad_norm": 0.9279341101646423, "learning_rate": 9.993125421830484e-05, "loss": 0.7841, "step": 5230 }, { "epoch": 0.03347686646307962, "grad_norm": 0.8030073642730713, "learning_rate": 9.993099093578879e-05, "loss": 1.1084, "step": 5240 }, { "epoch": 0.033540753612818316, "grad_norm": 0.8783805966377258, "learning_rate": 9.993072715042447e-05, "loss": 0.9935, "step": 5250 }, { "epoch": 0.03360464076255702, "grad_norm": 1.2054526805877686, "learning_rate": 9.99304628622145e-05, "loss": 1.0037, "step": 5260 }, { "epoch": 0.03366852791229572, "grad_norm": 0.7649316787719727, "learning_rate": 9.99301980711616e-05, "loss": 0.7857, "step": 5270 }, { "epoch": 0.03373241506203442, "grad_norm": 1.0451691150665283, "learning_rate": 9.992993277726841e-05, "loss": 1.0657, "step": 5280 }, { "epoch": 0.03379630221177313, "grad_norm": 1.1677067279815674, "learning_rate": 9.99296669805376e-05, "loss": 0.8225, "step": 5290 }, { "epoch": 0.033860189361511826, "grad_norm": 0.8038674592971802, "learning_rate": 9.992940068097184e-05, "loss": 0.8793, "step": 5300 }, { "epoch": 0.033924076511250525, "grad_norm": 0.8285770416259766, "learning_rate": 9.992913387857383e-05, "loss": 1.175, "step": 5310 }, { "epoch": 0.03398796366098923, "grad_norm": 1.8478131294250488, "learning_rate": 9.992886657334624e-05, "loss": 1.1025, "step": 5320 }, { "epoch": 0.03405185081072793, "grad_norm": 0.6567774415016174, "learning_rate": 9.992859876529177e-05, "loss": 0.979, "step": 5330 }, { "epoch": 0.03411573796046663, "grad_norm": 1.635343074798584, "learning_rate": 9.992833045441312e-05, "loss": 0.9373, "step": 5340 }, { "epoch": 0.034179625110205336, "grad_norm": 0.6428894400596619, "learning_rate": 9.992806164071298e-05, "loss": 0.9726, "step": 5350 }, { "epoch": 0.034243512259944035, "grad_norm": 0.9768702983856201, "learning_rate": 9.992779232419407e-05, "loss": 1.1691, "step": 5360 }, { "epoch": 0.034307399409682734, "grad_norm": 0.9969322681427002, "learning_rate": 9.99275225048591e-05, "loss": 0.9453, "step": 5370 }, { "epoch": 0.03437128655942144, "grad_norm": 1.498533010482788, "learning_rate": 9.992725218271078e-05, "loss": 0.9161, "step": 5380 }, { "epoch": 0.03443517370916014, "grad_norm": 0.6910355687141418, "learning_rate": 9.992698135775185e-05, "loss": 0.8751, "step": 5390 }, { "epoch": 0.03449906085889884, "grad_norm": 0.7530591487884521, "learning_rate": 9.992671002998502e-05, "loss": 1.0573, "step": 5400 }, { "epoch": 0.034562948008637545, "grad_norm": 0.9451344013214111, "learning_rate": 9.992643819941301e-05, "loss": 0.8682, "step": 5410 }, { "epoch": 0.034626835158376244, "grad_norm": 1.7209718227386475, "learning_rate": 9.992616586603859e-05, "loss": 0.8826, "step": 5420 }, { "epoch": 0.034690722308114944, "grad_norm": 0.7069958448410034, "learning_rate": 9.992589302986448e-05, "loss": 0.8965, "step": 5430 }, { "epoch": 0.03475460945785365, "grad_norm": 0.6233651041984558, "learning_rate": 9.992561969089345e-05, "loss": 0.9789, "step": 5440 }, { "epoch": 0.03481849660759235, "grad_norm": 0.7849096655845642, "learning_rate": 9.992534584912823e-05, "loss": 1.0208, "step": 5450 }, { "epoch": 0.03488238375733105, "grad_norm": 0.7504194378852844, "learning_rate": 9.992507150457158e-05, "loss": 0.7951, "step": 5460 }, { "epoch": 0.034946270907069754, "grad_norm": 1.141536831855774, "learning_rate": 9.992479665722627e-05, "loss": 0.7366, "step": 5470 }, { "epoch": 0.03501015805680845, "grad_norm": 0.8907060623168945, "learning_rate": 9.992452130709507e-05, "loss": 1.1784, "step": 5480 }, { "epoch": 0.03507404520654715, "grad_norm": 0.9252203106880188, "learning_rate": 9.992424545418074e-05, "loss": 0.9195, "step": 5490 }, { "epoch": 0.03513793235628586, "grad_norm": 0.9670997262001038, "learning_rate": 9.992396909848608e-05, "loss": 0.8106, "step": 5500 }, { "epoch": 0.03520181950602456, "grad_norm": 0.9867545962333679, "learning_rate": 9.992369224001386e-05, "loss": 0.8976, "step": 5510 }, { "epoch": 0.03526570665576326, "grad_norm": 1.0230097770690918, "learning_rate": 9.992341487876686e-05, "loss": 0.8986, "step": 5520 }, { "epoch": 0.03532959380550196, "grad_norm": 0.7679455876350403, "learning_rate": 9.99231370147479e-05, "loss": 0.9554, "step": 5530 }, { "epoch": 0.03539348095524066, "grad_norm": 0.6599009037017822, "learning_rate": 9.992285864795974e-05, "loss": 0.8623, "step": 5540 }, { "epoch": 0.03545736810497936, "grad_norm": 1.114585041999817, "learning_rate": 9.992257977840521e-05, "loss": 1.0822, "step": 5550 }, { "epoch": 0.03552125525471807, "grad_norm": 0.6967979073524475, "learning_rate": 9.992230040608713e-05, "loss": 1.0806, "step": 5560 }, { "epoch": 0.03558514240445677, "grad_norm": 2.6597609519958496, "learning_rate": 9.992202053100826e-05, "loss": 0.958, "step": 5570 }, { "epoch": 0.035649029554195466, "grad_norm": 0.7488609552383423, "learning_rate": 9.992174015317148e-05, "loss": 0.6722, "step": 5580 }, { "epoch": 0.03571291670393417, "grad_norm": 1.290249228477478, "learning_rate": 9.992145927257958e-05, "loss": 1.1259, "step": 5590 }, { "epoch": 0.03577680385367287, "grad_norm": 0.7017959952354431, "learning_rate": 9.99211778892354e-05, "loss": 0.9599, "step": 5600 }, { "epoch": 0.03584069100341157, "grad_norm": 0.6516076922416687, "learning_rate": 9.992089600314179e-05, "loss": 1.0698, "step": 5610 }, { "epoch": 0.03590457815315028, "grad_norm": 0.860114336013794, "learning_rate": 9.992061361430153e-05, "loss": 0.8568, "step": 5620 }, { "epoch": 0.035968465302888976, "grad_norm": 0.6573166847229004, "learning_rate": 9.992033072271754e-05, "loss": 0.9076, "step": 5630 }, { "epoch": 0.036032352452627675, "grad_norm": 1.0699505805969238, "learning_rate": 9.992004732839261e-05, "loss": 0.8982, "step": 5640 }, { "epoch": 0.03609623960236638, "grad_norm": 0.8025882840156555, "learning_rate": 9.991976343132963e-05, "loss": 0.9928, "step": 5650 }, { "epoch": 0.03616012675210508, "grad_norm": 0.7112436294555664, "learning_rate": 9.991947903153143e-05, "loss": 1.0748, "step": 5660 }, { "epoch": 0.036224013901843787, "grad_norm": 0.8061192631721497, "learning_rate": 9.991919412900091e-05, "loss": 1.0776, "step": 5670 }, { "epoch": 0.036287901051582486, "grad_norm": 3.550689220428467, "learning_rate": 9.99189087237409e-05, "loss": 0.8897, "step": 5680 }, { "epoch": 0.036351788201321185, "grad_norm": 0.6956158876419067, "learning_rate": 9.991862281575431e-05, "loss": 0.9601, "step": 5690 }, { "epoch": 0.03641567535105989, "grad_norm": 2.4917612075805664, "learning_rate": 9.991833640504397e-05, "loss": 1.2047, "step": 5700 }, { "epoch": 0.03647956250079859, "grad_norm": 0.8588683009147644, "learning_rate": 9.991804949161284e-05, "loss": 0.8791, "step": 5710 }, { "epoch": 0.03654344965053729, "grad_norm": 1.8225440979003906, "learning_rate": 9.991776207546373e-05, "loss": 1.1723, "step": 5720 }, { "epoch": 0.036607336800275996, "grad_norm": 0.6750584244728088, "learning_rate": 9.991747415659959e-05, "loss": 1.0424, "step": 5730 }, { "epoch": 0.036671223950014695, "grad_norm": 1.0814725160598755, "learning_rate": 9.99171857350233e-05, "loss": 0.7245, "step": 5740 }, { "epoch": 0.036735111099753394, "grad_norm": 0.6731589436531067, "learning_rate": 9.991689681073776e-05, "loss": 0.7107, "step": 5750 }, { "epoch": 0.0367989982494921, "grad_norm": 1.090672492980957, "learning_rate": 9.991660738374589e-05, "loss": 1.1092, "step": 5760 }, { "epoch": 0.0368628853992308, "grad_norm": 0.9638064503669739, "learning_rate": 9.991631745405059e-05, "loss": 1.0152, "step": 5770 }, { "epoch": 0.0369267725489695, "grad_norm": 0.6535985469818115, "learning_rate": 9.99160270216548e-05, "loss": 0.9731, "step": 5780 }, { "epoch": 0.036990659698708205, "grad_norm": 0.8303619623184204, "learning_rate": 9.991573608656144e-05, "loss": 1.0109, "step": 5790 }, { "epoch": 0.037054546848446904, "grad_norm": 0.8238627910614014, "learning_rate": 9.991544464877342e-05, "loss": 1.1488, "step": 5800 }, { "epoch": 0.0371184339981856, "grad_norm": 0.7430026531219482, "learning_rate": 9.991515270829369e-05, "loss": 0.9808, "step": 5810 }, { "epoch": 0.03718232114792431, "grad_norm": 1.1487149000167847, "learning_rate": 9.99148602651252e-05, "loss": 0.8169, "step": 5820 }, { "epoch": 0.03724620829766301, "grad_norm": 0.8699382543563843, "learning_rate": 9.991456731927087e-05, "loss": 0.9892, "step": 5830 }, { "epoch": 0.03731009544740171, "grad_norm": 0.92801833152771, "learning_rate": 9.991427387073367e-05, "loss": 1.1314, "step": 5840 }, { "epoch": 0.037373982597140414, "grad_norm": 0.9899303913116455, "learning_rate": 9.991397991951656e-05, "loss": 0.7899, "step": 5850 }, { "epoch": 0.03743786974687911, "grad_norm": 0.6273317933082581, "learning_rate": 9.991368546562249e-05, "loss": 1.0946, "step": 5860 }, { "epoch": 0.03750175689661781, "grad_norm": 1.1781492233276367, "learning_rate": 9.991339050905442e-05, "loss": 0.9631, "step": 5870 }, { "epoch": 0.03756564404635652, "grad_norm": 1.5557823181152344, "learning_rate": 9.991309504981533e-05, "loss": 0.8755, "step": 5880 }, { "epoch": 0.03762953119609522, "grad_norm": 1.418256402015686, "learning_rate": 9.991279908790818e-05, "loss": 1.0737, "step": 5890 }, { "epoch": 0.037693418345833916, "grad_norm": 1.275620460510254, "learning_rate": 9.991250262333597e-05, "loss": 0.7169, "step": 5900 }, { "epoch": 0.03775730549557262, "grad_norm": 0.9257436394691467, "learning_rate": 9.991220565610169e-05, "loss": 1.0117, "step": 5910 }, { "epoch": 0.03782119264531132, "grad_norm": 0.6086337566375732, "learning_rate": 9.99119081862083e-05, "loss": 0.9319, "step": 5920 }, { "epoch": 0.03788507979505002, "grad_norm": 1.3489453792572021, "learning_rate": 9.991161021365882e-05, "loss": 1.1381, "step": 5930 }, { "epoch": 0.03794896694478873, "grad_norm": 0.7379159927368164, "learning_rate": 9.991131173845624e-05, "loss": 1.1553, "step": 5940 }, { "epoch": 0.038012854094527426, "grad_norm": 0.8401197195053101, "learning_rate": 9.991101276060358e-05, "loss": 0.8074, "step": 5950 }, { "epoch": 0.038076741244266125, "grad_norm": 1.0958367586135864, "learning_rate": 9.991071328010384e-05, "loss": 1.1319, "step": 5960 }, { "epoch": 0.03814062839400483, "grad_norm": 0.9215190410614014, "learning_rate": 9.991041329696005e-05, "loss": 1.1632, "step": 5970 }, { "epoch": 0.03820451554374353, "grad_norm": 1.5827072858810425, "learning_rate": 9.991011281117521e-05, "loss": 0.9153, "step": 5980 }, { "epoch": 0.03826840269348223, "grad_norm": 0.67779141664505, "learning_rate": 9.990981182275236e-05, "loss": 0.968, "step": 5990 }, { "epoch": 0.038332289843220936, "grad_norm": 1.1568547487258911, "learning_rate": 9.990951033169451e-05, "loss": 0.9781, "step": 6000 }, { "epoch": 0.038396176992959635, "grad_norm": 0.7177845239639282, "learning_rate": 9.990920833800472e-05, "loss": 0.9362, "step": 6010 }, { "epoch": 0.038460064142698334, "grad_norm": 0.7867560982704163, "learning_rate": 9.990890584168604e-05, "loss": 0.8053, "step": 6020 }, { "epoch": 0.03852395129243704, "grad_norm": 0.9753761887550354, "learning_rate": 9.990860284274148e-05, "loss": 0.9772, "step": 6030 }, { "epoch": 0.03858783844217574, "grad_norm": 1.043918490409851, "learning_rate": 9.990829934117413e-05, "loss": 1.0062, "step": 6040 }, { "epoch": 0.03865172559191444, "grad_norm": 0.6653173565864563, "learning_rate": 9.990799533698703e-05, "loss": 0.946, "step": 6050 }, { "epoch": 0.038715612741653145, "grad_norm": 0.6706075072288513, "learning_rate": 9.990769083018322e-05, "loss": 0.9202, "step": 6060 }, { "epoch": 0.038779499891391844, "grad_norm": 1.005500078201294, "learning_rate": 9.99073858207658e-05, "loss": 1.2583, "step": 6070 }, { "epoch": 0.03884338704113055, "grad_norm": 0.9135782122612, "learning_rate": 9.990708030873783e-05, "loss": 1.1592, "step": 6080 }, { "epoch": 0.03890727419086925, "grad_norm": 0.8927890658378601, "learning_rate": 9.990677429410237e-05, "loss": 1.0624, "step": 6090 }, { "epoch": 0.03897116134060795, "grad_norm": 1.1654282808303833, "learning_rate": 9.990646777686255e-05, "loss": 0.8439, "step": 6100 }, { "epoch": 0.039035048490346655, "grad_norm": 0.5983591079711914, "learning_rate": 9.99061607570214e-05, "loss": 0.8542, "step": 6110 }, { "epoch": 0.039098935640085354, "grad_norm": 0.9841302633285522, "learning_rate": 9.990585323458204e-05, "loss": 1.0852, "step": 6120 }, { "epoch": 0.03916282278982405, "grad_norm": 1.078748106956482, "learning_rate": 9.990554520954755e-05, "loss": 0.8696, "step": 6130 }, { "epoch": 0.03922670993956276, "grad_norm": 0.9046047925949097, "learning_rate": 9.990523668192106e-05, "loss": 0.9837, "step": 6140 }, { "epoch": 0.03929059708930146, "grad_norm": 0.6112083196640015, "learning_rate": 9.990492765170567e-05, "loss": 1.445, "step": 6150 }, { "epoch": 0.03935448423904016, "grad_norm": 0.8192219138145447, "learning_rate": 9.990461811890447e-05, "loss": 0.7521, "step": 6160 }, { "epoch": 0.039418371388778864, "grad_norm": 1.2310230731964111, "learning_rate": 9.99043080835206e-05, "loss": 0.7873, "step": 6170 }, { "epoch": 0.03948225853851756, "grad_norm": 0.5166013836860657, "learning_rate": 9.990399754555717e-05, "loss": 1.0726, "step": 6180 }, { "epoch": 0.03954614568825626, "grad_norm": 0.8496847748756409, "learning_rate": 9.990368650501731e-05, "loss": 0.8312, "step": 6190 }, { "epoch": 0.03961003283799497, "grad_norm": 1.445300579071045, "learning_rate": 9.990337496190416e-05, "loss": 0.8953, "step": 6200 }, { "epoch": 0.03967391998773367, "grad_norm": 2.797938108444214, "learning_rate": 9.990306291622085e-05, "loss": 0.8305, "step": 6210 }, { "epoch": 0.03973780713747237, "grad_norm": 0.5867908596992493, "learning_rate": 9.990275036797054e-05, "loss": 0.7997, "step": 6220 }, { "epoch": 0.03980169428721107, "grad_norm": 0.5474823713302612, "learning_rate": 9.990243731715634e-05, "loss": 1.3339, "step": 6230 }, { "epoch": 0.03986558143694977, "grad_norm": 1.1061484813690186, "learning_rate": 9.990212376378143e-05, "loss": 0.8513, "step": 6240 }, { "epoch": 0.03992946858668847, "grad_norm": 1.0674853324890137, "learning_rate": 9.990180970784897e-05, "loss": 1.0124, "step": 6250 }, { "epoch": 0.03999335573642718, "grad_norm": 0.7848487496376038, "learning_rate": 9.99014951493621e-05, "loss": 0.9535, "step": 6260 }, { "epoch": 0.04005724288616588, "grad_norm": 0.7292889356613159, "learning_rate": 9.9901180088324e-05, "loss": 1.1792, "step": 6270 }, { "epoch": 0.040121130035904576, "grad_norm": 0.7035486698150635, "learning_rate": 9.990086452473785e-05, "loss": 0.8471, "step": 6280 }, { "epoch": 0.04018501718564328, "grad_norm": 0.6115634441375732, "learning_rate": 9.990054845860683e-05, "loss": 1.1244, "step": 6290 }, { "epoch": 0.04024890433538198, "grad_norm": 2.171461582183838, "learning_rate": 9.990023188993412e-05, "loss": 1.0045, "step": 6300 }, { "epoch": 0.04031279148512068, "grad_norm": 0.8362821936607361, "learning_rate": 9.989991481872292e-05, "loss": 1.0352, "step": 6310 }, { "epoch": 0.040376678634859386, "grad_norm": 0.8392160534858704, "learning_rate": 9.989959724497638e-05, "loss": 0.785, "step": 6320 }, { "epoch": 0.040440565784598086, "grad_norm": 0.4593855142593384, "learning_rate": 9.989927916869773e-05, "loss": 0.8819, "step": 6330 }, { "epoch": 0.040504452934336785, "grad_norm": 0.6949111223220825, "learning_rate": 9.98989605898902e-05, "loss": 0.9824, "step": 6340 }, { "epoch": 0.04056834008407549, "grad_norm": 0.6681846976280212, "learning_rate": 9.989864150855693e-05, "loss": 0.7795, "step": 6350 }, { "epoch": 0.04063222723381419, "grad_norm": 0.9278548359870911, "learning_rate": 9.989832192470118e-05, "loss": 0.9975, "step": 6360 }, { "epoch": 0.04069611438355289, "grad_norm": 0.7522639632225037, "learning_rate": 9.989800183832616e-05, "loss": 1.0204, "step": 6370 }, { "epoch": 0.040760001533291595, "grad_norm": 0.9609561562538147, "learning_rate": 9.98976812494351e-05, "loss": 1.0157, "step": 6380 }, { "epoch": 0.040823888683030295, "grad_norm": 0.7092857956886292, "learning_rate": 9.989736015803123e-05, "loss": 0.9443, "step": 6390 }, { "epoch": 0.040887775832768994, "grad_norm": 4.257565498352051, "learning_rate": 9.989703856411776e-05, "loss": 1.134, "step": 6400 }, { "epoch": 0.0409516629825077, "grad_norm": 1.1755651235580444, "learning_rate": 9.989671646769796e-05, "loss": 1.1108, "step": 6410 }, { "epoch": 0.0410155501322464, "grad_norm": 0.8459087610244751, "learning_rate": 9.989639386877505e-05, "loss": 1.0194, "step": 6420 }, { "epoch": 0.0410794372819851, "grad_norm": 1.175000786781311, "learning_rate": 9.989607076735229e-05, "loss": 0.8072, "step": 6430 }, { "epoch": 0.041143324431723804, "grad_norm": 1.2269272804260254, "learning_rate": 9.989574716343294e-05, "loss": 1.1758, "step": 6440 }, { "epoch": 0.041207211581462504, "grad_norm": 0.7292816042900085, "learning_rate": 9.989542305702022e-05, "loss": 0.9037, "step": 6450 }, { "epoch": 0.0412710987312012, "grad_norm": 1.1013445854187012, "learning_rate": 9.989509844811745e-05, "loss": 0.7594, "step": 6460 }, { "epoch": 0.04133498588093991, "grad_norm": 1.5162911415100098, "learning_rate": 9.989477333672787e-05, "loss": 0.8458, "step": 6470 }, { "epoch": 0.04139887303067861, "grad_norm": 0.5727777481079102, "learning_rate": 9.989444772285475e-05, "loss": 1.0281, "step": 6480 }, { "epoch": 0.041462760180417314, "grad_norm": 0.940905749797821, "learning_rate": 9.989412160650137e-05, "loss": 0.8714, "step": 6490 }, { "epoch": 0.041526647330156014, "grad_norm": 1.0898019075393677, "learning_rate": 9.989379498767104e-05, "loss": 0.8905, "step": 6500 }, { "epoch": 0.04159053447989471, "grad_norm": 1.05965256690979, "learning_rate": 9.989346786636701e-05, "loss": 1.0419, "step": 6510 }, { "epoch": 0.04165442162963342, "grad_norm": 1.0670409202575684, "learning_rate": 9.989314024259262e-05, "loss": 0.7306, "step": 6520 }, { "epoch": 0.04171830877937212, "grad_norm": 0.9134021401405334, "learning_rate": 9.989281211635114e-05, "loss": 0.9002, "step": 6530 }, { "epoch": 0.04178219592911082, "grad_norm": 0.9163311719894409, "learning_rate": 9.989248348764586e-05, "loss": 0.9131, "step": 6540 }, { "epoch": 0.04184608307884952, "grad_norm": 0.6874496936798096, "learning_rate": 9.989215435648011e-05, "loss": 0.9497, "step": 6550 }, { "epoch": 0.04190997022858822, "grad_norm": 0.9504197239875793, "learning_rate": 9.989182472285721e-05, "loss": 1.06, "step": 6560 }, { "epoch": 0.04197385737832692, "grad_norm": 0.794982373714447, "learning_rate": 9.989149458678046e-05, "loss": 0.8137, "step": 6570 }, { "epoch": 0.04203774452806563, "grad_norm": 0.9030359983444214, "learning_rate": 9.989116394825322e-05, "loss": 0.7989, "step": 6580 }, { "epoch": 0.04210163167780433, "grad_norm": 0.7701511979103088, "learning_rate": 9.989083280727878e-05, "loss": 1.0566, "step": 6590 }, { "epoch": 0.042165518827543026, "grad_norm": 0.8130073547363281, "learning_rate": 9.98905011638605e-05, "loss": 0.9397, "step": 6600 }, { "epoch": 0.04222940597728173, "grad_norm": 0.6246233582496643, "learning_rate": 9.989016901800171e-05, "loss": 0.8776, "step": 6610 }, { "epoch": 0.04229329312702043, "grad_norm": 0.7861520648002625, "learning_rate": 9.988983636970576e-05, "loss": 1.0794, "step": 6620 }, { "epoch": 0.04235718027675913, "grad_norm": 1.3345977067947388, "learning_rate": 9.988950321897599e-05, "loss": 0.8676, "step": 6630 }, { "epoch": 0.04242106742649784, "grad_norm": 0.56337571144104, "learning_rate": 9.988916956581577e-05, "loss": 0.8426, "step": 6640 }, { "epoch": 0.042484954576236536, "grad_norm": 1.3534024953842163, "learning_rate": 9.988883541022844e-05, "loss": 0.7897, "step": 6650 }, { "epoch": 0.042548841725975235, "grad_norm": 1.3062078952789307, "learning_rate": 9.988850075221738e-05, "loss": 1.1495, "step": 6660 }, { "epoch": 0.04261272887571394, "grad_norm": 0.8563300967216492, "learning_rate": 9.988816559178597e-05, "loss": 0.7691, "step": 6670 }, { "epoch": 0.04267661602545264, "grad_norm": 0.6267048120498657, "learning_rate": 9.988782992893757e-05, "loss": 0.9558, "step": 6680 }, { "epoch": 0.04274050317519134, "grad_norm": 1.3723206520080566, "learning_rate": 9.988749376367556e-05, "loss": 0.9185, "step": 6690 }, { "epoch": 0.042804390324930046, "grad_norm": 1.9447133541107178, "learning_rate": 9.988715709600332e-05, "loss": 1.0383, "step": 6700 }, { "epoch": 0.042868277474668745, "grad_norm": 0.8852369785308838, "learning_rate": 9.988681992592426e-05, "loss": 0.8813, "step": 6710 }, { "epoch": 0.042932164624407444, "grad_norm": 2.174041986465454, "learning_rate": 9.988648225344177e-05, "loss": 0.9662, "step": 6720 }, { "epoch": 0.04299605177414615, "grad_norm": 1.9878665208816528, "learning_rate": 9.988614407855924e-05, "loss": 0.9924, "step": 6730 }, { "epoch": 0.04305993892388485, "grad_norm": 0.9836265444755554, "learning_rate": 9.988580540128008e-05, "loss": 1.2755, "step": 6740 }, { "epoch": 0.04312382607362355, "grad_norm": 0.999160647392273, "learning_rate": 9.98854662216077e-05, "loss": 0.9726, "step": 6750 }, { "epoch": 0.043187713223362255, "grad_norm": 1.9516860246658325, "learning_rate": 9.988512653954552e-05, "loss": 0.7816, "step": 6760 }, { "epoch": 0.043251600373100954, "grad_norm": 0.7745450735092163, "learning_rate": 9.988478635509696e-05, "loss": 0.7726, "step": 6770 }, { "epoch": 0.04331548752283965, "grad_norm": 0.8929428458213806, "learning_rate": 9.988444566826544e-05, "loss": 1.0001, "step": 6780 }, { "epoch": 0.04337937467257836, "grad_norm": 0.895820140838623, "learning_rate": 9.98841044790544e-05, "loss": 0.8765, "step": 6790 }, { "epoch": 0.04344326182231706, "grad_norm": 0.6711694598197937, "learning_rate": 9.988376278746727e-05, "loss": 0.9975, "step": 6800 }, { "epoch": 0.04350714897205576, "grad_norm": 0.9492961764335632, "learning_rate": 9.988342059350751e-05, "loss": 1.0356, "step": 6810 }, { "epoch": 0.043571036121794464, "grad_norm": 0.7187815308570862, "learning_rate": 9.988307789717853e-05, "loss": 0.8538, "step": 6820 }, { "epoch": 0.04363492327153316, "grad_norm": 0.9014946222305298, "learning_rate": 9.98827346984838e-05, "loss": 1.0214, "step": 6830 }, { "epoch": 0.04369881042127186, "grad_norm": 0.5608994960784912, "learning_rate": 9.98823909974268e-05, "loss": 0.8462, "step": 6840 }, { "epoch": 0.04376269757101057, "grad_norm": 0.8809041976928711, "learning_rate": 9.988204679401094e-05, "loss": 0.813, "step": 6850 }, { "epoch": 0.04382658472074927, "grad_norm": 0.7527191638946533, "learning_rate": 9.988170208823972e-05, "loss": 1.0194, "step": 6860 }, { "epoch": 0.04389047187048797, "grad_norm": 0.7817595601081848, "learning_rate": 9.988135688011662e-05, "loss": 0.8165, "step": 6870 }, { "epoch": 0.04395435902022667, "grad_norm": 0.8186140656471252, "learning_rate": 9.988101116964508e-05, "loss": 0.8789, "step": 6880 }, { "epoch": 0.04401824616996537, "grad_norm": 0.6612401008605957, "learning_rate": 9.988066495682863e-05, "loss": 0.8621, "step": 6890 }, { "epoch": 0.04408213331970407, "grad_norm": 0.8166273832321167, "learning_rate": 9.988031824167073e-05, "loss": 1.0722, "step": 6900 }, { "epoch": 0.04414602046944278, "grad_norm": 1.0065597295761108, "learning_rate": 9.987997102417486e-05, "loss": 1.082, "step": 6910 }, { "epoch": 0.04420990761918148, "grad_norm": 1.0010764598846436, "learning_rate": 9.987962330434452e-05, "loss": 0.8206, "step": 6920 }, { "epoch": 0.04427379476892018, "grad_norm": 0.7217119932174683, "learning_rate": 9.987927508218324e-05, "loss": 0.8516, "step": 6930 }, { "epoch": 0.04433768191865888, "grad_norm": 1.464766502380371, "learning_rate": 9.987892635769449e-05, "loss": 1.1353, "step": 6940 }, { "epoch": 0.04440156906839758, "grad_norm": 0.887629508972168, "learning_rate": 9.987857713088182e-05, "loss": 0.8636, "step": 6950 }, { "epoch": 0.04446545621813629, "grad_norm": 1.562030553817749, "learning_rate": 9.987822740174871e-05, "loss": 1.2412, "step": 6960 }, { "epoch": 0.044529343367874986, "grad_norm": 0.6418665647506714, "learning_rate": 9.987787717029871e-05, "loss": 1.1301, "step": 6970 }, { "epoch": 0.044593230517613686, "grad_norm": 0.7377752065658569, "learning_rate": 9.987752643653533e-05, "loss": 0.89, "step": 6980 }, { "epoch": 0.04465711766735239, "grad_norm": 0.709084689617157, "learning_rate": 9.987717520046211e-05, "loss": 0.9194, "step": 6990 }, { "epoch": 0.04472100481709109, "grad_norm": 0.7699615359306335, "learning_rate": 9.98768234620826e-05, "loss": 0.995, "step": 7000 }, { "epoch": 0.04478489196682979, "grad_norm": 0.8531057238578796, "learning_rate": 9.987647122140031e-05, "loss": 0.8096, "step": 7010 }, { "epoch": 0.044848779116568496, "grad_norm": 1.1459274291992188, "learning_rate": 9.987611847841883e-05, "loss": 0.9038, "step": 7020 }, { "epoch": 0.044912666266307195, "grad_norm": 0.966291606426239, "learning_rate": 9.987576523314167e-05, "loss": 0.9996, "step": 7030 }, { "epoch": 0.044976553416045895, "grad_norm": 1.0549588203430176, "learning_rate": 9.987541148557238e-05, "loss": 0.7135, "step": 7040 }, { "epoch": 0.0450404405657846, "grad_norm": 0.8475518226623535, "learning_rate": 9.987505723571458e-05, "loss": 0.7685, "step": 7050 }, { "epoch": 0.0451043277155233, "grad_norm": 0.8754829168319702, "learning_rate": 9.98747024835718e-05, "loss": 0.9184, "step": 7060 }, { "epoch": 0.045168214865262, "grad_norm": 0.8908385038375854, "learning_rate": 9.987434722914762e-05, "loss": 1.0456, "step": 7070 }, { "epoch": 0.045232102015000705, "grad_norm": 0.9609813094139099, "learning_rate": 9.987399147244562e-05, "loss": 1.1562, "step": 7080 }, { "epoch": 0.045295989164739404, "grad_norm": 0.681609034538269, "learning_rate": 9.987363521346937e-05, "loss": 0.8802, "step": 7090 }, { "epoch": 0.045359876314478104, "grad_norm": 0.6809660792350769, "learning_rate": 9.987327845222246e-05, "loss": 0.9104, "step": 7100 }, { "epoch": 0.04542376346421681, "grad_norm": 0.5972456932067871, "learning_rate": 9.98729211887085e-05, "loss": 0.9686, "step": 7110 }, { "epoch": 0.04548765061395551, "grad_norm": 2.145796537399292, "learning_rate": 9.987256342293108e-05, "loss": 0.8764, "step": 7120 }, { "epoch": 0.04555153776369421, "grad_norm": 1.2157313823699951, "learning_rate": 9.98722051548938e-05, "loss": 0.8955, "step": 7130 }, { "epoch": 0.045615424913432914, "grad_norm": 0.8759172558784485, "learning_rate": 9.987184638460026e-05, "loss": 0.8679, "step": 7140 }, { "epoch": 0.04567931206317161, "grad_norm": 1.0199391841888428, "learning_rate": 9.987148711205408e-05, "loss": 0.7592, "step": 7150 }, { "epoch": 0.04574319921291031, "grad_norm": 0.7216569185256958, "learning_rate": 9.98711273372589e-05, "loss": 0.7954, "step": 7160 }, { "epoch": 0.04580708636264902, "grad_norm": 1.0680534839630127, "learning_rate": 9.98707670602183e-05, "loss": 1.0779, "step": 7170 }, { "epoch": 0.04587097351238772, "grad_norm": 0.9365562796592712, "learning_rate": 9.987040628093594e-05, "loss": 1.0918, "step": 7180 }, { "epoch": 0.04593486066212642, "grad_norm": 1.0162864923477173, "learning_rate": 9.987004499941545e-05, "loss": 0.791, "step": 7190 }, { "epoch": 0.04599874781186512, "grad_norm": 0.9427816271781921, "learning_rate": 9.986968321566045e-05, "loss": 0.8263, "step": 7200 }, { "epoch": 0.04606263496160382, "grad_norm": 0.9530696868896484, "learning_rate": 9.98693209296746e-05, "loss": 1.0719, "step": 7210 }, { "epoch": 0.04612652211134252, "grad_norm": 0.687778890132904, "learning_rate": 9.986895814146156e-05, "loss": 0.8541, "step": 7220 }, { "epoch": 0.04619040926108123, "grad_norm": 0.8100598454475403, "learning_rate": 9.986859485102495e-05, "loss": 1.0194, "step": 7230 }, { "epoch": 0.04625429641081993, "grad_norm": 0.5516176819801331, "learning_rate": 9.986823105836847e-05, "loss": 0.8347, "step": 7240 }, { "epoch": 0.046318183560558626, "grad_norm": 0.8812345862388611, "learning_rate": 9.986786676349573e-05, "loss": 1.0472, "step": 7250 }, { "epoch": 0.04638207071029733, "grad_norm": 1.0025354623794556, "learning_rate": 9.986750196641047e-05, "loss": 1.0196, "step": 7260 }, { "epoch": 0.04644595786003603, "grad_norm": 1.2470890283584595, "learning_rate": 9.986713666711629e-05, "loss": 0.7237, "step": 7270 }, { "epoch": 0.04650984500977473, "grad_norm": 0.7719841599464417, "learning_rate": 9.986677086561691e-05, "loss": 0.9012, "step": 7280 }, { "epoch": 0.04657373215951344, "grad_norm": 0.5865141749382019, "learning_rate": 9.9866404561916e-05, "loss": 0.7885, "step": 7290 }, { "epoch": 0.046637619309252136, "grad_norm": 0.8722718954086304, "learning_rate": 9.986603775601728e-05, "loss": 0.9654, "step": 7300 }, { "epoch": 0.046701506458990835, "grad_norm": 0.9440786838531494, "learning_rate": 9.98656704479244e-05, "loss": 1.1144, "step": 7310 }, { "epoch": 0.04676539360872954, "grad_norm": 0.8505666851997375, "learning_rate": 9.986530263764108e-05, "loss": 0.9502, "step": 7320 }, { "epoch": 0.04682928075846824, "grad_norm": 0.7318026423454285, "learning_rate": 9.986493432517103e-05, "loss": 0.6851, "step": 7330 }, { "epoch": 0.04689316790820695, "grad_norm": 1.4378130435943604, "learning_rate": 9.986456551051795e-05, "loss": 0.8454, "step": 7340 }, { "epoch": 0.046957055057945646, "grad_norm": 0.9807822704315186, "learning_rate": 9.986419619368554e-05, "loss": 1.0638, "step": 7350 }, { "epoch": 0.047020942207684345, "grad_norm": 1.2284691333770752, "learning_rate": 9.986382637467757e-05, "loss": 0.9615, "step": 7360 }, { "epoch": 0.04708482935742305, "grad_norm": 0.7769535183906555, "learning_rate": 9.986345605349769e-05, "loss": 0.8708, "step": 7370 }, { "epoch": 0.04714871650716175, "grad_norm": 1.48138427734375, "learning_rate": 9.98630852301497e-05, "loss": 0.7993, "step": 7380 }, { "epoch": 0.04721260365690045, "grad_norm": 0.605939507484436, "learning_rate": 9.986271390463728e-05, "loss": 0.8898, "step": 7390 }, { "epoch": 0.047276490806639156, "grad_norm": 0.7884547710418701, "learning_rate": 9.986234207696421e-05, "loss": 0.9975, "step": 7400 }, { "epoch": 0.047340377956377855, "grad_norm": 0.9767579436302185, "learning_rate": 9.986196974713422e-05, "loss": 0.9493, "step": 7410 }, { "epoch": 0.047404265106116554, "grad_norm": 0.9091633558273315, "learning_rate": 9.986159691515105e-05, "loss": 0.7876, "step": 7420 }, { "epoch": 0.04746815225585526, "grad_norm": 0.6155557036399841, "learning_rate": 9.986122358101847e-05, "loss": 0.5978, "step": 7430 }, { "epoch": 0.04753203940559396, "grad_norm": 0.8261324763298035, "learning_rate": 9.986084974474024e-05, "loss": 0.9533, "step": 7440 }, { "epoch": 0.04759592655533266, "grad_norm": 0.5973717570304871, "learning_rate": 9.98604754063201e-05, "loss": 0.8045, "step": 7450 }, { "epoch": 0.047659813705071365, "grad_norm": 1.0176916122436523, "learning_rate": 9.986010056576184e-05, "loss": 1.0215, "step": 7460 }, { "epoch": 0.047723700854810064, "grad_norm": 0.5865172147750854, "learning_rate": 9.985972522306923e-05, "loss": 0.7648, "step": 7470 }, { "epoch": 0.04778758800454876, "grad_norm": 1.0286486148834229, "learning_rate": 9.985934937824605e-05, "loss": 0.8718, "step": 7480 }, { "epoch": 0.04785147515428747, "grad_norm": 1.0322641134262085, "learning_rate": 9.98589730312961e-05, "loss": 0.9538, "step": 7490 }, { "epoch": 0.04791536230402617, "grad_norm": 0.8804035782814026, "learning_rate": 9.985859618222316e-05, "loss": 0.7283, "step": 7500 }, { "epoch": 0.04797924945376487, "grad_norm": 0.7622368931770325, "learning_rate": 9.985821883103102e-05, "loss": 0.7618, "step": 7510 }, { "epoch": 0.048043136603503574, "grad_norm": 1.1401050090789795, "learning_rate": 9.985784097772347e-05, "loss": 1.0667, "step": 7520 }, { "epoch": 0.04810702375324227, "grad_norm": 0.6780824661254883, "learning_rate": 9.985746262230433e-05, "loss": 0.9327, "step": 7530 }, { "epoch": 0.04817091090298097, "grad_norm": 1.0564121007919312, "learning_rate": 9.985708376477743e-05, "loss": 0.857, "step": 7540 }, { "epoch": 0.04823479805271968, "grad_norm": 0.45248645544052124, "learning_rate": 9.985670440514654e-05, "loss": 0.7797, "step": 7550 }, { "epoch": 0.04829868520245838, "grad_norm": 0.9228289127349854, "learning_rate": 9.985632454341551e-05, "loss": 1.2661, "step": 7560 }, { "epoch": 0.04836257235219708, "grad_norm": 0.665448784828186, "learning_rate": 9.985594417958816e-05, "loss": 0.8736, "step": 7570 }, { "epoch": 0.04842645950193578, "grad_norm": 0.7093620896339417, "learning_rate": 9.985556331366832e-05, "loss": 0.9296, "step": 7580 }, { "epoch": 0.04849034665167448, "grad_norm": 1.1496485471725464, "learning_rate": 9.985518194565983e-05, "loss": 1.0429, "step": 7590 }, { "epoch": 0.04855423380141318, "grad_norm": 0.8305206298828125, "learning_rate": 9.985480007556653e-05, "loss": 0.9499, "step": 7600 }, { "epoch": 0.04861812095115189, "grad_norm": 0.8451396822929382, "learning_rate": 9.985441770339226e-05, "loss": 0.9502, "step": 7610 }, { "epoch": 0.048682008100890586, "grad_norm": 1.2433000802993774, "learning_rate": 9.985403482914087e-05, "loss": 0.6543, "step": 7620 }, { "epoch": 0.048745895250629286, "grad_norm": 0.8674241304397583, "learning_rate": 9.985365145281622e-05, "loss": 1.1627, "step": 7630 }, { "epoch": 0.04880978240036799, "grad_norm": 0.5980839133262634, "learning_rate": 9.985326757442217e-05, "loss": 1.1205, "step": 7640 }, { "epoch": 0.04887366955010669, "grad_norm": 1.4166803359985352, "learning_rate": 9.98528831939626e-05, "loss": 0.8682, "step": 7650 }, { "epoch": 0.04893755669984539, "grad_norm": 0.8415298461914062, "learning_rate": 9.985249831144135e-05, "loss": 0.9133, "step": 7660 }, { "epoch": 0.049001443849584096, "grad_norm": 1.0600535869598389, "learning_rate": 9.985211292686231e-05, "loss": 0.9593, "step": 7670 }, { "epoch": 0.049065330999322795, "grad_norm": 0.5692518353462219, "learning_rate": 9.985172704022939e-05, "loss": 1.1105, "step": 7680 }, { "epoch": 0.049129218149061495, "grad_norm": 1.1608545780181885, "learning_rate": 9.985134065154643e-05, "loss": 0.9287, "step": 7690 }, { "epoch": 0.0491931052988002, "grad_norm": 0.9091508984565735, "learning_rate": 9.985095376081734e-05, "loss": 0.8312, "step": 7700 }, { "epoch": 0.0492569924485389, "grad_norm": 0.8366988897323608, "learning_rate": 9.985056636804604e-05, "loss": 1.0451, "step": 7710 }, { "epoch": 0.0493208795982776, "grad_norm": 1.0978457927703857, "learning_rate": 9.98501784732364e-05, "loss": 0.8821, "step": 7720 }, { "epoch": 0.049384766748016305, "grad_norm": 1.7002284526824951, "learning_rate": 9.984979007639233e-05, "loss": 0.7092, "step": 7730 }, { "epoch": 0.049448653897755004, "grad_norm": 1.77642023563385, "learning_rate": 9.984940117751773e-05, "loss": 1.0623, "step": 7740 }, { "epoch": 0.04951254104749371, "grad_norm": 0.800308883190155, "learning_rate": 9.984901177661656e-05, "loss": 1.3445, "step": 7750 }, { "epoch": 0.04957642819723241, "grad_norm": 0.9408762454986572, "learning_rate": 9.98486218736927e-05, "loss": 0.8466, "step": 7760 }, { "epoch": 0.04964031534697111, "grad_norm": 0.7024977207183838, "learning_rate": 9.98482314687501e-05, "loss": 0.7186, "step": 7770 }, { "epoch": 0.049704202496709815, "grad_norm": 0.7420535087585449, "learning_rate": 9.98478405617927e-05, "loss": 1.0408, "step": 7780 }, { "epoch": 0.049768089646448514, "grad_norm": 1.0378546714782715, "learning_rate": 9.98474491528244e-05, "loss": 0.8885, "step": 7790 }, { "epoch": 0.04983197679618721, "grad_norm": 1.380505919456482, "learning_rate": 9.984705724184917e-05, "loss": 1.113, "step": 7800 }, { "epoch": 0.04989586394592592, "grad_norm": 1.8946232795715332, "learning_rate": 9.984666482887096e-05, "loss": 0.8355, "step": 7810 }, { "epoch": 0.04995975109566462, "grad_norm": 1.4878778457641602, "learning_rate": 9.98462719138937e-05, "loss": 0.992, "step": 7820 }, { "epoch": 0.05002363824540332, "grad_norm": 0.7730852365493774, "learning_rate": 9.984587849692136e-05, "loss": 0.7539, "step": 7830 }, { "epoch": 0.050087525395142024, "grad_norm": 0.83015376329422, "learning_rate": 9.984548457795791e-05, "loss": 0.8696, "step": 7840 }, { "epoch": 0.05015141254488072, "grad_norm": 0.7511310577392578, "learning_rate": 9.98450901570073e-05, "loss": 0.9013, "step": 7850 }, { "epoch": 0.05021529969461942, "grad_norm": 0.9059261679649353, "learning_rate": 9.984469523407349e-05, "loss": 0.8444, "step": 7860 }, { "epoch": 0.05027918684435813, "grad_norm": 0.9825949668884277, "learning_rate": 9.98442998091605e-05, "loss": 0.8864, "step": 7870 }, { "epoch": 0.05034307399409683, "grad_norm": 0.904929518699646, "learning_rate": 9.984390388227228e-05, "loss": 0.7628, "step": 7880 }, { "epoch": 0.05040696114383553, "grad_norm": 0.736785888671875, "learning_rate": 9.984350745341284e-05, "loss": 0.6913, "step": 7890 }, { "epoch": 0.05047084829357423, "grad_norm": 0.7877079248428345, "learning_rate": 9.984311052258615e-05, "loss": 1.2899, "step": 7900 }, { "epoch": 0.05053473544331293, "grad_norm": 3.8321728706359863, "learning_rate": 9.984271308979622e-05, "loss": 0.9465, "step": 7910 }, { "epoch": 0.05059862259305163, "grad_norm": 0.729813277721405, "learning_rate": 9.984231515504705e-05, "loss": 1.1176, "step": 7920 }, { "epoch": 0.05066250974279034, "grad_norm": 1.07712984085083, "learning_rate": 9.984191671834264e-05, "loss": 0.821, "step": 7930 }, { "epoch": 0.05072639689252904, "grad_norm": 0.6421816349029541, "learning_rate": 9.984151777968701e-05, "loss": 0.8634, "step": 7940 }, { "epoch": 0.050790284042267736, "grad_norm": 1.0871955156326294, "learning_rate": 9.984111833908419e-05, "loss": 0.9175, "step": 7950 }, { "epoch": 0.05085417119200644, "grad_norm": 0.9562147855758667, "learning_rate": 9.984071839653817e-05, "loss": 0.8648, "step": 7960 }, { "epoch": 0.05091805834174514, "grad_norm": 0.8465697169303894, "learning_rate": 9.9840317952053e-05, "loss": 0.7018, "step": 7970 }, { "epoch": 0.05098194549148384, "grad_norm": 0.4053485095500946, "learning_rate": 9.983991700563273e-05, "loss": 0.8683, "step": 7980 }, { "epoch": 0.05104583264122255, "grad_norm": 0.7025613188743591, "learning_rate": 9.983951555728135e-05, "loss": 0.9431, "step": 7990 }, { "epoch": 0.051109719790961246, "grad_norm": 0.7401816248893738, "learning_rate": 9.983911360700296e-05, "loss": 1.1364, "step": 8000 }, { "epoch": 0.051173606940699945, "grad_norm": 0.41972461342811584, "learning_rate": 9.983871115480155e-05, "loss": 0.9925, "step": 8010 }, { "epoch": 0.05123749409043865, "grad_norm": 0.577347457408905, "learning_rate": 9.983830820068123e-05, "loss": 0.7687, "step": 8020 }, { "epoch": 0.05130138124017735, "grad_norm": 0.8155549764633179, "learning_rate": 9.983790474464601e-05, "loss": 0.9115, "step": 8030 }, { "epoch": 0.05136526838991605, "grad_norm": 0.9730279445648193, "learning_rate": 9.983750078669998e-05, "loss": 1.1313, "step": 8040 }, { "epoch": 0.051429155539654756, "grad_norm": 0.8205385208129883, "learning_rate": 9.98370963268472e-05, "loss": 0.9971, "step": 8050 }, { "epoch": 0.051493042689393455, "grad_norm": 0.5464890599250793, "learning_rate": 9.983669136509175e-05, "loss": 0.7868, "step": 8060 }, { "epoch": 0.051556929839132154, "grad_norm": 1.3623446226119995, "learning_rate": 9.98362859014377e-05, "loss": 0.9343, "step": 8070 }, { "epoch": 0.05162081698887086, "grad_norm": 0.8901773691177368, "learning_rate": 9.983587993588914e-05, "loss": 0.7135, "step": 8080 }, { "epoch": 0.05168470413860956, "grad_norm": 0.7160339951515198, "learning_rate": 9.983547346845015e-05, "loss": 1.2925, "step": 8090 }, { "epoch": 0.05174859128834826, "grad_norm": 0.6623441576957703, "learning_rate": 9.983506649912482e-05, "loss": 0.923, "step": 8100 }, { "epoch": 0.051812478438086965, "grad_norm": 0.469149112701416, "learning_rate": 9.983465902791726e-05, "loss": 0.94, "step": 8110 }, { "epoch": 0.051876365587825664, "grad_norm": 0.5665640234947205, "learning_rate": 9.98342510548316e-05, "loss": 1.0792, "step": 8120 }, { "epoch": 0.05194025273756436, "grad_norm": 1.4578264951705933, "learning_rate": 9.983384257987189e-05, "loss": 1.0587, "step": 8130 }, { "epoch": 0.05200413988730307, "grad_norm": 0.8157141804695129, "learning_rate": 9.983343360304227e-05, "loss": 1.2347, "step": 8140 }, { "epoch": 0.05206802703704177, "grad_norm": 0.9772050976753235, "learning_rate": 9.983302412434688e-05, "loss": 1.1827, "step": 8150 }, { "epoch": 0.05213191418678047, "grad_norm": 1.2389028072357178, "learning_rate": 9.983261414378982e-05, "loss": 0.8998, "step": 8160 }, { "epoch": 0.052195801336519174, "grad_norm": 1.1656652688980103, "learning_rate": 9.983220366137522e-05, "loss": 0.9351, "step": 8170 }, { "epoch": 0.05225968848625787, "grad_norm": 1.1599071025848389, "learning_rate": 9.983179267710721e-05, "loss": 0.8263, "step": 8180 }, { "epoch": 0.05232357563599658, "grad_norm": 1.0255035161972046, "learning_rate": 9.983138119098993e-05, "loss": 1.1271, "step": 8190 }, { "epoch": 0.05238746278573528, "grad_norm": 0.7129418849945068, "learning_rate": 9.983096920302755e-05, "loss": 0.7378, "step": 8200 }, { "epoch": 0.05245134993547398, "grad_norm": 1.268712043762207, "learning_rate": 9.983055671322421e-05, "loss": 0.7541, "step": 8210 }, { "epoch": 0.05251523708521268, "grad_norm": 0.8625295758247375, "learning_rate": 9.983014372158403e-05, "loss": 0.7441, "step": 8220 }, { "epoch": 0.05257912423495138, "grad_norm": 3.3473124504089355, "learning_rate": 9.982973022811122e-05, "loss": 1.0331, "step": 8230 }, { "epoch": 0.05264301138469008, "grad_norm": 1.0163989067077637, "learning_rate": 9.982931623280989e-05, "loss": 0.7206, "step": 8240 }, { "epoch": 0.05270689853442879, "grad_norm": 0.6889442205429077, "learning_rate": 9.982890173568426e-05, "loss": 0.7788, "step": 8250 }, { "epoch": 0.05277078568416749, "grad_norm": 0.7175592184066772, "learning_rate": 9.982848673673846e-05, "loss": 0.847, "step": 8260 }, { "epoch": 0.052834672833906186, "grad_norm": 1.019832730293274, "learning_rate": 9.98280712359767e-05, "loss": 0.8542, "step": 8270 }, { "epoch": 0.05289855998364489, "grad_norm": 0.9718403220176697, "learning_rate": 9.982765523340316e-05, "loss": 1.0609, "step": 8280 }, { "epoch": 0.05296244713338359, "grad_norm": 2.7732856273651123, "learning_rate": 9.982723872902202e-05, "loss": 0.9938, "step": 8290 }, { "epoch": 0.05302633428312229, "grad_norm": 0.6997831463813782, "learning_rate": 9.982682172283748e-05, "loss": 0.7695, "step": 8300 }, { "epoch": 0.053090221432861, "grad_norm": 1.2381385564804077, "learning_rate": 9.982640421485374e-05, "loss": 0.8043, "step": 8310 }, { "epoch": 0.053154108582599696, "grad_norm": 1.2460087537765503, "learning_rate": 9.9825986205075e-05, "loss": 0.9461, "step": 8320 }, { "epoch": 0.053217995732338395, "grad_norm": 0.7866740822792053, "learning_rate": 9.982556769350549e-05, "loss": 0.88, "step": 8330 }, { "epoch": 0.0532818828820771, "grad_norm": 1.1013973951339722, "learning_rate": 9.982514868014938e-05, "loss": 0.7032, "step": 8340 }, { "epoch": 0.0533457700318158, "grad_norm": 0.7456531524658203, "learning_rate": 9.982472916501093e-05, "loss": 0.8763, "step": 8350 }, { "epoch": 0.0534096571815545, "grad_norm": 0.6022664904594421, "learning_rate": 9.982430914809437e-05, "loss": 1.0766, "step": 8360 }, { "epoch": 0.053473544331293206, "grad_norm": 0.6867753267288208, "learning_rate": 9.982388862940389e-05, "loss": 0.8823, "step": 8370 }, { "epoch": 0.053537431481031905, "grad_norm": 1.045599102973938, "learning_rate": 9.982346760894375e-05, "loss": 1.0784, "step": 8380 }, { "epoch": 0.053601318630770604, "grad_norm": 1.3521573543548584, "learning_rate": 9.982304608671819e-05, "loss": 1.1522, "step": 8390 }, { "epoch": 0.05366520578050931, "grad_norm": 0.6618836522102356, "learning_rate": 9.982262406273146e-05, "loss": 0.863, "step": 8400 }, { "epoch": 0.05372909293024801, "grad_norm": 0.6689035892486572, "learning_rate": 9.98222015369878e-05, "loss": 0.9005, "step": 8410 }, { "epoch": 0.05379298007998671, "grad_norm": 1.0590460300445557, "learning_rate": 9.982177850949147e-05, "loss": 1.0022, "step": 8420 }, { "epoch": 0.053856867229725415, "grad_norm": 0.6324277520179749, "learning_rate": 9.982135498024673e-05, "loss": 0.7492, "step": 8430 }, { "epoch": 0.053920754379464114, "grad_norm": 0.5392162203788757, "learning_rate": 9.982093094925784e-05, "loss": 0.991, "step": 8440 }, { "epoch": 0.05398464152920281, "grad_norm": 0.6738571524620056, "learning_rate": 9.982050641652908e-05, "loss": 1.0112, "step": 8450 }, { "epoch": 0.05404852867894152, "grad_norm": 0.8277943730354309, "learning_rate": 9.98200813820647e-05, "loss": 0.6247, "step": 8460 }, { "epoch": 0.05411241582868022, "grad_norm": 1.3968684673309326, "learning_rate": 9.981965584586901e-05, "loss": 1.0051, "step": 8470 }, { "epoch": 0.05417630297841892, "grad_norm": 1.391640543937683, "learning_rate": 9.981922980794629e-05, "loss": 0.9332, "step": 8480 }, { "epoch": 0.054240190128157624, "grad_norm": 2.0874507427215576, "learning_rate": 9.981880326830083e-05, "loss": 1.135, "step": 8490 }, { "epoch": 0.05430407727789632, "grad_norm": 1.9418469667434692, "learning_rate": 9.981837622693692e-05, "loss": 0.8689, "step": 8500 }, { "epoch": 0.05436796442763502, "grad_norm": 0.9285494089126587, "learning_rate": 9.981794868385886e-05, "loss": 0.8521, "step": 8510 }, { "epoch": 0.05443185157737373, "grad_norm": 1.062789797782898, "learning_rate": 9.981752063907096e-05, "loss": 1.0655, "step": 8520 }, { "epoch": 0.05449573872711243, "grad_norm": 0.6997897624969482, "learning_rate": 9.981709209257752e-05, "loss": 0.9636, "step": 8530 }, { "epoch": 0.05455962587685113, "grad_norm": 0.8409900665283203, "learning_rate": 9.981666304438286e-05, "loss": 0.9073, "step": 8540 }, { "epoch": 0.05462351302658983, "grad_norm": 0.7529276013374329, "learning_rate": 9.981623349449131e-05, "loss": 0.695, "step": 8550 }, { "epoch": 0.05468740017632853, "grad_norm": 0.6798946261405945, "learning_rate": 9.981580344290722e-05, "loss": 0.9083, "step": 8560 }, { "epoch": 0.05475128732606723, "grad_norm": 0.537013828754425, "learning_rate": 9.981537288963487e-05, "loss": 0.9872, "step": 8570 }, { "epoch": 0.05481517447580594, "grad_norm": 0.9144914150238037, "learning_rate": 9.981494183467861e-05, "loss": 0.9987, "step": 8580 }, { "epoch": 0.05487906162554464, "grad_norm": 1.6605632305145264, "learning_rate": 9.98145102780428e-05, "loss": 0.9811, "step": 8590 }, { "epoch": 0.05494294877528334, "grad_norm": 0.8611153960227966, "learning_rate": 9.981407821973176e-05, "loss": 1.0801, "step": 8600 }, { "epoch": 0.05500683592502204, "grad_norm": 0.9995184540748596, "learning_rate": 9.981364565974988e-05, "loss": 0.9886, "step": 8610 }, { "epoch": 0.05507072307476074, "grad_norm": 1.9788289070129395, "learning_rate": 9.981321259810149e-05, "loss": 0.8339, "step": 8620 }, { "epoch": 0.05513461022449945, "grad_norm": 0.6516178250312805, "learning_rate": 9.981277903479095e-05, "loss": 0.87, "step": 8630 }, { "epoch": 0.05519849737423815, "grad_norm": 0.6122477054595947, "learning_rate": 9.981234496982262e-05, "loss": 0.9143, "step": 8640 }, { "epoch": 0.055262384523976846, "grad_norm": 0.6674822568893433, "learning_rate": 9.98119104032009e-05, "loss": 0.9851, "step": 8650 }, { "epoch": 0.05532627167371555, "grad_norm": 0.7896667122840881, "learning_rate": 9.981147533493013e-05, "loss": 0.9507, "step": 8660 }, { "epoch": 0.05539015882345425, "grad_norm": 0.5288309454917908, "learning_rate": 9.981103976501474e-05, "loss": 0.8592, "step": 8670 }, { "epoch": 0.05545404597319295, "grad_norm": 1.2801772356033325, "learning_rate": 9.981060369345905e-05, "loss": 0.799, "step": 8680 }, { "epoch": 0.055517933122931656, "grad_norm": 1.178462266921997, "learning_rate": 9.981016712026752e-05, "loss": 0.8998, "step": 8690 }, { "epoch": 0.055581820272670356, "grad_norm": 0.5843381285667419, "learning_rate": 9.98097300454445e-05, "loss": 0.9922, "step": 8700 }, { "epoch": 0.055645707422409055, "grad_norm": 1.011044979095459, "learning_rate": 9.980929246899441e-05, "loss": 0.8379, "step": 8710 }, { "epoch": 0.05570959457214776, "grad_norm": 0.9214301109313965, "learning_rate": 9.980885439092165e-05, "loss": 0.6383, "step": 8720 }, { "epoch": 0.05577348172188646, "grad_norm": 0.6694325804710388, "learning_rate": 9.980841581123064e-05, "loss": 1.1735, "step": 8730 }, { "epoch": 0.05583736887162516, "grad_norm": 0.6404210329055786, "learning_rate": 9.98079767299258e-05, "loss": 0.7791, "step": 8740 }, { "epoch": 0.055901256021363865, "grad_norm": 1.0678333044052124, "learning_rate": 9.980753714701152e-05, "loss": 0.8481, "step": 8750 }, { "epoch": 0.055965143171102565, "grad_norm": 2.0661401748657227, "learning_rate": 9.980709706249227e-05, "loss": 1.0899, "step": 8760 }, { "epoch": 0.056029030320841264, "grad_norm": 1.161922812461853, "learning_rate": 9.980665647637246e-05, "loss": 0.7383, "step": 8770 }, { "epoch": 0.05609291747057997, "grad_norm": 0.5117482542991638, "learning_rate": 9.980621538865654e-05, "loss": 0.9479, "step": 8780 }, { "epoch": 0.05615680462031867, "grad_norm": 1.124691367149353, "learning_rate": 9.980581798085118e-05, "loss": 1.0286, "step": 8790 }, { "epoch": 0.05622069177005737, "grad_norm": 0.9648489952087402, "learning_rate": 9.980537594011486e-05, "loss": 0.7825, "step": 8800 }, { "epoch": 0.056284578919796074, "grad_norm": 0.9334906339645386, "learning_rate": 9.980493339779533e-05, "loss": 0.8359, "step": 8810 }, { "epoch": 0.056348466069534774, "grad_norm": 0.8304029107093811, "learning_rate": 9.980449035389702e-05, "loss": 0.7827, "step": 8820 }, { "epoch": 0.05641235321927347, "grad_norm": 0.8497231006622314, "learning_rate": 9.980404680842441e-05, "loss": 1.0369, "step": 8830 }, { "epoch": 0.05647624036901218, "grad_norm": 0.7895182371139526, "learning_rate": 9.980360276138196e-05, "loss": 0.8317, "step": 8840 }, { "epoch": 0.05654012751875088, "grad_norm": 2.521169900894165, "learning_rate": 9.980315821277415e-05, "loss": 1.1953, "step": 8850 }, { "epoch": 0.05660401466848958, "grad_norm": 0.8677668571472168, "learning_rate": 9.980271316260544e-05, "loss": 0.6768, "step": 8860 }, { "epoch": 0.05666790181822828, "grad_norm": 0.6117026805877686, "learning_rate": 9.980226761088033e-05, "loss": 0.8991, "step": 8870 }, { "epoch": 0.05673178896796698, "grad_norm": 0.5636959075927734, "learning_rate": 9.98018215576033e-05, "loss": 0.9742, "step": 8880 }, { "epoch": 0.05679567611770568, "grad_norm": 1.0202407836914062, "learning_rate": 9.980137500277885e-05, "loss": 0.8069, "step": 8890 }, { "epoch": 0.05685956326744439, "grad_norm": 0.7063365578651428, "learning_rate": 9.980092794641144e-05, "loss": 0.7919, "step": 8900 }, { "epoch": 0.05692345041718309, "grad_norm": 0.6419750452041626, "learning_rate": 9.980048038850564e-05, "loss": 1.0765, "step": 8910 }, { "epoch": 0.056987337566921786, "grad_norm": 0.8232806921005249, "learning_rate": 9.98000323290659e-05, "loss": 0.9938, "step": 8920 }, { "epoch": 0.05705122471666049, "grad_norm": 0.846300482749939, "learning_rate": 9.979958376809675e-05, "loss": 0.9364, "step": 8930 }, { "epoch": 0.05711511186639919, "grad_norm": 0.9861621856689453, "learning_rate": 9.979913470560271e-05, "loss": 0.9568, "step": 8940 }, { "epoch": 0.05717899901613789, "grad_norm": 1.035204529762268, "learning_rate": 9.97986851415883e-05, "loss": 1.1155, "step": 8950 }, { "epoch": 0.0572428861658766, "grad_norm": 0.901535153388977, "learning_rate": 9.979823507605806e-05, "loss": 0.9014, "step": 8960 }, { "epoch": 0.057306773315615296, "grad_norm": 0.7851259708404541, "learning_rate": 9.97977845090165e-05, "loss": 0.9278, "step": 8970 }, { "epoch": 0.057370660465353995, "grad_norm": 0.8578255772590637, "learning_rate": 9.979733344046818e-05, "loss": 1.0668, "step": 8980 }, { "epoch": 0.0574345476150927, "grad_norm": 0.5631706714630127, "learning_rate": 9.979688187041761e-05, "loss": 0.7958, "step": 8990 }, { "epoch": 0.0574984347648314, "grad_norm": 0.9356205463409424, "learning_rate": 9.979642979886938e-05, "loss": 0.9709, "step": 9000 }, { "epoch": 0.05756232191457011, "grad_norm": 1.1016316413879395, "learning_rate": 9.979597722582801e-05, "loss": 1.0941, "step": 9010 }, { "epoch": 0.057626209064308806, "grad_norm": 0.7269836068153381, "learning_rate": 9.979552415129806e-05, "loss": 0.8328, "step": 9020 }, { "epoch": 0.057690096214047505, "grad_norm": 1.0700838565826416, "learning_rate": 9.979507057528412e-05, "loss": 1.0288, "step": 9030 }, { "epoch": 0.05775398336378621, "grad_norm": 0.9521405100822449, "learning_rate": 9.979461649779074e-05, "loss": 0.8238, "step": 9040 }, { "epoch": 0.05781787051352491, "grad_norm": 1.3190817832946777, "learning_rate": 9.97941619188225e-05, "loss": 0.9142, "step": 9050 }, { "epoch": 0.05788175766326361, "grad_norm": 0.7254020571708679, "learning_rate": 9.979370683838396e-05, "loss": 0.952, "step": 9060 }, { "epoch": 0.057945644813002316, "grad_norm": 0.6510186195373535, "learning_rate": 9.979325125647972e-05, "loss": 0.9684, "step": 9070 }, { "epoch": 0.058009531962741015, "grad_norm": 0.8847187757492065, "learning_rate": 9.979279517311435e-05, "loss": 0.867, "step": 9080 }, { "epoch": 0.058073419112479714, "grad_norm": 1.1535148620605469, "learning_rate": 9.979233858829246e-05, "loss": 1.1381, "step": 9090 }, { "epoch": 0.05813730626221842, "grad_norm": 0.8919582366943359, "learning_rate": 9.979188150201866e-05, "loss": 0.9911, "step": 9100 }, { "epoch": 0.05820119341195712, "grad_norm": 0.8089918494224548, "learning_rate": 9.979142391429753e-05, "loss": 1.0435, "step": 9110 }, { "epoch": 0.05826508056169582, "grad_norm": 1.1607420444488525, "learning_rate": 9.979096582513366e-05, "loss": 0.8656, "step": 9120 }, { "epoch": 0.058328967711434525, "grad_norm": 0.8984375596046448, "learning_rate": 9.979050723453171e-05, "loss": 0.7627, "step": 9130 }, { "epoch": 0.058392854861173224, "grad_norm": 1.1916580200195312, "learning_rate": 9.979004814249629e-05, "loss": 0.8041, "step": 9140 }, { "epoch": 0.05845674201091192, "grad_norm": 1.0592631101608276, "learning_rate": 9.978958854903198e-05, "loss": 0.8423, "step": 9150 }, { "epoch": 0.05852062916065063, "grad_norm": 0.8369486331939697, "learning_rate": 9.978912845414347e-05, "loss": 0.7743, "step": 9160 }, { "epoch": 0.05858451631038933, "grad_norm": 0.8720589280128479, "learning_rate": 9.978866785783533e-05, "loss": 0.8537, "step": 9170 }, { "epoch": 0.05864840346012803, "grad_norm": 1.644795298576355, "learning_rate": 9.978820676011227e-05, "loss": 0.7972, "step": 9180 }, { "epoch": 0.058712290609866734, "grad_norm": 2.7408289909362793, "learning_rate": 9.978774516097886e-05, "loss": 1.3147, "step": 9190 }, { "epoch": 0.05877617775960543, "grad_norm": 0.5846157670021057, "learning_rate": 9.97872830604398e-05, "loss": 0.948, "step": 9200 }, { "epoch": 0.05884006490934413, "grad_norm": 0.5981711149215698, "learning_rate": 9.978682045849975e-05, "loss": 0.9317, "step": 9210 }, { "epoch": 0.05890395205908284, "grad_norm": 0.8747972249984741, "learning_rate": 9.97863573551633e-05, "loss": 1.0936, "step": 9220 }, { "epoch": 0.05896783920882154, "grad_norm": 0.8239877223968506, "learning_rate": 9.978589375043519e-05, "loss": 0.9512, "step": 9230 }, { "epoch": 0.05903172635856024, "grad_norm": 0.48801517486572266, "learning_rate": 9.978542964432005e-05, "loss": 0.8562, "step": 9240 }, { "epoch": 0.05909561350829894, "grad_norm": 2.152211904525757, "learning_rate": 9.978496503682258e-05, "loss": 1.0353, "step": 9250 }, { "epoch": 0.05915950065803764, "grad_norm": 0.9108942747116089, "learning_rate": 9.978449992794742e-05, "loss": 0.9188, "step": 9260 }, { "epoch": 0.05922338780777634, "grad_norm": 0.41772526502609253, "learning_rate": 9.978403431769927e-05, "loss": 0.8177, "step": 9270 }, { "epoch": 0.05928727495751505, "grad_norm": 0.5694353580474854, "learning_rate": 9.978356820608284e-05, "loss": 0.8956, "step": 9280 }, { "epoch": 0.059351162107253747, "grad_norm": 0.620496928691864, "learning_rate": 9.978310159310282e-05, "loss": 0.631, "step": 9290 }, { "epoch": 0.059415049256992446, "grad_norm": 3.017289638519287, "learning_rate": 9.978263447876388e-05, "loss": 0.7887, "step": 9300 }, { "epoch": 0.05947893640673115, "grad_norm": 1.065492868423462, "learning_rate": 9.978216686307075e-05, "loss": 0.8404, "step": 9310 }, { "epoch": 0.05954282355646985, "grad_norm": 0.5826980471611023, "learning_rate": 9.978169874602813e-05, "loss": 0.8956, "step": 9320 }, { "epoch": 0.05960671070620855, "grad_norm": 0.9797850847244263, "learning_rate": 9.978123012764074e-05, "loss": 1.0606, "step": 9330 }, { "epoch": 0.059670597855947256, "grad_norm": 0.6139065027236938, "learning_rate": 9.97807610079133e-05, "loss": 0.9635, "step": 9340 }, { "epoch": 0.059734485005685956, "grad_norm": 0.7059098482131958, "learning_rate": 9.978029138685052e-05, "loss": 0.861, "step": 9350 }, { "epoch": 0.059798372155424655, "grad_norm": 0.8045505285263062, "learning_rate": 9.977982126445712e-05, "loss": 0.923, "step": 9360 }, { "epoch": 0.05986225930516336, "grad_norm": NaN, "learning_rate": 9.977939772566934e-05, "loss": 1.1034, "step": 9370 }, { "epoch": 0.05992614645490206, "grad_norm": 0.8647670745849609, "learning_rate": 9.977892665076088e-05, "loss": 0.9245, "step": 9380 }, { "epoch": 0.05999003360464076, "grad_norm": 1.1494901180267334, "learning_rate": 9.977845507453554e-05, "loss": 0.9954, "step": 9390 }, { "epoch": 0.060053920754379465, "grad_norm": 0.6723154783248901, "learning_rate": 9.977798299699811e-05, "loss": 0.7105, "step": 9400 }, { "epoch": 0.060117807904118165, "grad_norm": 1.0932044982910156, "learning_rate": 9.977751041815333e-05, "loss": 0.994, "step": 9410 }, { "epoch": 0.06018169505385687, "grad_norm": 1.036632776260376, "learning_rate": 9.977703733800594e-05, "loss": 0.9975, "step": 9420 }, { "epoch": 0.06024558220359557, "grad_norm": 0.9420500993728638, "learning_rate": 9.977656375656072e-05, "loss": 0.8054, "step": 9430 }, { "epoch": 0.06030946935333427, "grad_norm": 0.8750972747802734, "learning_rate": 9.977608967382246e-05, "loss": 1.233, "step": 9440 }, { "epoch": 0.060373356503072975, "grad_norm": 2.4813504219055176, "learning_rate": 9.977561508979591e-05, "loss": 1.0237, "step": 9450 }, { "epoch": 0.060437243652811674, "grad_norm": 0.8663612604141235, "learning_rate": 9.977514000448584e-05, "loss": 0.9739, "step": 9460 }, { "epoch": 0.060501130802550374, "grad_norm": 0.5622289776802063, "learning_rate": 9.977466441789707e-05, "loss": 0.6195, "step": 9470 }, { "epoch": 0.06056501795228908, "grad_norm": 0.7465640902519226, "learning_rate": 9.977418833003436e-05, "loss": 0.6977, "step": 9480 }, { "epoch": 0.06062890510202778, "grad_norm": 0.8643200993537903, "learning_rate": 9.97737117409025e-05, "loss": 1.1056, "step": 9490 }, { "epoch": 0.06069279225176648, "grad_norm": 0.8004162311553955, "learning_rate": 9.977323465050631e-05, "loss": 0.8349, "step": 9500 }, { "epoch": 0.060756679401505184, "grad_norm": 0.7937789559364319, "learning_rate": 9.977275705885058e-05, "loss": 1.0755, "step": 9510 }, { "epoch": 0.06082056655124388, "grad_norm": 0.7888356447219849, "learning_rate": 9.977227896594014e-05, "loss": 1.242, "step": 9520 }, { "epoch": 0.06088445370098258, "grad_norm": 0.6252095103263855, "learning_rate": 9.977180037177979e-05, "loss": 1.1968, "step": 9530 }, { "epoch": 0.06094834085072129, "grad_norm": 0.6318356990814209, "learning_rate": 9.977132127637434e-05, "loss": 1.0921, "step": 9540 }, { "epoch": 0.06101222800045999, "grad_norm": 0.6336533427238464, "learning_rate": 9.977084167972863e-05, "loss": 0.7744, "step": 9550 }, { "epoch": 0.06107611515019869, "grad_norm": 0.7241688966751099, "learning_rate": 9.97703615818475e-05, "loss": 0.7843, "step": 9560 }, { "epoch": 0.06114000229993739, "grad_norm": 1.5715322494506836, "learning_rate": 9.976988098273576e-05, "loss": 1.1104, "step": 9570 }, { "epoch": 0.06120388944967609, "grad_norm": 0.5444793105125427, "learning_rate": 9.976939988239826e-05, "loss": 0.9894, "step": 9580 }, { "epoch": 0.06126777659941479, "grad_norm": 0.785284698009491, "learning_rate": 9.976891828083985e-05, "loss": 0.9782, "step": 9590 }, { "epoch": 0.0613316637491535, "grad_norm": 1.1315600872039795, "learning_rate": 9.976843617806538e-05, "loss": 0.9443, "step": 9600 }, { "epoch": 0.0613955508988922, "grad_norm": 5.169201850891113, "learning_rate": 9.97679535740797e-05, "loss": 0.9371, "step": 9610 }, { "epoch": 0.061459438048630896, "grad_norm": 0.6818580031394958, "learning_rate": 9.976747046888767e-05, "loss": 0.8102, "step": 9620 }, { "epoch": 0.0615233251983696, "grad_norm": 0.8099622130393982, "learning_rate": 9.976698686249416e-05, "loss": 1.0892, "step": 9630 }, { "epoch": 0.0615872123481083, "grad_norm": 0.8413625955581665, "learning_rate": 9.976650275490404e-05, "loss": 0.9822, "step": 9640 }, { "epoch": 0.061651099497847, "grad_norm": 1.4564588069915771, "learning_rate": 9.976601814612217e-05, "loss": 1.1034, "step": 9650 }, { "epoch": 0.06171498664758571, "grad_norm": 0.897906482219696, "learning_rate": 9.976553303615346e-05, "loss": 0.9956, "step": 9660 }, { "epoch": 0.061778873797324406, "grad_norm": 0.5349118113517761, "learning_rate": 9.976504742500277e-05, "loss": 0.9361, "step": 9670 }, { "epoch": 0.061842760947063105, "grad_norm": 0.8950748443603516, "learning_rate": 9.9764561312675e-05, "loss": 0.8486, "step": 9680 }, { "epoch": 0.06190664809680181, "grad_norm": 1.0401899814605713, "learning_rate": 9.976407469917504e-05, "loss": 0.855, "step": 9690 }, { "epoch": 0.06197053524654051, "grad_norm": 1.0119845867156982, "learning_rate": 9.976358758450781e-05, "loss": 0.8117, "step": 9700 }, { "epoch": 0.06203442239627921, "grad_norm": 1.028308629989624, "learning_rate": 9.976309996867819e-05, "loss": 1.0832, "step": 9710 }, { "epoch": 0.062098309546017916, "grad_norm": 0.6654931902885437, "learning_rate": 9.976261185169111e-05, "loss": 0.9543, "step": 9720 }, { "epoch": 0.062162196695756615, "grad_norm": 0.7170969843864441, "learning_rate": 9.976212323355148e-05, "loss": 0.7589, "step": 9730 }, { "epoch": 0.062226083845495314, "grad_norm": 0.7951648831367493, "learning_rate": 9.97616341142642e-05, "loss": 0.7643, "step": 9740 }, { "epoch": 0.06228997099523402, "grad_norm": 0.8642029166221619, "learning_rate": 9.976114449383422e-05, "loss": 0.7792, "step": 9750 }, { "epoch": 0.06235385814497272, "grad_norm": 0.7159494757652283, "learning_rate": 9.976065437226648e-05, "loss": 0.7695, "step": 9760 }, { "epoch": 0.06241774529471142, "grad_norm": 0.8568373918533325, "learning_rate": 9.976016374956589e-05, "loss": 0.9835, "step": 9770 }, { "epoch": 0.062481632444450125, "grad_norm": 0.7960609793663025, "learning_rate": 9.97596726257374e-05, "loss": 0.7966, "step": 9780 }, { "epoch": 0.06254551959418883, "grad_norm": 0.9307446479797363, "learning_rate": 9.975918100078598e-05, "loss": 0.9899, "step": 9790 }, { "epoch": 0.06260940674392752, "grad_norm": 0.6713595986366272, "learning_rate": 9.975868887471654e-05, "loss": 0.9225, "step": 9800 }, { "epoch": 0.06267329389366623, "grad_norm": 0.8115236163139343, "learning_rate": 9.975819624753405e-05, "loss": 1.0275, "step": 9810 }, { "epoch": 0.06273718104340494, "grad_norm": 1.7901041507720947, "learning_rate": 9.975770311924348e-05, "loss": 1.0027, "step": 9820 }, { "epoch": 0.06280106819314363, "grad_norm": 1.7000713348388672, "learning_rate": 9.975720948984981e-05, "loss": 0.7821, "step": 9830 }, { "epoch": 0.06286495534288233, "grad_norm": 0.7414657473564148, "learning_rate": 9.975671535935797e-05, "loss": 1.1558, "step": 9840 }, { "epoch": 0.06292884249262104, "grad_norm": 1.6867907047271729, "learning_rate": 9.975622072777299e-05, "loss": 0.8346, "step": 9850 }, { "epoch": 0.06299272964235973, "grad_norm": 0.8193734884262085, "learning_rate": 9.97557255950998e-05, "loss": 1.0878, "step": 9860 }, { "epoch": 0.06305661679209844, "grad_norm": 0.8677065968513489, "learning_rate": 9.975522996134341e-05, "loss": 1.0119, "step": 9870 }, { "epoch": 0.06312050394183714, "grad_norm": 0.9119147658348083, "learning_rate": 9.975473382650882e-05, "loss": 0.9826, "step": 9880 }, { "epoch": 0.06318439109157584, "grad_norm": 0.5988776683807373, "learning_rate": 9.9754237190601e-05, "loss": 0.9762, "step": 9890 }, { "epoch": 0.06324827824131454, "grad_norm": 0.7673864364624023, "learning_rate": 9.9753740053625e-05, "loss": 1.0638, "step": 9900 }, { "epoch": 0.06331216539105325, "grad_norm": 0.7230051159858704, "learning_rate": 9.975324241558577e-05, "loss": 1.005, "step": 9910 }, { "epoch": 0.06337605254079194, "grad_norm": 0.7979596257209778, "learning_rate": 9.975274427648834e-05, "loss": 0.816, "step": 9920 }, { "epoch": 0.06343993969053065, "grad_norm": 0.8814641237258911, "learning_rate": 9.975224563633774e-05, "loss": 0.808, "step": 9930 }, { "epoch": 0.06350382684026935, "grad_norm": 1.0135507583618164, "learning_rate": 9.975174649513899e-05, "loss": 0.6825, "step": 9940 }, { "epoch": 0.06356771399000805, "grad_norm": 2.055793046951294, "learning_rate": 9.97512468528971e-05, "loss": 0.9164, "step": 9950 }, { "epoch": 0.06363160113974675, "grad_norm": 1.3944244384765625, "learning_rate": 9.975074670961712e-05, "loss": 0.6302, "step": 9960 }, { "epoch": 0.06369548828948546, "grad_norm": 0.828702986240387, "learning_rate": 9.97502460653041e-05, "loss": 0.8697, "step": 9970 }, { "epoch": 0.06375937543922415, "grad_norm": 0.6198043823242188, "learning_rate": 9.974974491996303e-05, "loss": 1.019, "step": 9980 }, { "epoch": 0.06382326258896286, "grad_norm": 1.0051112174987793, "learning_rate": 9.9749243273599e-05, "loss": 0.8931, "step": 9990 }, { "epoch": 0.06388714973870156, "grad_norm": 0.7894333004951477, "learning_rate": 9.974874112621706e-05, "loss": 0.729, "step": 10000 }, { "epoch": 0.06395103688844025, "grad_norm": 1.0666780471801758, "learning_rate": 9.974823847782226e-05, "loss": 0.8405, "step": 10010 }, { "epoch": 0.06401492403817896, "grad_norm": 0.8409984111785889, "learning_rate": 9.974773532841965e-05, "loss": 0.7593, "step": 10020 }, { "epoch": 0.06407881118791767, "grad_norm": 0.7679229974746704, "learning_rate": 9.97472316780143e-05, "loss": 0.9023, "step": 10030 }, { "epoch": 0.06414269833765636, "grad_norm": 0.893464207649231, "learning_rate": 9.97467275266113e-05, "loss": 0.9048, "step": 10040 }, { "epoch": 0.06420658548739507, "grad_norm": 0.8160121440887451, "learning_rate": 9.974622287421571e-05, "loss": 0.7204, "step": 10050 }, { "epoch": 0.06427047263713377, "grad_norm": 1.0811116695404053, "learning_rate": 9.974571772083264e-05, "loss": 1.0378, "step": 10060 }, { "epoch": 0.06433435978687246, "grad_norm": 2.037599802017212, "learning_rate": 9.974521206646714e-05, "loss": 1.205, "step": 10070 }, { "epoch": 0.06439824693661117, "grad_norm": 1.153348445892334, "learning_rate": 9.974470591112431e-05, "loss": 1.1017, "step": 10080 }, { "epoch": 0.06446213408634988, "grad_norm": 0.8410546183586121, "learning_rate": 9.974419925480927e-05, "loss": 0.9647, "step": 10090 }, { "epoch": 0.06452602123608857, "grad_norm": 1.0550462007522583, "learning_rate": 9.97436920975271e-05, "loss": 0.6909, "step": 10100 }, { "epoch": 0.06458990838582727, "grad_norm": 0.7067312598228455, "learning_rate": 9.974318443928292e-05, "loss": 1.0198, "step": 10110 }, { "epoch": 0.06465379553556598, "grad_norm": 0.9904884696006775, "learning_rate": 9.974267628008184e-05, "loss": 0.853, "step": 10120 }, { "epoch": 0.06471768268530467, "grad_norm": 0.9248889684677124, "learning_rate": 9.974216761992899e-05, "loss": 0.8722, "step": 10130 }, { "epoch": 0.06478156983504338, "grad_norm": 0.9939360618591309, "learning_rate": 9.974165845882946e-05, "loss": 0.7184, "step": 10140 }, { "epoch": 0.06484545698478208, "grad_norm": 0.7473933696746826, "learning_rate": 9.97411487967884e-05, "loss": 1.1064, "step": 10150 }, { "epoch": 0.06490934413452078, "grad_norm": 0.6957441568374634, "learning_rate": 9.974063863381093e-05, "loss": 1.0598, "step": 10160 }, { "epoch": 0.06497323128425948, "grad_norm": 0.5153073072433472, "learning_rate": 9.974012796990222e-05, "loss": 1.0821, "step": 10170 }, { "epoch": 0.06503711843399819, "grad_norm": 0.6289156675338745, "learning_rate": 9.973961680506736e-05, "loss": 0.7954, "step": 10180 }, { "epoch": 0.06510100558373688, "grad_norm": 0.8114803433418274, "learning_rate": 9.973910513931155e-05, "loss": 1.0314, "step": 10190 }, { "epoch": 0.06516489273347559, "grad_norm": 0.9270540475845337, "learning_rate": 9.973859297263992e-05, "loss": 1.0626, "step": 10200 }, { "epoch": 0.0652287798832143, "grad_norm": 0.7939660549163818, "learning_rate": 9.973808030505762e-05, "loss": 1.0844, "step": 10210 }, { "epoch": 0.06529266703295299, "grad_norm": 0.7727285027503967, "learning_rate": 9.973756713656983e-05, "loss": 1.1614, "step": 10220 }, { "epoch": 0.06535655418269169, "grad_norm": 0.628436803817749, "learning_rate": 9.973705346718172e-05, "loss": 1.0243, "step": 10230 }, { "epoch": 0.0654204413324304, "grad_norm": 0.6849284172058105, "learning_rate": 9.973653929689843e-05, "loss": 0.9389, "step": 10240 }, { "epoch": 0.06548432848216909, "grad_norm": 1.1843525171279907, "learning_rate": 9.973602462572517e-05, "loss": 1.1492, "step": 10250 }, { "epoch": 0.0655482156319078, "grad_norm": 0.8269469141960144, "learning_rate": 9.973550945366713e-05, "loss": 1.2698, "step": 10260 }, { "epoch": 0.0656121027816465, "grad_norm": 1.4048844575881958, "learning_rate": 9.973499378072945e-05, "loss": 0.9471, "step": 10270 }, { "epoch": 0.0656759899313852, "grad_norm": 0.4660175144672394, "learning_rate": 9.973447760691738e-05, "loss": 1.0006, "step": 10280 }, { "epoch": 0.0657398770811239, "grad_norm": 0.5896976590156555, "learning_rate": 9.973396093223609e-05, "loss": 0.9568, "step": 10290 }, { "epoch": 0.06580376423086261, "grad_norm": 0.8697670102119446, "learning_rate": 9.973344375669078e-05, "loss": 1.0002, "step": 10300 }, { "epoch": 0.0658676513806013, "grad_norm": 0.6355106234550476, "learning_rate": 9.973292608028667e-05, "loss": 0.847, "step": 10310 }, { "epoch": 0.06593153853034, "grad_norm": 0.8912832736968994, "learning_rate": 9.973240790302898e-05, "loss": 0.9665, "step": 10320 }, { "epoch": 0.06599542568007871, "grad_norm": 0.662343442440033, "learning_rate": 9.97318892249229e-05, "loss": 0.8517, "step": 10330 }, { "epoch": 0.0660593128298174, "grad_norm": 0.7244953513145447, "learning_rate": 9.973137004597368e-05, "loss": 0.8731, "step": 10340 }, { "epoch": 0.06612319997955611, "grad_norm": 0.9315572381019592, "learning_rate": 9.973085036618655e-05, "loss": 0.8918, "step": 10350 }, { "epoch": 0.06618708712929482, "grad_norm": 0.7289429306983948, "learning_rate": 9.973033018556671e-05, "loss": 0.8263, "step": 10360 }, { "epoch": 0.06625097427903351, "grad_norm": 1.0025968551635742, "learning_rate": 9.972980950411944e-05, "loss": 0.8438, "step": 10370 }, { "epoch": 0.06631486142877221, "grad_norm": 1.0387686491012573, "learning_rate": 9.972928832184996e-05, "loss": 1.0417, "step": 10380 }, { "epoch": 0.06637874857851092, "grad_norm": 1.1046053171157837, "learning_rate": 9.972876663876352e-05, "loss": 1.0033, "step": 10390 }, { "epoch": 0.06644263572824961, "grad_norm": 0.7507469058036804, "learning_rate": 9.972824445486539e-05, "loss": 0.7265, "step": 10400 }, { "epoch": 0.06650652287798832, "grad_norm": 0.8238981366157532, "learning_rate": 9.972772177016081e-05, "loss": 1.0, "step": 10410 }, { "epoch": 0.06657041002772703, "grad_norm": 0.9973478317260742, "learning_rate": 9.972719858465504e-05, "loss": 1.1175, "step": 10420 }, { "epoch": 0.06663429717746572, "grad_norm": 1.0181374549865723, "learning_rate": 9.972667489835338e-05, "loss": 0.9529, "step": 10430 }, { "epoch": 0.06669818432720442, "grad_norm": 0.5428194403648376, "learning_rate": 9.972615071126108e-05, "loss": 0.6749, "step": 10440 }, { "epoch": 0.06676207147694313, "grad_norm": 1.1994624137878418, "learning_rate": 9.972562602338341e-05, "loss": 0.8246, "step": 10450 }, { "epoch": 0.06682595862668182, "grad_norm": 1.502936601638794, "learning_rate": 9.972510083472569e-05, "loss": 0.9699, "step": 10460 }, { "epoch": 0.06688984577642053, "grad_norm": 0.9399340748786926, "learning_rate": 9.972457514529316e-05, "loss": 0.8597, "step": 10470 }, { "epoch": 0.06695373292615923, "grad_norm": 1.0776817798614502, "learning_rate": 9.972404895509116e-05, "loss": 1.0443, "step": 10480 }, { "epoch": 0.06701762007589794, "grad_norm": 1.5870468616485596, "learning_rate": 9.972352226412495e-05, "loss": 1.1327, "step": 10490 }, { "epoch": 0.06708150722563663, "grad_norm": 0.8504364490509033, "learning_rate": 9.972299507239988e-05, "loss": 0.9158, "step": 10500 }, { "epoch": 0.06714539437537534, "grad_norm": 0.7087526321411133, "learning_rate": 9.972246737992122e-05, "loss": 0.9687, "step": 10510 }, { "epoch": 0.06720928152511405, "grad_norm": 0.9799100756645203, "learning_rate": 9.972193918669429e-05, "loss": 1.1421, "step": 10520 }, { "epoch": 0.06727316867485274, "grad_norm": 0.6044210195541382, "learning_rate": 9.972141049272444e-05, "loss": 1.1096, "step": 10530 }, { "epoch": 0.06733705582459144, "grad_norm": 0.8850777745246887, "learning_rate": 9.972088129801693e-05, "loss": 0.8467, "step": 10540 }, { "epoch": 0.06740094297433015, "grad_norm": 0.8483796715736389, "learning_rate": 9.972035160257717e-05, "loss": 0.9819, "step": 10550 }, { "epoch": 0.06746483012406884, "grad_norm": 1.1407147645950317, "learning_rate": 9.971982140641043e-05, "loss": 0.9107, "step": 10560 }, { "epoch": 0.06752871727380755, "grad_norm": 0.834553599357605, "learning_rate": 9.971929070952209e-05, "loss": 1.1262, "step": 10570 }, { "epoch": 0.06759260442354625, "grad_norm": 1.0828417539596558, "learning_rate": 9.971875951191747e-05, "loss": 0.9017, "step": 10580 }, { "epoch": 0.06765649157328495, "grad_norm": 0.5860454440116882, "learning_rate": 9.971822781360194e-05, "loss": 0.7191, "step": 10590 }, { "epoch": 0.06772037872302365, "grad_norm": 0.767382025718689, "learning_rate": 9.971769561458084e-05, "loss": 1.048, "step": 10600 }, { "epoch": 0.06778426587276236, "grad_norm": 0.6914779543876648, "learning_rate": 9.971716291485953e-05, "loss": 0.949, "step": 10610 }, { "epoch": 0.06784815302250105, "grad_norm": 1.306636929512024, "learning_rate": 9.971662971444338e-05, "loss": 0.8191, "step": 10620 }, { "epoch": 0.06791204017223976, "grad_norm": 1.0141420364379883, "learning_rate": 9.971609601333776e-05, "loss": 0.9747, "step": 10630 }, { "epoch": 0.06797592732197846, "grad_norm": 0.7582118511199951, "learning_rate": 9.971556181154802e-05, "loss": 0.7757, "step": 10640 }, { "epoch": 0.06803981447171716, "grad_norm": 0.6744197010993958, "learning_rate": 9.971502710907958e-05, "loss": 0.7907, "step": 10650 }, { "epoch": 0.06810370162145586, "grad_norm": 1.1960172653198242, "learning_rate": 9.971449190593782e-05, "loss": 0.9023, "step": 10660 }, { "epoch": 0.06816758877119457, "grad_norm": 1.0107911825180054, "learning_rate": 9.971395620212811e-05, "loss": 0.918, "step": 10670 }, { "epoch": 0.06823147592093326, "grad_norm": 0.6501746773719788, "learning_rate": 9.971341999765585e-05, "loss": 0.9352, "step": 10680 }, { "epoch": 0.06829536307067197, "grad_norm": 0.9184291362762451, "learning_rate": 9.971288329252644e-05, "loss": 1.1747, "step": 10690 }, { "epoch": 0.06835925022041067, "grad_norm": 0.5910547971725464, "learning_rate": 9.971234608674529e-05, "loss": 0.7598, "step": 10700 }, { "epoch": 0.06842313737014936, "grad_norm": 0.8851799964904785, "learning_rate": 9.97118083803178e-05, "loss": 0.9643, "step": 10710 }, { "epoch": 0.06848702451988807, "grad_norm": 0.6597937941551208, "learning_rate": 9.97112701732494e-05, "loss": 0.9897, "step": 10720 }, { "epoch": 0.06855091166962678, "grad_norm": 0.6581412553787231, "learning_rate": 9.97107314655455e-05, "loss": 1.1446, "step": 10730 }, { "epoch": 0.06861479881936547, "grad_norm": 0.5868738293647766, "learning_rate": 9.971019225721153e-05, "loss": 1.0789, "step": 10740 }, { "epoch": 0.06867868596910418, "grad_norm": 0.6730684041976929, "learning_rate": 9.970965254825292e-05, "loss": 0.9802, "step": 10750 }, { "epoch": 0.06874257311884288, "grad_norm": 0.8661940097808838, "learning_rate": 9.970911233867511e-05, "loss": 1.0777, "step": 10760 }, { "epoch": 0.06880646026858157, "grad_norm": 1.0571337938308716, "learning_rate": 9.970857162848352e-05, "loss": 1.0175, "step": 10770 }, { "epoch": 0.06887034741832028, "grad_norm": 1.2184176445007324, "learning_rate": 9.970803041768362e-05, "loss": 0.9196, "step": 10780 }, { "epoch": 0.06893423456805899, "grad_norm": 0.6517652869224548, "learning_rate": 9.970748870628083e-05, "loss": 0.9498, "step": 10790 }, { "epoch": 0.06899812171779768, "grad_norm": 1.2037395238876343, "learning_rate": 9.970694649428065e-05, "loss": 0.785, "step": 10800 }, { "epoch": 0.06906200886753638, "grad_norm": 0.8196636438369751, "learning_rate": 9.97064037816885e-05, "loss": 0.9136, "step": 10810 }, { "epoch": 0.06912589601727509, "grad_norm": 0.9403445720672607, "learning_rate": 9.970586056850988e-05, "loss": 0.847, "step": 10820 }, { "epoch": 0.06918978316701378, "grad_norm": 0.5096237659454346, "learning_rate": 9.970531685475024e-05, "loss": 0.8693, "step": 10830 }, { "epoch": 0.06925367031675249, "grad_norm": 0.5676767230033875, "learning_rate": 9.970477264041505e-05, "loss": 0.9367, "step": 10840 }, { "epoch": 0.0693175574664912, "grad_norm": 0.9769662618637085, "learning_rate": 9.970422792550978e-05, "loss": 0.9091, "step": 10850 }, { "epoch": 0.06938144461622989, "grad_norm": 0.6873984932899475, "learning_rate": 9.970368271003995e-05, "loss": 0.9392, "step": 10860 }, { "epoch": 0.0694453317659686, "grad_norm": 1.1281991004943848, "learning_rate": 9.970313699401104e-05, "loss": 0.8311, "step": 10870 }, { "epoch": 0.0695092189157073, "grad_norm": 0.8184236288070679, "learning_rate": 9.970259077742855e-05, "loss": 0.7781, "step": 10880 }, { "epoch": 0.06957310606544599, "grad_norm": 0.7411293983459473, "learning_rate": 9.970204406029796e-05, "loss": 0.8319, "step": 10890 }, { "epoch": 0.0696369932151847, "grad_norm": 0.8405719995498657, "learning_rate": 9.97014968426248e-05, "loss": 1.1157, "step": 10900 }, { "epoch": 0.0697008803649234, "grad_norm": 0.8236634731292725, "learning_rate": 9.970094912441454e-05, "loss": 0.8209, "step": 10910 }, { "epoch": 0.0697647675146621, "grad_norm": 0.7503064870834351, "learning_rate": 9.970040090567275e-05, "loss": 1.1207, "step": 10920 }, { "epoch": 0.0698286546644008, "grad_norm": 1.037656545639038, "learning_rate": 9.969985218640492e-05, "loss": 1.0938, "step": 10930 }, { "epoch": 0.06989254181413951, "grad_norm": 2.2834203243255615, "learning_rate": 9.969930296661658e-05, "loss": 1.0299, "step": 10940 }, { "epoch": 0.0699564289638782, "grad_norm": 0.47441643476486206, "learning_rate": 9.969875324631327e-05, "loss": 0.8998, "step": 10950 }, { "epoch": 0.0700203161136169, "grad_norm": 0.8986606597900391, "learning_rate": 9.969820302550051e-05, "loss": 0.8735, "step": 10960 }, { "epoch": 0.07008420326335561, "grad_norm": 0.6057919263839722, "learning_rate": 9.969765230418386e-05, "loss": 0.8311, "step": 10970 }, { "epoch": 0.0701480904130943, "grad_norm": 0.9726822972297668, "learning_rate": 9.969710108236885e-05, "loss": 1.0337, "step": 10980 }, { "epoch": 0.07021197756283301, "grad_norm": 0.875328779220581, "learning_rate": 9.969654936006102e-05, "loss": 0.978, "step": 10990 }, { "epoch": 0.07027586471257172, "grad_norm": 1.4699301719665527, "learning_rate": 9.969599713726599e-05, "loss": 0.709, "step": 11000 }, { "epoch": 0.07033975186231041, "grad_norm": 0.9150874614715576, "learning_rate": 9.969544441398924e-05, "loss": 0.7534, "step": 11010 }, { "epoch": 0.07040363901204912, "grad_norm": 0.9999013543128967, "learning_rate": 9.969489119023638e-05, "loss": 1.0469, "step": 11020 }, { "epoch": 0.07046752616178782, "grad_norm": 1.0596497058868408, "learning_rate": 9.969433746601298e-05, "loss": 0.8638, "step": 11030 }, { "epoch": 0.07053141331152651, "grad_norm": 0.5560715198516846, "learning_rate": 9.96937832413246e-05, "loss": 0.8614, "step": 11040 }, { "epoch": 0.07059530046126522, "grad_norm": 0.7285141944885254, "learning_rate": 9.969322851617684e-05, "loss": 0.7894, "step": 11050 }, { "epoch": 0.07065918761100393, "grad_norm": 0.8218443393707275, "learning_rate": 9.969267329057526e-05, "loss": 1.116, "step": 11060 }, { "epoch": 0.07072307476074262, "grad_norm": 0.7729995250701904, "learning_rate": 9.96921175645255e-05, "loss": 0.9044, "step": 11070 }, { "epoch": 0.07078696191048132, "grad_norm": 0.719794511795044, "learning_rate": 9.96915613380331e-05, "loss": 1.182, "step": 11080 }, { "epoch": 0.07085084906022003, "grad_norm": 0.9527838230133057, "learning_rate": 9.96910046111037e-05, "loss": 1.0074, "step": 11090 }, { "epoch": 0.07091473620995872, "grad_norm": 0.7101008892059326, "learning_rate": 9.969044738374289e-05, "loss": 1.0559, "step": 11100 }, { "epoch": 0.07097862335969743, "grad_norm": 0.4492223560810089, "learning_rate": 9.968988965595629e-05, "loss": 0.727, "step": 11110 }, { "epoch": 0.07104251050943614, "grad_norm": 0.6947804093360901, "learning_rate": 9.968933142774952e-05, "loss": 0.9424, "step": 11120 }, { "epoch": 0.07110639765917483, "grad_norm": 1.0676300525665283, "learning_rate": 9.968877269912819e-05, "loss": 0.7982, "step": 11130 }, { "epoch": 0.07117028480891353, "grad_norm": 0.7446919679641724, "learning_rate": 9.968821347009792e-05, "loss": 0.9773, "step": 11140 }, { "epoch": 0.07123417195865224, "grad_norm": 1.2251659631729126, "learning_rate": 9.968765374066437e-05, "loss": 0.8226, "step": 11150 }, { "epoch": 0.07129805910839093, "grad_norm": 0.635826051235199, "learning_rate": 9.968709351083315e-05, "loss": 0.7913, "step": 11160 }, { "epoch": 0.07136194625812964, "grad_norm": 1.43468177318573, "learning_rate": 9.968653278060992e-05, "loss": 0.7686, "step": 11170 }, { "epoch": 0.07142583340786834, "grad_norm": 6.540151596069336, "learning_rate": 9.968597155000033e-05, "loss": 0.8114, "step": 11180 }, { "epoch": 0.07148972055760704, "grad_norm": 1.3150196075439453, "learning_rate": 9.968540981901e-05, "loss": 0.6353, "step": 11190 }, { "epoch": 0.07155360770734574, "grad_norm": 0.5050914883613586, "learning_rate": 9.968484758764462e-05, "loss": 0.6865, "step": 11200 }, { "epoch": 0.07161749485708445, "grad_norm": 0.9180815815925598, "learning_rate": 9.968428485590983e-05, "loss": 0.9142, "step": 11210 }, { "epoch": 0.07168138200682314, "grad_norm": 1.4517556428909302, "learning_rate": 9.968372162381133e-05, "loss": 0.7999, "step": 11220 }, { "epoch": 0.07174526915656185, "grad_norm": 0.6034737229347229, "learning_rate": 9.968315789135475e-05, "loss": 1.23, "step": 11230 }, { "epoch": 0.07180915630630055, "grad_norm": 0.9869849681854248, "learning_rate": 9.96825936585458e-05, "loss": 1.0731, "step": 11240 }, { "epoch": 0.07187304345603925, "grad_norm": 0.6998457908630371, "learning_rate": 9.968202892539014e-05, "loss": 1.1126, "step": 11250 }, { "epoch": 0.07193693060577795, "grad_norm": 0.7587766647338867, "learning_rate": 9.968146369189349e-05, "loss": 0.9376, "step": 11260 }, { "epoch": 0.07200081775551666, "grad_norm": 0.9407736659049988, "learning_rate": 9.96808979580615e-05, "loss": 0.7904, "step": 11270 }, { "epoch": 0.07206470490525535, "grad_norm": 1.7557258605957031, "learning_rate": 9.968033172389989e-05, "loss": 0.8119, "step": 11280 }, { "epoch": 0.07212859205499406, "grad_norm": 0.6084944605827332, "learning_rate": 9.967976498941436e-05, "loss": 0.7708, "step": 11290 }, { "epoch": 0.07219247920473276, "grad_norm": 0.7556819915771484, "learning_rate": 9.967919775461063e-05, "loss": 0.7996, "step": 11300 }, { "epoch": 0.07225636635447147, "grad_norm": 0.7954988479614258, "learning_rate": 9.967863001949438e-05, "loss": 1.1191, "step": 11310 }, { "epoch": 0.07232025350421016, "grad_norm": 0.7278555631637573, "learning_rate": 9.967806178407135e-05, "loss": 0.8343, "step": 11320 }, { "epoch": 0.07238414065394887, "grad_norm": 0.7036782503128052, "learning_rate": 9.967749304834728e-05, "loss": 0.8194, "step": 11330 }, { "epoch": 0.07244802780368757, "grad_norm": 1.3781989812850952, "learning_rate": 9.967692381232786e-05, "loss": 0.8285, "step": 11340 }, { "epoch": 0.07251191495342627, "grad_norm": 0.885075569152832, "learning_rate": 9.967635407601886e-05, "loss": 1.0042, "step": 11350 }, { "epoch": 0.07257580210316497, "grad_norm": 0.6959792375564575, "learning_rate": 9.967578383942597e-05, "loss": 0.8172, "step": 11360 }, { "epoch": 0.07263968925290368, "grad_norm": 1.800525188446045, "learning_rate": 9.967521310255498e-05, "loss": 0.8708, "step": 11370 }, { "epoch": 0.07270357640264237, "grad_norm": 0.6853658556938171, "learning_rate": 9.96746418654116e-05, "loss": 0.8518, "step": 11380 }, { "epoch": 0.07276746355238108, "grad_norm": 0.7943517565727234, "learning_rate": 9.967407012800163e-05, "loss": 0.7797, "step": 11390 }, { "epoch": 0.07283135070211978, "grad_norm": 0.7777195572853088, "learning_rate": 9.967349789033078e-05, "loss": 0.7811, "step": 11400 }, { "epoch": 0.07289523785185847, "grad_norm": 0.9152284860610962, "learning_rate": 9.967292515240486e-05, "loss": 0.7322, "step": 11410 }, { "epoch": 0.07295912500159718, "grad_norm": 1.2940709590911865, "learning_rate": 9.967235191422957e-05, "loss": 0.7784, "step": 11420 }, { "epoch": 0.07302301215133589, "grad_norm": 1.4273176193237305, "learning_rate": 9.967177817581075e-05, "loss": 1.334, "step": 11430 }, { "epoch": 0.07308689930107458, "grad_norm": 0.9415301084518433, "learning_rate": 9.967120393715414e-05, "loss": 1.33, "step": 11440 }, { "epoch": 0.07315078645081328, "grad_norm": 1.6769905090332031, "learning_rate": 9.967062919826552e-05, "loss": 0.8804, "step": 11450 }, { "epoch": 0.07321467360055199, "grad_norm": 0.8233237266540527, "learning_rate": 9.967005395915072e-05, "loss": 0.9747, "step": 11460 }, { "epoch": 0.07327856075029068, "grad_norm": 0.793849527835846, "learning_rate": 9.966947821981551e-05, "loss": 0.736, "step": 11470 }, { "epoch": 0.07334244790002939, "grad_norm": 0.8288117051124573, "learning_rate": 9.966890198026566e-05, "loss": 0.9165, "step": 11480 }, { "epoch": 0.0734063350497681, "grad_norm": 0.7047694325447083, "learning_rate": 9.966832524050702e-05, "loss": 0.8662, "step": 11490 }, { "epoch": 0.07347022219950679, "grad_norm": 0.6443949937820435, "learning_rate": 9.966774800054535e-05, "loss": 1.0167, "step": 11500 }, { "epoch": 0.0735341093492455, "grad_norm": 0.6362110376358032, "learning_rate": 9.966717026038651e-05, "loss": 1.0175, "step": 11510 }, { "epoch": 0.0735979964989842, "grad_norm": 0.7651115655899048, "learning_rate": 9.96665920200363e-05, "loss": 0.914, "step": 11520 }, { "epoch": 0.07366188364872289, "grad_norm": 0.7375466823577881, "learning_rate": 9.966601327950052e-05, "loss": 0.9936, "step": 11530 }, { "epoch": 0.0737257707984616, "grad_norm": 0.7288793325424194, "learning_rate": 9.966543403878503e-05, "loss": 1.1943, "step": 11540 }, { "epoch": 0.0737896579482003, "grad_norm": 0.8896105289459229, "learning_rate": 9.966485429789565e-05, "loss": 1.0228, "step": 11550 }, { "epoch": 0.073853545097939, "grad_norm": 1.1143486499786377, "learning_rate": 9.966427405683823e-05, "loss": 0.8327, "step": 11560 }, { "epoch": 0.0739174322476777, "grad_norm": 0.9701015949249268, "learning_rate": 9.96636933156186e-05, "loss": 0.8488, "step": 11570 }, { "epoch": 0.07398131939741641, "grad_norm": 0.8440617322921753, "learning_rate": 9.966311207424261e-05, "loss": 1.1248, "step": 11580 }, { "epoch": 0.0740452065471551, "grad_norm": 1.1028122901916504, "learning_rate": 9.96625303327161e-05, "loss": 0.941, "step": 11590 }, { "epoch": 0.07410909369689381, "grad_norm": 0.8367504477500916, "learning_rate": 9.966194809104498e-05, "loss": 1.0069, "step": 11600 }, { "epoch": 0.07417298084663251, "grad_norm": 0.6582353115081787, "learning_rate": 9.966136534923507e-05, "loss": 1.0914, "step": 11610 }, { "epoch": 0.0742368679963712, "grad_norm": 0.720551609992981, "learning_rate": 9.966078210729224e-05, "loss": 0.8932, "step": 11620 }, { "epoch": 0.07430075514610991, "grad_norm": 1.5726115703582764, "learning_rate": 9.966019836522235e-05, "loss": 0.666, "step": 11630 }, { "epoch": 0.07436464229584862, "grad_norm": 0.8888491988182068, "learning_rate": 9.965961412303133e-05, "loss": 0.8511, "step": 11640 }, { "epoch": 0.07442852944558731, "grad_norm": 0.9958298206329346, "learning_rate": 9.965902938072503e-05, "loss": 0.8403, "step": 11650 }, { "epoch": 0.07449241659532602, "grad_norm": 0.9258823394775391, "learning_rate": 9.965844413830934e-05, "loss": 0.9406, "step": 11660 }, { "epoch": 0.07455630374506472, "grad_norm": 0.6303139328956604, "learning_rate": 9.965785839579016e-05, "loss": 0.8162, "step": 11670 }, { "epoch": 0.07462019089480341, "grad_norm": 0.8224695920944214, "learning_rate": 9.965727215317338e-05, "loss": 0.8578, "step": 11680 }, { "epoch": 0.07468407804454212, "grad_norm": 0.7703375816345215, "learning_rate": 9.965668541046491e-05, "loss": 0.9871, "step": 11690 }, { "epoch": 0.07474796519428083, "grad_norm": 0.5986992716789246, "learning_rate": 9.965609816767066e-05, "loss": 0.793, "step": 11700 }, { "epoch": 0.07481185234401952, "grad_norm": 0.7556684613227844, "learning_rate": 9.965551042479655e-05, "loss": 0.9343, "step": 11710 }, { "epoch": 0.07487573949375823, "grad_norm": 0.7659729719161987, "learning_rate": 9.965492218184848e-05, "loss": 0.8594, "step": 11720 }, { "epoch": 0.07493962664349693, "grad_norm": 0.7803331017494202, "learning_rate": 9.965433343883239e-05, "loss": 0.7292, "step": 11730 }, { "epoch": 0.07500351379323562, "grad_norm": 0.9800279140472412, "learning_rate": 9.96537441957542e-05, "loss": 0.7982, "step": 11740 }, { "epoch": 0.07506740094297433, "grad_norm": 1.3977315425872803, "learning_rate": 9.965315445261986e-05, "loss": 0.7011, "step": 11750 }, { "epoch": 0.07513128809271304, "grad_norm": 0.6457341313362122, "learning_rate": 9.965256420943529e-05, "loss": 0.8958, "step": 11760 }, { "epoch": 0.07519517524245173, "grad_norm": 0.789249062538147, "learning_rate": 9.965197346620645e-05, "loss": 0.8956, "step": 11770 }, { "epoch": 0.07525906239219043, "grad_norm": 0.8489546179771423, "learning_rate": 9.965138222293928e-05, "loss": 0.8684, "step": 11780 }, { "epoch": 0.07532294954192914, "grad_norm": 0.7303208112716675, "learning_rate": 9.965079047963974e-05, "loss": 0.9646, "step": 11790 }, { "epoch": 0.07538683669166783, "grad_norm": 3.839034080505371, "learning_rate": 9.965019823631378e-05, "loss": 0.8553, "step": 11800 }, { "epoch": 0.07545072384140654, "grad_norm": 1.2064359188079834, "learning_rate": 9.964960549296736e-05, "loss": 1.0195, "step": 11810 }, { "epoch": 0.07551461099114525, "grad_norm": 0.7502697706222534, "learning_rate": 9.964901224960647e-05, "loss": 0.9259, "step": 11820 }, { "epoch": 0.07557849814088394, "grad_norm": 0.5781645774841309, "learning_rate": 9.964841850623709e-05, "loss": 0.8668, "step": 11830 }, { "epoch": 0.07564238529062264, "grad_norm": 0.8652671575546265, "learning_rate": 9.964782426286516e-05, "loss": 0.8489, "step": 11840 }, { "epoch": 0.07570627244036135, "grad_norm": 0.9653028845787048, "learning_rate": 9.96472295194967e-05, "loss": 0.9514, "step": 11850 }, { "epoch": 0.07577015959010004, "grad_norm": 2.5349843502044678, "learning_rate": 9.964663427613769e-05, "loss": 1.0536, "step": 11860 }, { "epoch": 0.07583404673983875, "grad_norm": 1.0257644653320312, "learning_rate": 9.96460385327941e-05, "loss": 1.05, "step": 11870 }, { "epoch": 0.07589793388957745, "grad_norm": 0.6599146723747253, "learning_rate": 9.964544228947199e-05, "loss": 0.9347, "step": 11880 }, { "epoch": 0.07596182103931615, "grad_norm": 1.0453253984451294, "learning_rate": 9.96448455461773e-05, "loss": 0.9054, "step": 11890 }, { "epoch": 0.07602570818905485, "grad_norm": 0.5662599802017212, "learning_rate": 9.964424830291607e-05, "loss": 0.9117, "step": 11900 }, { "epoch": 0.07608959533879356, "grad_norm": 0.6186836361885071, "learning_rate": 9.964365055969431e-05, "loss": 0.9725, "step": 11910 }, { "epoch": 0.07615348248853225, "grad_norm": 0.8609874844551086, "learning_rate": 9.964305231651804e-05, "loss": 0.9634, "step": 11920 }, { "epoch": 0.07621736963827096, "grad_norm": 0.8729275465011597, "learning_rate": 9.96424535733933e-05, "loss": 0.9384, "step": 11930 }, { "epoch": 0.07628125678800966, "grad_norm": 0.9938400387763977, "learning_rate": 9.964185433032609e-05, "loss": 0.8695, "step": 11940 }, { "epoch": 0.07634514393774836, "grad_norm": 0.836526095867157, "learning_rate": 9.964125458732247e-05, "loss": 0.9405, "step": 11950 }, { "epoch": 0.07640903108748706, "grad_norm": 0.7302273511886597, "learning_rate": 9.964065434438846e-05, "loss": 1.0793, "step": 11960 }, { "epoch": 0.07647291823722577, "grad_norm": 0.49212926626205444, "learning_rate": 9.964005360153013e-05, "loss": 0.8772, "step": 11970 }, { "epoch": 0.07653680538696446, "grad_norm": 0.6889157295227051, "learning_rate": 9.963945235875351e-05, "loss": 0.9, "step": 11980 }, { "epoch": 0.07660069253670317, "grad_norm": 0.9073895215988159, "learning_rate": 9.963885061606466e-05, "loss": 1.2127, "step": 11990 }, { "epoch": 0.07666457968644187, "grad_norm": 0.8105494976043701, "learning_rate": 9.963824837346963e-05, "loss": 0.8683, "step": 12000 }, { "epoch": 0.07672846683618056, "grad_norm": 0.9559453129768372, "learning_rate": 9.963764563097451e-05, "loss": 0.8229, "step": 12010 }, { "epoch": 0.07679235398591927, "grad_norm": 0.7197737693786621, "learning_rate": 9.963704238858535e-05, "loss": 1.0417, "step": 12020 }, { "epoch": 0.07685624113565798, "grad_norm": 1.704092025756836, "learning_rate": 9.963643864630823e-05, "loss": 0.8046, "step": 12030 }, { "epoch": 0.07692012828539667, "grad_norm": 0.7579613327980042, "learning_rate": 9.963583440414923e-05, "loss": 0.9269, "step": 12040 }, { "epoch": 0.07698401543513538, "grad_norm": 1.0408282279968262, "learning_rate": 9.963522966211444e-05, "loss": 1.0785, "step": 12050 }, { "epoch": 0.07704790258487408, "grad_norm": 0.5655786991119385, "learning_rate": 9.963462442020994e-05, "loss": 0.8481, "step": 12060 }, { "epoch": 0.07711178973461277, "grad_norm": 0.6558650732040405, "learning_rate": 9.963401867844184e-05, "loss": 0.9213, "step": 12070 }, { "epoch": 0.07717567688435148, "grad_norm": 0.9138306975364685, "learning_rate": 9.963341243681623e-05, "loss": 0.8109, "step": 12080 }, { "epoch": 0.07723956403409019, "grad_norm": 0.8476769924163818, "learning_rate": 9.963280569533923e-05, "loss": 0.8877, "step": 12090 }, { "epoch": 0.07730345118382888, "grad_norm": 1.9213597774505615, "learning_rate": 9.963219845401692e-05, "loss": 0.8959, "step": 12100 }, { "epoch": 0.07736733833356758, "grad_norm": 0.6933993697166443, "learning_rate": 9.963159071285544e-05, "loss": 0.8968, "step": 12110 }, { "epoch": 0.07743122548330629, "grad_norm": 0.6891202926635742, "learning_rate": 9.963098247186091e-05, "loss": 1.2008, "step": 12120 }, { "epoch": 0.07749511263304498, "grad_norm": 0.7064499855041504, "learning_rate": 9.963037373103944e-05, "loss": 0.9018, "step": 12130 }, { "epoch": 0.07755899978278369, "grad_norm": 0.7487188577651978, "learning_rate": 9.962976449039717e-05, "loss": 1.0011, "step": 12140 }, { "epoch": 0.0776228869325224, "grad_norm": 0.8367332816123962, "learning_rate": 9.962915474994023e-05, "loss": 0.9068, "step": 12150 }, { "epoch": 0.0776867740822611, "grad_norm": 1.0736783742904663, "learning_rate": 9.962854450967478e-05, "loss": 0.9293, "step": 12160 }, { "epoch": 0.0777506612319998, "grad_norm": 0.715390682220459, "learning_rate": 9.962793376960695e-05, "loss": 0.9036, "step": 12170 }, { "epoch": 0.0778145483817385, "grad_norm": 1.1531165838241577, "learning_rate": 9.962732252974289e-05, "loss": 0.7847, "step": 12180 }, { "epoch": 0.0778784355314772, "grad_norm": 0.6619348526000977, "learning_rate": 9.962671079008876e-05, "loss": 1.0075, "step": 12190 }, { "epoch": 0.0779423226812159, "grad_norm": 1.0544220209121704, "learning_rate": 9.962609855065072e-05, "loss": 0.9982, "step": 12200 }, { "epoch": 0.0780062098309546, "grad_norm": 0.6626638174057007, "learning_rate": 9.962548581143494e-05, "loss": 1.0559, "step": 12210 }, { "epoch": 0.07807009698069331, "grad_norm": 1.291588544845581, "learning_rate": 9.962487257244757e-05, "loss": 1.0497, "step": 12220 }, { "epoch": 0.078133984130432, "grad_norm": 0.7503036260604858, "learning_rate": 9.962425883369481e-05, "loss": 0.9837, "step": 12230 }, { "epoch": 0.07819787128017071, "grad_norm": 0.789021909236908, "learning_rate": 9.962364459518283e-05, "loss": 0.8779, "step": 12240 }, { "epoch": 0.07826175842990941, "grad_norm": 1.2305183410644531, "learning_rate": 9.962302985691783e-05, "loss": 0.9292, "step": 12250 }, { "epoch": 0.0783256455796481, "grad_norm": 1.5961018800735474, "learning_rate": 9.962241461890598e-05, "loss": 0.9467, "step": 12260 }, { "epoch": 0.07838953272938681, "grad_norm": 0.5835550427436829, "learning_rate": 9.962179888115348e-05, "loss": 1.0957, "step": 12270 }, { "epoch": 0.07845341987912552, "grad_norm": 1.0020620822906494, "learning_rate": 9.962118264366655e-05, "loss": 0.9427, "step": 12280 }, { "epoch": 0.07851730702886421, "grad_norm": 0.6819837689399719, "learning_rate": 9.962056590645136e-05, "loss": 1.0855, "step": 12290 }, { "epoch": 0.07858119417860292, "grad_norm": 1.3488112688064575, "learning_rate": 9.961994866951416e-05, "loss": 0.6407, "step": 12300 }, { "epoch": 0.07864508132834162, "grad_norm": 0.8530036807060242, "learning_rate": 9.961933093286115e-05, "loss": 1.0095, "step": 12310 }, { "epoch": 0.07870896847808032, "grad_norm": 0.7318217158317566, "learning_rate": 9.961871269649854e-05, "loss": 0.8607, "step": 12320 }, { "epoch": 0.07877285562781902, "grad_norm": 0.5192087292671204, "learning_rate": 9.96180939604326e-05, "loss": 0.7035, "step": 12330 }, { "epoch": 0.07883674277755773, "grad_norm": 0.8365872502326965, "learning_rate": 9.961747472466949e-05, "loss": 1.4109, "step": 12340 }, { "epoch": 0.07890062992729642, "grad_norm": 0.9271693229675293, "learning_rate": 9.96168549892155e-05, "loss": 0.842, "step": 12350 }, { "epoch": 0.07896451707703513, "grad_norm": 1.00367271900177, "learning_rate": 9.961623475407684e-05, "loss": 1.0556, "step": 12360 }, { "epoch": 0.07902840422677383, "grad_norm": 1.339418888092041, "learning_rate": 9.96156140192598e-05, "loss": 0.8171, "step": 12370 }, { "epoch": 0.07909229137651252, "grad_norm": 1.03416109085083, "learning_rate": 9.961499278477058e-05, "loss": 0.8902, "step": 12380 }, { "epoch": 0.07915617852625123, "grad_norm": 0.847169041633606, "learning_rate": 9.961437105061546e-05, "loss": 0.9201, "step": 12390 }, { "epoch": 0.07922006567598994, "grad_norm": 1.1525788307189941, "learning_rate": 9.961374881680072e-05, "loss": 1.054, "step": 12400 }, { "epoch": 0.07928395282572863, "grad_norm": 0.7588199973106384, "learning_rate": 9.96131260833326e-05, "loss": 0.9179, "step": 12410 }, { "epoch": 0.07934783997546734, "grad_norm": 1.2406294345855713, "learning_rate": 9.961250285021737e-05, "loss": 1.1218, "step": 12420 }, { "epoch": 0.07941172712520604, "grad_norm": 0.7575234174728394, "learning_rate": 9.961187911746133e-05, "loss": 1.0122, "step": 12430 }, { "epoch": 0.07947561427494473, "grad_norm": 0.7496919631958008, "learning_rate": 9.961125488507072e-05, "loss": 1.0282, "step": 12440 }, { "epoch": 0.07953950142468344, "grad_norm": 0.8383338451385498, "learning_rate": 9.961063015305188e-05, "loss": 0.9828, "step": 12450 }, { "epoch": 0.07960338857442215, "grad_norm": 1.0005531311035156, "learning_rate": 9.961000492141106e-05, "loss": 1.061, "step": 12460 }, { "epoch": 0.07966727572416084, "grad_norm": 0.9767794013023376, "learning_rate": 9.960937919015458e-05, "loss": 1.0097, "step": 12470 }, { "epoch": 0.07973116287389954, "grad_norm": 0.7348878383636475, "learning_rate": 9.960875295928874e-05, "loss": 0.8203, "step": 12480 }, { "epoch": 0.07979505002363825, "grad_norm": 0.7473248243331909, "learning_rate": 9.960812622881982e-05, "loss": 0.8261, "step": 12490 }, { "epoch": 0.07985893717337694, "grad_norm": 0.6296994686126709, "learning_rate": 9.960749899875417e-05, "loss": 0.9531, "step": 12500 }, { "epoch": 0.07992282432311565, "grad_norm": 0.48655831813812256, "learning_rate": 9.960687126909807e-05, "loss": 0.8131, "step": 12510 }, { "epoch": 0.07998671147285435, "grad_norm": 0.8312428593635559, "learning_rate": 9.960624303985787e-05, "loss": 0.7988, "step": 12520 }, { "epoch": 0.08005059862259305, "grad_norm": 0.7593886256217957, "learning_rate": 9.96056143110399e-05, "loss": 0.6993, "step": 12530 }, { "epoch": 0.08011448577233175, "grad_norm": 0.9787190556526184, "learning_rate": 9.960498508265046e-05, "loss": 1.1168, "step": 12540 }, { "epoch": 0.08017837292207046, "grad_norm": 1.374013066291809, "learning_rate": 9.960435535469591e-05, "loss": 0.959, "step": 12550 }, { "epoch": 0.08024226007180915, "grad_norm": 0.632503867149353, "learning_rate": 9.960372512718258e-05, "loss": 0.9161, "step": 12560 }, { "epoch": 0.08030614722154786, "grad_norm": 0.7403663992881775, "learning_rate": 9.960309440011685e-05, "loss": 0.5914, "step": 12570 }, { "epoch": 0.08037003437128656, "grad_norm": 0.691646158695221, "learning_rate": 9.960246317350503e-05, "loss": 0.8991, "step": 12580 }, { "epoch": 0.08043392152102526, "grad_norm": 0.5965979099273682, "learning_rate": 9.960183144735348e-05, "loss": 0.81, "step": 12590 }, { "epoch": 0.08049780867076396, "grad_norm": 0.9545162320137024, "learning_rate": 9.960119922166859e-05, "loss": 1.0659, "step": 12600 }, { "epoch": 0.08056169582050267, "grad_norm": 2.2266764640808105, "learning_rate": 9.960056649645673e-05, "loss": 1.2056, "step": 12610 }, { "epoch": 0.08062558297024136, "grad_norm": 1.257367730140686, "learning_rate": 9.959993327172423e-05, "loss": 1.0144, "step": 12620 }, { "epoch": 0.08068947011998007, "grad_norm": 0.8366072177886963, "learning_rate": 9.959929954747751e-05, "loss": 0.896, "step": 12630 }, { "epoch": 0.08075335726971877, "grad_norm": 0.71613609790802, "learning_rate": 9.959866532372292e-05, "loss": 0.7121, "step": 12640 }, { "epoch": 0.08081724441945747, "grad_norm": 0.678428053855896, "learning_rate": 9.959803060046687e-05, "loss": 0.8114, "step": 12650 }, { "epoch": 0.08088113156919617, "grad_norm": 0.8528268337249756, "learning_rate": 9.959739537771573e-05, "loss": 0.9052, "step": 12660 }, { "epoch": 0.08094501871893488, "grad_norm": 0.8090612292289734, "learning_rate": 9.959675965547592e-05, "loss": 0.9429, "step": 12670 }, { "epoch": 0.08100890586867357, "grad_norm": 1.0413676500320435, "learning_rate": 9.959612343375385e-05, "loss": 0.9671, "step": 12680 }, { "epoch": 0.08107279301841228, "grad_norm": 0.6349504590034485, "learning_rate": 9.959548671255588e-05, "loss": 1.0272, "step": 12690 }, { "epoch": 0.08113668016815098, "grad_norm": 1.0371969938278198, "learning_rate": 9.959484949188846e-05, "loss": 0.7439, "step": 12700 }, { "epoch": 0.08120056731788967, "grad_norm": 0.7047412991523743, "learning_rate": 9.9594211771758e-05, "loss": 0.8986, "step": 12710 }, { "epoch": 0.08126445446762838, "grad_norm": 0.659905195236206, "learning_rate": 9.959357355217093e-05, "loss": 0.7917, "step": 12720 }, { "epoch": 0.08132834161736709, "grad_norm": 0.7714025378227234, "learning_rate": 9.959293483313368e-05, "loss": 0.826, "step": 12730 }, { "epoch": 0.08139222876710578, "grad_norm": 1.3492543697357178, "learning_rate": 9.959229561465266e-05, "loss": 1.0079, "step": 12740 }, { "epoch": 0.08145611591684448, "grad_norm": 0.7474777698516846, "learning_rate": 9.959165589673432e-05, "loss": 0.8973, "step": 12750 }, { "epoch": 0.08152000306658319, "grad_norm": 0.6047500371932983, "learning_rate": 9.959101567938509e-05, "loss": 0.8909, "step": 12760 }, { "epoch": 0.08158389021632188, "grad_norm": 0.7488225698471069, "learning_rate": 9.959037496261146e-05, "loss": 0.9554, "step": 12770 }, { "epoch": 0.08164777736606059, "grad_norm": 1.0440471172332764, "learning_rate": 9.958973374641982e-05, "loss": 0.7622, "step": 12780 }, { "epoch": 0.0817116645157993, "grad_norm": 0.6892119646072388, "learning_rate": 9.958909203081668e-05, "loss": 0.9316, "step": 12790 }, { "epoch": 0.08177555166553799, "grad_norm": 0.7813330292701721, "learning_rate": 9.958844981580847e-05, "loss": 1.0202, "step": 12800 }, { "epoch": 0.0818394388152767, "grad_norm": 0.926389217376709, "learning_rate": 9.958780710140167e-05, "loss": 1.0061, "step": 12810 }, { "epoch": 0.0819033259650154, "grad_norm": 0.7981832027435303, "learning_rate": 9.958716388760277e-05, "loss": 0.9619, "step": 12820 }, { "epoch": 0.08196721311475409, "grad_norm": 0.7643110752105713, "learning_rate": 9.958652017441822e-05, "loss": 1.0358, "step": 12830 }, { "epoch": 0.0820311002644928, "grad_norm": 2.3932769298553467, "learning_rate": 9.958587596185451e-05, "loss": 0.8638, "step": 12840 }, { "epoch": 0.0820949874142315, "grad_norm": 0.6485501527786255, "learning_rate": 9.958523124991814e-05, "loss": 0.8252, "step": 12850 }, { "epoch": 0.0821588745639702, "grad_norm": 1.1081517934799194, "learning_rate": 9.958458603861559e-05, "loss": 0.6834, "step": 12860 }, { "epoch": 0.0822227617137089, "grad_norm": 0.6985851526260376, "learning_rate": 9.958394032795335e-05, "loss": 0.8498, "step": 12870 }, { "epoch": 0.08228664886344761, "grad_norm": 0.9049435257911682, "learning_rate": 9.958329411793796e-05, "loss": 0.832, "step": 12880 }, { "epoch": 0.0823505360131863, "grad_norm": 1.0366233587265015, "learning_rate": 9.958264740857588e-05, "loss": 0.6583, "step": 12890 }, { "epoch": 0.08241442316292501, "grad_norm": 0.5812174081802368, "learning_rate": 9.958200019987364e-05, "loss": 0.8656, "step": 12900 }, { "epoch": 0.08247831031266371, "grad_norm": 0.5848665237426758, "learning_rate": 9.95813524918378e-05, "loss": 0.9233, "step": 12910 }, { "epoch": 0.0825421974624024, "grad_norm": 0.8434141278266907, "learning_rate": 9.958070428447481e-05, "loss": 0.8677, "step": 12920 }, { "epoch": 0.08260608461214111, "grad_norm": 0.6627490520477295, "learning_rate": 9.958005557779125e-05, "loss": 1.0076, "step": 12930 }, { "epoch": 0.08266997176187982, "grad_norm": 0.5368894934654236, "learning_rate": 9.957940637179364e-05, "loss": 1.0226, "step": 12940 }, { "epoch": 0.08273385891161851, "grad_norm": 0.9018540978431702, "learning_rate": 9.95787566664885e-05, "loss": 0.9392, "step": 12950 }, { "epoch": 0.08279774606135722, "grad_norm": 0.9104921817779541, "learning_rate": 9.957810646188242e-05, "loss": 0.8816, "step": 12960 }, { "epoch": 0.08286163321109592, "grad_norm": 1.005777359008789, "learning_rate": 9.957745575798189e-05, "loss": 0.9567, "step": 12970 }, { "epoch": 0.08292552036083463, "grad_norm": 0.9677864909172058, "learning_rate": 9.957680455479348e-05, "loss": 0.865, "step": 12980 }, { "epoch": 0.08298940751057332, "grad_norm": 0.5736163854598999, "learning_rate": 9.957615285232379e-05, "loss": 0.9897, "step": 12990 }, { "epoch": 0.08305329466031203, "grad_norm": 1.2024660110473633, "learning_rate": 9.957550065057932e-05, "loss": 0.8672, "step": 13000 }, { "epoch": 0.08311718181005073, "grad_norm": 0.7755523920059204, "learning_rate": 9.95748479495667e-05, "loss": 0.833, "step": 13010 }, { "epoch": 0.08318106895978943, "grad_norm": 2.249293088912964, "learning_rate": 9.957419474929246e-05, "loss": 0.9011, "step": 13020 }, { "epoch": 0.08324495610952813, "grad_norm": 1.1623985767364502, "learning_rate": 9.957354104976317e-05, "loss": 0.9665, "step": 13030 }, { "epoch": 0.08330884325926684, "grad_norm": 0.9698325395584106, "learning_rate": 9.957288685098547e-05, "loss": 0.581, "step": 13040 }, { "epoch": 0.08337273040900553, "grad_norm": 1.5064680576324463, "learning_rate": 9.957223215296589e-05, "loss": 0.993, "step": 13050 }, { "epoch": 0.08343661755874424, "grad_norm": 0.9795089960098267, "learning_rate": 9.957157695571106e-05, "loss": 0.8646, "step": 13060 }, { "epoch": 0.08350050470848294, "grad_norm": 1.1535509824752808, "learning_rate": 9.957092125922755e-05, "loss": 0.9196, "step": 13070 }, { "epoch": 0.08356439185822163, "grad_norm": 0.5842729210853577, "learning_rate": 9.957026506352198e-05, "loss": 1.1444, "step": 13080 }, { "epoch": 0.08362827900796034, "grad_norm": 1.164316177368164, "learning_rate": 9.956960836860096e-05, "loss": 0.8979, "step": 13090 }, { "epoch": 0.08369216615769905, "grad_norm": 1.0627108812332153, "learning_rate": 9.956895117447112e-05, "loss": 1.1704, "step": 13100 }, { "epoch": 0.08375605330743774, "grad_norm": 0.5449188947677612, "learning_rate": 9.956829348113903e-05, "loss": 0.9608, "step": 13110 }, { "epoch": 0.08381994045717645, "grad_norm": 0.8680428862571716, "learning_rate": 9.956763528861135e-05, "loss": 0.9228, "step": 13120 }, { "epoch": 0.08388382760691515, "grad_norm": 0.9110902547836304, "learning_rate": 9.95669765968947e-05, "loss": 1.4213, "step": 13130 }, { "epoch": 0.08394771475665384, "grad_norm": 2.3549108505249023, "learning_rate": 9.956631740599571e-05, "loss": 0.9036, "step": 13140 }, { "epoch": 0.08401160190639255, "grad_norm": 0.9437476992607117, "learning_rate": 9.956565771592103e-05, "loss": 0.9577, "step": 13150 }, { "epoch": 0.08407548905613126, "grad_norm": 0.5156351923942566, "learning_rate": 9.956499752667729e-05, "loss": 1.0223, "step": 13160 }, { "epoch": 0.08413937620586995, "grad_norm": 0.6962876915931702, "learning_rate": 9.956433683827115e-05, "loss": 0.7827, "step": 13170 }, { "epoch": 0.08420326335560865, "grad_norm": 1.191227912902832, "learning_rate": 9.956367565070927e-05, "loss": 0.7738, "step": 13180 }, { "epoch": 0.08426715050534736, "grad_norm": 0.9918831586837769, "learning_rate": 9.956301396399829e-05, "loss": 1.2268, "step": 13190 }, { "epoch": 0.08433103765508605, "grad_norm": 1.3545849323272705, "learning_rate": 9.956235177814488e-05, "loss": 0.9728, "step": 13200 }, { "epoch": 0.08439492480482476, "grad_norm": 0.8052165508270264, "learning_rate": 9.956168909315571e-05, "loss": 0.8022, "step": 13210 }, { "epoch": 0.08445881195456346, "grad_norm": 1.1841431856155396, "learning_rate": 9.956102590903744e-05, "loss": 0.8663, "step": 13220 }, { "epoch": 0.08452269910430216, "grad_norm": 1.1858928203582764, "learning_rate": 9.956036222579679e-05, "loss": 0.8862, "step": 13230 }, { "epoch": 0.08458658625404086, "grad_norm": 0.6900216937065125, "learning_rate": 9.955969804344039e-05, "loss": 0.7973, "step": 13240 }, { "epoch": 0.08465047340377957, "grad_norm": 0.737177848815918, "learning_rate": 9.955903336197497e-05, "loss": 0.6908, "step": 13250 }, { "epoch": 0.08471436055351826, "grad_norm": 1.1123918294906616, "learning_rate": 9.955836818140721e-05, "loss": 0.8086, "step": 13260 }, { "epoch": 0.08477824770325697, "grad_norm": 0.9774020910263062, "learning_rate": 9.95577025017438e-05, "loss": 0.8224, "step": 13270 }, { "epoch": 0.08484213485299567, "grad_norm": 1.0861930847167969, "learning_rate": 9.955703632299144e-05, "loss": 1.216, "step": 13280 }, { "epoch": 0.08490602200273437, "grad_norm": 0.6377803683280945, "learning_rate": 9.955636964515688e-05, "loss": 1.0431, "step": 13290 }, { "epoch": 0.08496990915247307, "grad_norm": 0.799303412437439, "learning_rate": 9.95557024682468e-05, "loss": 0.8008, "step": 13300 }, { "epoch": 0.08503379630221178, "grad_norm": 0.6764736175537109, "learning_rate": 9.955503479226791e-05, "loss": 0.856, "step": 13310 }, { "epoch": 0.08509768345195047, "grad_norm": 0.7718757390975952, "learning_rate": 9.955436661722696e-05, "loss": 1.1674, "step": 13320 }, { "epoch": 0.08516157060168918, "grad_norm": 0.8467085957527161, "learning_rate": 9.955369794313066e-05, "loss": 0.7126, "step": 13330 }, { "epoch": 0.08522545775142788, "grad_norm": 0.7613494992256165, "learning_rate": 9.955302876998576e-05, "loss": 0.8779, "step": 13340 }, { "epoch": 0.08528934490116657, "grad_norm": 1.5320026874542236, "learning_rate": 9.955235909779898e-05, "loss": 0.92, "step": 13350 }, { "epoch": 0.08535323205090528, "grad_norm": 0.9841747879981995, "learning_rate": 9.955168892657709e-05, "loss": 1.195, "step": 13360 }, { "epoch": 0.08541711920064399, "grad_norm": 0.9456724524497986, "learning_rate": 9.955101825632681e-05, "loss": 0.8966, "step": 13370 }, { "epoch": 0.08548100635038268, "grad_norm": 0.6288855671882629, "learning_rate": 9.95503470870549e-05, "loss": 1.018, "step": 13380 }, { "epoch": 0.08554489350012139, "grad_norm": 0.6074085831642151, "learning_rate": 9.954967541876816e-05, "loss": 1.1021, "step": 13390 }, { "epoch": 0.08560878064986009, "grad_norm": 0.6871976852416992, "learning_rate": 9.954900325147329e-05, "loss": 0.7936, "step": 13400 }, { "epoch": 0.08567266779959878, "grad_norm": 1.1917479038238525, "learning_rate": 9.954833058517712e-05, "loss": 1.0316, "step": 13410 }, { "epoch": 0.08573655494933749, "grad_norm": 0.8669334650039673, "learning_rate": 9.954765741988638e-05, "loss": 0.7559, "step": 13420 }, { "epoch": 0.0858004420990762, "grad_norm": 1.0920523405075073, "learning_rate": 9.954698375560786e-05, "loss": 1.0566, "step": 13430 }, { "epoch": 0.08586432924881489, "grad_norm": 0.6692205667495728, "learning_rate": 9.954630959234835e-05, "loss": 1.1381, "step": 13440 }, { "epoch": 0.0859282163985536, "grad_norm": 0.435250461101532, "learning_rate": 9.954563493011464e-05, "loss": 0.656, "step": 13450 }, { "epoch": 0.0859921035482923, "grad_norm": 0.719704806804657, "learning_rate": 9.954495976891354e-05, "loss": 0.9106, "step": 13460 }, { "epoch": 0.08605599069803099, "grad_norm": 1.0210596323013306, "learning_rate": 9.95442841087518e-05, "loss": 1.0513, "step": 13470 }, { "epoch": 0.0861198778477697, "grad_norm": 0.8312535881996155, "learning_rate": 9.954360794963629e-05, "loss": 0.9642, "step": 13480 }, { "epoch": 0.0861837649975084, "grad_norm": 0.7173671126365662, "learning_rate": 9.954299897983244e-05, "loss": 0.9963, "step": 13490 }, { "epoch": 0.0862476521472471, "grad_norm": 0.8660849928855896, "learning_rate": 9.954232187272345e-05, "loss": 0.7152, "step": 13500 }, { "epoch": 0.0863115392969858, "grad_norm": 0.757793664932251, "learning_rate": 9.954164426668044e-05, "loss": 1.0053, "step": 13510 }, { "epoch": 0.08637542644672451, "grad_norm": 0.6356269717216492, "learning_rate": 9.954096616171018e-05, "loss": 1.0546, "step": 13520 }, { "epoch": 0.0864393135964632, "grad_norm": 0.6191072463989258, "learning_rate": 9.954028755781956e-05, "loss": 0.8486, "step": 13530 }, { "epoch": 0.08650320074620191, "grad_norm": 1.0523960590362549, "learning_rate": 9.953960845501537e-05, "loss": 0.8107, "step": 13540 }, { "epoch": 0.08656708789594061, "grad_norm": 0.7796614170074463, "learning_rate": 9.953892885330447e-05, "loss": 0.8723, "step": 13550 }, { "epoch": 0.0866309750456793, "grad_norm": 0.7295846939086914, "learning_rate": 9.953824875269369e-05, "loss": 0.913, "step": 13560 }, { "epoch": 0.08669486219541801, "grad_norm": 1.0830540657043457, "learning_rate": 9.95375681531899e-05, "loss": 0.8378, "step": 13570 }, { "epoch": 0.08675874934515672, "grad_norm": 1.3589000701904297, "learning_rate": 9.953688705479994e-05, "loss": 0.9502, "step": 13580 }, { "epoch": 0.08682263649489541, "grad_norm": 0.796097993850708, "learning_rate": 9.953620545753067e-05, "loss": 0.6924, "step": 13590 }, { "epoch": 0.08688652364463412, "grad_norm": 0.933182954788208, "learning_rate": 9.953552336138896e-05, "loss": 1.0789, "step": 13600 }, { "epoch": 0.08695041079437282, "grad_norm": 0.830242395401001, "learning_rate": 9.953484076638166e-05, "loss": 0.7949, "step": 13610 }, { "epoch": 0.08701429794411152, "grad_norm": 0.751649022102356, "learning_rate": 9.953415767251568e-05, "loss": 0.8326, "step": 13620 }, { "epoch": 0.08707818509385022, "grad_norm": 3.0472450256347656, "learning_rate": 9.953347407979788e-05, "loss": 0.7271, "step": 13630 }, { "epoch": 0.08714207224358893, "grad_norm": 0.4621819853782654, "learning_rate": 9.953278998823513e-05, "loss": 1.0762, "step": 13640 }, { "epoch": 0.08720595939332762, "grad_norm": 0.8232291340827942, "learning_rate": 9.953210539783434e-05, "loss": 0.7763, "step": 13650 }, { "epoch": 0.08726984654306633, "grad_norm": 1.4312729835510254, "learning_rate": 9.953142030860238e-05, "loss": 0.8253, "step": 13660 }, { "epoch": 0.08733373369280503, "grad_norm": 0.9248529672622681, "learning_rate": 9.95307347205462e-05, "loss": 1.0337, "step": 13670 }, { "epoch": 0.08739762084254372, "grad_norm": 0.6953186392784119, "learning_rate": 9.953004863367264e-05, "loss": 0.93, "step": 13680 }, { "epoch": 0.08746150799228243, "grad_norm": 0.8455583453178406, "learning_rate": 9.952936204798866e-05, "loss": 0.8386, "step": 13690 }, { "epoch": 0.08752539514202114, "grad_norm": 1.119112253189087, "learning_rate": 9.952867496350115e-05, "loss": 0.8611, "step": 13700 }, { "epoch": 0.08758928229175983, "grad_norm": 0.8125833868980408, "learning_rate": 9.952798738021703e-05, "loss": 0.9875, "step": 13710 }, { "epoch": 0.08765316944149854, "grad_norm": 0.6726895570755005, "learning_rate": 9.952729929814323e-05, "loss": 1.0715, "step": 13720 }, { "epoch": 0.08771705659123724, "grad_norm": 0.6909586787223816, "learning_rate": 9.952661071728669e-05, "loss": 0.9544, "step": 13730 }, { "epoch": 0.08778094374097593, "grad_norm": 0.983298122882843, "learning_rate": 9.952592163765432e-05, "loss": 1.0024, "step": 13740 }, { "epoch": 0.08784483089071464, "grad_norm": 1.025319218635559, "learning_rate": 9.952523205925309e-05, "loss": 0.7382, "step": 13750 }, { "epoch": 0.08790871804045335, "grad_norm": 2.100965976715088, "learning_rate": 9.952454198208991e-05, "loss": 0.8063, "step": 13760 }, { "epoch": 0.08797260519019204, "grad_norm": 0.9277796149253845, "learning_rate": 9.952385140617174e-05, "loss": 0.963, "step": 13770 }, { "epoch": 0.08803649233993074, "grad_norm": 1.1502079963684082, "learning_rate": 9.952316033150556e-05, "loss": 0.857, "step": 13780 }, { "epoch": 0.08810037948966945, "grad_norm": 0.5683081746101379, "learning_rate": 9.952246875809831e-05, "loss": 0.8632, "step": 13790 }, { "epoch": 0.08816426663940814, "grad_norm": 0.7985507249832153, "learning_rate": 9.952177668595695e-05, "loss": 0.693, "step": 13800 }, { "epoch": 0.08822815378914685, "grad_norm": 0.9982643723487854, "learning_rate": 9.952108411508845e-05, "loss": 0.7695, "step": 13810 }, { "epoch": 0.08829204093888555, "grad_norm": 0.8026944994926453, "learning_rate": 9.952039104549981e-05, "loss": 0.9093, "step": 13820 }, { "epoch": 0.08835592808862426, "grad_norm": 0.9833221435546875, "learning_rate": 9.951969747719798e-05, "loss": 1.1041, "step": 13830 }, { "epoch": 0.08841981523836295, "grad_norm": 0.7445331811904907, "learning_rate": 9.951900341018996e-05, "loss": 1.1706, "step": 13840 }, { "epoch": 0.08848370238810166, "grad_norm": 0.7326770424842834, "learning_rate": 9.951830884448274e-05, "loss": 1.1022, "step": 13850 }, { "epoch": 0.08854758953784037, "grad_norm": 1.3713650703430176, "learning_rate": 9.95176137800833e-05, "loss": 0.812, "step": 13860 }, { "epoch": 0.08861147668757906, "grad_norm": 0.8719102740287781, "learning_rate": 9.951691821699864e-05, "loss": 1.037, "step": 13870 }, { "epoch": 0.08867536383731776, "grad_norm": 0.7241623997688293, "learning_rate": 9.951622215523579e-05, "loss": 0.9797, "step": 13880 }, { "epoch": 0.08873925098705647, "grad_norm": 0.9998733401298523, "learning_rate": 9.951552559480176e-05, "loss": 1.0036, "step": 13890 }, { "epoch": 0.08880313813679516, "grad_norm": 1.31692373752594, "learning_rate": 9.951482853570353e-05, "loss": 1.0621, "step": 13900 }, { "epoch": 0.08886702528653387, "grad_norm": 0.509678840637207, "learning_rate": 9.951413097794816e-05, "loss": 0.7828, "step": 13910 }, { "epoch": 0.08893091243627257, "grad_norm": 0.6443775296211243, "learning_rate": 9.951343292154263e-05, "loss": 0.8265, "step": 13920 }, { "epoch": 0.08899479958601127, "grad_norm": 1.014041781425476, "learning_rate": 9.9512734366494e-05, "loss": 1.0371, "step": 13930 }, { "epoch": 0.08905868673574997, "grad_norm": 0.8309150338172913, "learning_rate": 9.951203531280931e-05, "loss": 0.9042, "step": 13940 }, { "epoch": 0.08912257388548868, "grad_norm": 0.6780155897140503, "learning_rate": 9.951133576049558e-05, "loss": 0.8917, "step": 13950 }, { "epoch": 0.08918646103522737, "grad_norm": 0.7868662476539612, "learning_rate": 9.951063570955988e-05, "loss": 0.9667, "step": 13960 }, { "epoch": 0.08925034818496608, "grad_norm": 0.6636529564857483, "learning_rate": 9.950993516000924e-05, "loss": 0.8601, "step": 13970 }, { "epoch": 0.08931423533470478, "grad_norm": 0.8302227854728699, "learning_rate": 9.950923411185071e-05, "loss": 0.9081, "step": 13980 }, { "epoch": 0.08937812248444348, "grad_norm": 0.9507797360420227, "learning_rate": 9.950853256509138e-05, "loss": 0.7923, "step": 13990 }, { "epoch": 0.08944200963418218, "grad_norm": 0.5564282536506653, "learning_rate": 9.950783051973828e-05, "loss": 0.9981, "step": 14000 }, { "epoch": 0.08950589678392089, "grad_norm": 1.1084082126617432, "learning_rate": 9.950712797579849e-05, "loss": 0.7917, "step": 14010 }, { "epoch": 0.08956978393365958, "grad_norm": 1.2243750095367432, "learning_rate": 9.950642493327911e-05, "loss": 1.0782, "step": 14020 }, { "epoch": 0.08963367108339829, "grad_norm": 1.1874489784240723, "learning_rate": 9.950572139218719e-05, "loss": 0.9879, "step": 14030 }, { "epoch": 0.08969755823313699, "grad_norm": 0.6582461595535278, "learning_rate": 9.950501735252984e-05, "loss": 0.8992, "step": 14040 }, { "epoch": 0.08976144538287568, "grad_norm": 0.945318341255188, "learning_rate": 9.950431281431413e-05, "loss": 0.9753, "step": 14050 }, { "epoch": 0.08982533253261439, "grad_norm": 1.02214777469635, "learning_rate": 9.950360777754716e-05, "loss": 0.8625, "step": 14060 }, { "epoch": 0.0898892196823531, "grad_norm": 0.6554903388023376, "learning_rate": 9.950290224223604e-05, "loss": 0.7558, "step": 14070 }, { "epoch": 0.08995310683209179, "grad_norm": 0.9139891266822815, "learning_rate": 9.950219620838786e-05, "loss": 0.9843, "step": 14080 }, { "epoch": 0.0900169939818305, "grad_norm": 0.6926449537277222, "learning_rate": 9.950148967600974e-05, "loss": 0.6626, "step": 14090 }, { "epoch": 0.0900808811315692, "grad_norm": 1.608420968055725, "learning_rate": 9.95007826451088e-05, "loss": 1.0037, "step": 14100 }, { "epoch": 0.0901447682813079, "grad_norm": 0.9414392113685608, "learning_rate": 9.950007511569214e-05, "loss": 0.9188, "step": 14110 }, { "epoch": 0.0902086554310466, "grad_norm": 0.8587938547134399, "learning_rate": 9.949936708776691e-05, "loss": 0.9312, "step": 14120 }, { "epoch": 0.0902725425807853, "grad_norm": 1.4284396171569824, "learning_rate": 9.949865856134024e-05, "loss": 1.1385, "step": 14130 }, { "epoch": 0.090336429730524, "grad_norm": 0.7485639452934265, "learning_rate": 9.949794953641925e-05, "loss": 0.9224, "step": 14140 }, { "epoch": 0.0904003168802627, "grad_norm": 0.7703597545623779, "learning_rate": 9.949724001301108e-05, "loss": 0.8031, "step": 14150 }, { "epoch": 0.09046420403000141, "grad_norm": 0.6931461095809937, "learning_rate": 9.949652999112289e-05, "loss": 0.8585, "step": 14160 }, { "epoch": 0.0905280911797401, "grad_norm": 0.9867964386940002, "learning_rate": 9.94958194707618e-05, "loss": 0.9662, "step": 14170 }, { "epoch": 0.09059197832947881, "grad_norm": 0.7029063105583191, "learning_rate": 9.949510845193501e-05, "loss": 0.9446, "step": 14180 }, { "epoch": 0.09065586547921752, "grad_norm": 0.6712666153907776, "learning_rate": 9.949439693464965e-05, "loss": 0.7581, "step": 14190 }, { "epoch": 0.09071975262895621, "grad_norm": 0.8002526760101318, "learning_rate": 9.94936849189129e-05, "loss": 1.1783, "step": 14200 }, { "epoch": 0.09078363977869491, "grad_norm": 1.9806957244873047, "learning_rate": 9.949297240473192e-05, "loss": 0.8167, "step": 14210 }, { "epoch": 0.09084752692843362, "grad_norm": 0.6431198716163635, "learning_rate": 9.949225939211391e-05, "loss": 1.1454, "step": 14220 }, { "epoch": 0.09091141407817231, "grad_norm": 1.083142638206482, "learning_rate": 9.9491545881066e-05, "loss": 1.2549, "step": 14230 }, { "epoch": 0.09097530122791102, "grad_norm": 0.520418643951416, "learning_rate": 9.949083187159542e-05, "loss": 0.7501, "step": 14240 }, { "epoch": 0.09103918837764972, "grad_norm": 1.0432296991348267, "learning_rate": 9.949011736370935e-05, "loss": 0.8595, "step": 14250 }, { "epoch": 0.09110307552738842, "grad_norm": 0.8031591773033142, "learning_rate": 9.948940235741499e-05, "loss": 0.7955, "step": 14260 }, { "epoch": 0.09116696267712712, "grad_norm": 0.7311345934867859, "learning_rate": 9.948868685271952e-05, "loss": 0.9517, "step": 14270 }, { "epoch": 0.09123084982686583, "grad_norm": 1.3706258535385132, "learning_rate": 9.948797084963016e-05, "loss": 0.9347, "step": 14280 }, { "epoch": 0.09129473697660452, "grad_norm": 0.5846802592277527, "learning_rate": 9.948725434815413e-05, "loss": 0.7575, "step": 14290 }, { "epoch": 0.09135862412634323, "grad_norm": 0.7384892702102661, "learning_rate": 9.948653734829863e-05, "loss": 0.9603, "step": 14300 }, { "epoch": 0.09142251127608193, "grad_norm": 2.91487717628479, "learning_rate": 9.948581985007089e-05, "loss": 1.0739, "step": 14310 }, { "epoch": 0.09148639842582063, "grad_norm": 0.6311538815498352, "learning_rate": 9.948510185347813e-05, "loss": 1.0676, "step": 14320 }, { "epoch": 0.09155028557555933, "grad_norm": 0.6362346410751343, "learning_rate": 9.948438335852759e-05, "loss": 1.1728, "step": 14330 }, { "epoch": 0.09161417272529804, "grad_norm": 0.6874721646308899, "learning_rate": 9.94836643652265e-05, "loss": 1.2022, "step": 14340 }, { "epoch": 0.09167805987503673, "grad_norm": 0.721106231212616, "learning_rate": 9.948294487358208e-05, "loss": 1.097, "step": 14350 }, { "epoch": 0.09174194702477544, "grad_norm": 1.0813249349594116, "learning_rate": 9.948222488360162e-05, "loss": 1.2724, "step": 14360 }, { "epoch": 0.09180583417451414, "grad_norm": 0.8952019810676575, "learning_rate": 9.948150439529233e-05, "loss": 0.8907, "step": 14370 }, { "epoch": 0.09186972132425283, "grad_norm": 0.8344172835350037, "learning_rate": 9.94807834086615e-05, "loss": 0.8321, "step": 14380 }, { "epoch": 0.09193360847399154, "grad_norm": 0.9786416888237, "learning_rate": 9.948006192371635e-05, "loss": 0.7653, "step": 14390 }, { "epoch": 0.09199749562373025, "grad_norm": 1.2197997570037842, "learning_rate": 9.947933994046419e-05, "loss": 0.9922, "step": 14400 }, { "epoch": 0.09206138277346894, "grad_norm": 0.915473222732544, "learning_rate": 9.947861745891227e-05, "loss": 0.921, "step": 14410 }, { "epoch": 0.09212526992320764, "grad_norm": 0.9322916865348816, "learning_rate": 9.947789447906785e-05, "loss": 1.0827, "step": 14420 }, { "epoch": 0.09218915707294635, "grad_norm": 1.073462963104248, "learning_rate": 9.947717100093825e-05, "loss": 0.9149, "step": 14430 }, { "epoch": 0.09225304422268504, "grad_norm": 1.7424027919769287, "learning_rate": 9.947644702453072e-05, "loss": 0.9262, "step": 14440 }, { "epoch": 0.09231693137242375, "grad_norm": 0.5602442026138306, "learning_rate": 9.947572254985258e-05, "loss": 0.8065, "step": 14450 }, { "epoch": 0.09238081852216246, "grad_norm": 0.7667688727378845, "learning_rate": 9.94749975769111e-05, "loss": 0.9672, "step": 14460 }, { "epoch": 0.09244470567190115, "grad_norm": 0.8217202425003052, "learning_rate": 9.947427210571359e-05, "loss": 0.8194, "step": 14470 }, { "epoch": 0.09250859282163985, "grad_norm": 0.7690846920013428, "learning_rate": 9.947354613626737e-05, "loss": 0.6602, "step": 14480 }, { "epoch": 0.09257247997137856, "grad_norm": 0.7123977541923523, "learning_rate": 9.947281966857973e-05, "loss": 0.9875, "step": 14490 }, { "epoch": 0.09263636712111725, "grad_norm": 1.3590373992919922, "learning_rate": 9.947209270265801e-05, "loss": 1.0355, "step": 14500 }, { "epoch": 0.09270025427085596, "grad_norm": 2.0925168991088867, "learning_rate": 9.947136523850949e-05, "loss": 0.9441, "step": 14510 }, { "epoch": 0.09276414142059466, "grad_norm": 0.7630490064620972, "learning_rate": 9.947063727614155e-05, "loss": 0.7035, "step": 14520 }, { "epoch": 0.09282802857033336, "grad_norm": 0.5995486378669739, "learning_rate": 9.946990881556148e-05, "loss": 0.8794, "step": 14530 }, { "epoch": 0.09289191572007206, "grad_norm": 0.5936999917030334, "learning_rate": 9.946917985677664e-05, "loss": 0.8, "step": 14540 }, { "epoch": 0.09295580286981077, "grad_norm": 2.0189425945281982, "learning_rate": 9.946845039979436e-05, "loss": 0.9379, "step": 14550 }, { "epoch": 0.09301969001954946, "grad_norm": 0.9083710312843323, "learning_rate": 9.946772044462197e-05, "loss": 1.1928, "step": 14560 }, { "epoch": 0.09308357716928817, "grad_norm": 0.7872990965843201, "learning_rate": 9.946698999126686e-05, "loss": 0.9303, "step": 14570 }, { "epoch": 0.09314746431902687, "grad_norm": 0.9097589254379272, "learning_rate": 9.946625903973636e-05, "loss": 0.8706, "step": 14580 }, { "epoch": 0.09321135146876557, "grad_norm": 1.2268530130386353, "learning_rate": 9.946552759003783e-05, "loss": 0.7452, "step": 14590 }, { "epoch": 0.09327523861850427, "grad_norm": 0.7525649070739746, "learning_rate": 9.946479564217866e-05, "loss": 1.1206, "step": 14600 }, { "epoch": 0.09333912576824298, "grad_norm": 0.9777686595916748, "learning_rate": 9.946406319616619e-05, "loss": 0.9522, "step": 14610 }, { "epoch": 0.09340301291798167, "grad_norm": 0.7327966690063477, "learning_rate": 9.946333025200781e-05, "loss": 0.7119, "step": 14620 }, { "epoch": 0.09346690006772038, "grad_norm": 0.8345320820808411, "learning_rate": 9.946259680971091e-05, "loss": 0.9164, "step": 14630 }, { "epoch": 0.09353078721745908, "grad_norm": 1.128624439239502, "learning_rate": 9.946186286928288e-05, "loss": 0.8583, "step": 14640 }, { "epoch": 0.09359467436719779, "grad_norm": 0.753193199634552, "learning_rate": 9.946112843073107e-05, "loss": 1.0453, "step": 14650 }, { "epoch": 0.09365856151693648, "grad_norm": 0.9466274380683899, "learning_rate": 9.946039349406294e-05, "loss": 1.1494, "step": 14660 }, { "epoch": 0.09372244866667519, "grad_norm": 0.8753125667572021, "learning_rate": 9.945965805928583e-05, "loss": 0.7926, "step": 14670 }, { "epoch": 0.0937863358164139, "grad_norm": 0.7783929109573364, "learning_rate": 9.94589221264072e-05, "loss": 1.0401, "step": 14680 }, { "epoch": 0.09385022296615259, "grad_norm": 1.0802748203277588, "learning_rate": 9.945818569543441e-05, "loss": 0.7928, "step": 14690 }, { "epoch": 0.09391411011589129, "grad_norm": 0.8250336647033691, "learning_rate": 9.945744876637491e-05, "loss": 0.9204, "step": 14700 }, { "epoch": 0.09397799726563, "grad_norm": 0.6883922815322876, "learning_rate": 9.945671133923614e-05, "loss": 0.8513, "step": 14710 }, { "epoch": 0.09404188441536869, "grad_norm": 0.4683299958705902, "learning_rate": 9.945597341402547e-05, "loss": 0.6514, "step": 14720 }, { "epoch": 0.0941057715651074, "grad_norm": 0.6585717797279358, "learning_rate": 9.945523499075037e-05, "loss": 0.9824, "step": 14730 }, { "epoch": 0.0941696587148461, "grad_norm": 0.5519923567771912, "learning_rate": 9.945449606941826e-05, "loss": 1.007, "step": 14740 }, { "epoch": 0.0942335458645848, "grad_norm": 0.6457942128181458, "learning_rate": 9.945375665003661e-05, "loss": 0.6664, "step": 14750 }, { "epoch": 0.0942974330143235, "grad_norm": 0.906104326248169, "learning_rate": 9.945301673261285e-05, "loss": 0.8221, "step": 14760 }, { "epoch": 0.0943613201640622, "grad_norm": 0.8347557187080383, "learning_rate": 9.945227631715442e-05, "loss": 0.8833, "step": 14770 }, { "epoch": 0.0944252073138009, "grad_norm": 0.6181365847587585, "learning_rate": 9.945153540366877e-05, "loss": 1.0287, "step": 14780 }, { "epoch": 0.0944890944635396, "grad_norm": 0.6475619077682495, "learning_rate": 9.945079399216339e-05, "loss": 0.8144, "step": 14790 }, { "epoch": 0.09455298161327831, "grad_norm": 0.6462060809135437, "learning_rate": 9.945005208264572e-05, "loss": 0.8489, "step": 14800 }, { "epoch": 0.094616868763017, "grad_norm": 0.6881303787231445, "learning_rate": 9.944938393828552e-05, "loss": 0.8207, "step": 14810 }, { "epoch": 0.09468075591275571, "grad_norm": 0.6425718069076538, "learning_rate": 9.944864108256513e-05, "loss": 0.91, "step": 14820 }, { "epoch": 0.09474464306249442, "grad_norm": 1.9519624710083008, "learning_rate": 9.944789772885414e-05, "loss": 0.8698, "step": 14830 }, { "epoch": 0.09480853021223311, "grad_norm": 0.887140154838562, "learning_rate": 9.944715387716004e-05, "loss": 0.909, "step": 14840 }, { "epoch": 0.09487241736197181, "grad_norm": 0.7273536920547485, "learning_rate": 9.944640952749033e-05, "loss": 1.1605, "step": 14850 }, { "epoch": 0.09493630451171052, "grad_norm": 0.928715169429779, "learning_rate": 9.944566467985249e-05, "loss": 1.0493, "step": 14860 }, { "epoch": 0.09500019166144921, "grad_norm": 0.5552724003791809, "learning_rate": 9.944491933425403e-05, "loss": 1.1027, "step": 14870 }, { "epoch": 0.09506407881118792, "grad_norm": 0.8260436058044434, "learning_rate": 9.944417349070247e-05, "loss": 0.7093, "step": 14880 }, { "epoch": 0.09512796596092662, "grad_norm": 2.4791147708892822, "learning_rate": 9.944342714920529e-05, "loss": 1.1502, "step": 14890 }, { "epoch": 0.09519185311066532, "grad_norm": 0.8212199211120605, "learning_rate": 9.944268030977003e-05, "loss": 1.0912, "step": 14900 }, { "epoch": 0.09525574026040402, "grad_norm": 0.8238768577575684, "learning_rate": 9.94419329724042e-05, "loss": 0.8248, "step": 14910 }, { "epoch": 0.09531962741014273, "grad_norm": 1.0283452272415161, "learning_rate": 9.944118513711535e-05, "loss": 1.0666, "step": 14920 }, { "epoch": 0.09538351455988142, "grad_norm": 0.7515852451324463, "learning_rate": 9.944043680391098e-05, "loss": 0.798, "step": 14930 }, { "epoch": 0.09544740170962013, "grad_norm": 0.8797821998596191, "learning_rate": 9.943968797279864e-05, "loss": 0.8629, "step": 14940 }, { "epoch": 0.09551128885935883, "grad_norm": 0.8942396640777588, "learning_rate": 9.943893864378587e-05, "loss": 0.8589, "step": 14950 }, { "epoch": 0.09557517600909753, "grad_norm": 0.7868557572364807, "learning_rate": 9.943818881688023e-05, "loss": 0.7879, "step": 14960 }, { "epoch": 0.09563906315883623, "grad_norm": 0.766189694404602, "learning_rate": 9.943743849208924e-05, "loss": 1.0051, "step": 14970 }, { "epoch": 0.09570295030857494, "grad_norm": 0.7284533381462097, "learning_rate": 9.943668766942049e-05, "loss": 0.6991, "step": 14980 }, { "epoch": 0.09576683745831363, "grad_norm": 1.0945543050765991, "learning_rate": 9.943593634888151e-05, "loss": 0.8595, "step": 14990 }, { "epoch": 0.09583072460805234, "grad_norm": 1.704253077507019, "learning_rate": 9.943518453047988e-05, "loss": 1.0841, "step": 15000 }, { "epoch": 0.09589461175779104, "grad_norm": 0.537315309047699, "learning_rate": 9.943443221422319e-05, "loss": 1.0965, "step": 15010 }, { "epoch": 0.09595849890752974, "grad_norm": 1.1799222230911255, "learning_rate": 9.9433679400119e-05, "loss": 0.9025, "step": 15020 }, { "epoch": 0.09602238605726844, "grad_norm": 3.8464369773864746, "learning_rate": 9.943292608817489e-05, "loss": 0.8995, "step": 15030 }, { "epoch": 0.09608627320700715, "grad_norm": 1.1854133605957031, "learning_rate": 9.943217227839845e-05, "loss": 1.2093, "step": 15040 }, { "epoch": 0.09615016035674584, "grad_norm": 1.119036078453064, "learning_rate": 9.943141797079727e-05, "loss": 0.6415, "step": 15050 }, { "epoch": 0.09621404750648455, "grad_norm": 0.9091972708702087, "learning_rate": 9.943066316537895e-05, "loss": 0.7339, "step": 15060 }, { "epoch": 0.09627793465622325, "grad_norm": 2.1518936157226562, "learning_rate": 9.942990786215107e-05, "loss": 0.7829, "step": 15070 }, { "epoch": 0.09634182180596194, "grad_norm": 0.8024427890777588, "learning_rate": 9.942915206112126e-05, "loss": 0.9612, "step": 15080 }, { "epoch": 0.09640570895570065, "grad_norm": 0.90773606300354, "learning_rate": 9.942839576229714e-05, "loss": 1.0113, "step": 15090 }, { "epoch": 0.09646959610543936, "grad_norm": 1.2031515836715698, "learning_rate": 9.942763896568632e-05, "loss": 0.929, "step": 15100 }, { "epoch": 0.09653348325517805, "grad_norm": 1.1134458780288696, "learning_rate": 9.942688167129639e-05, "loss": 1.0391, "step": 15110 }, { "epoch": 0.09659737040491675, "grad_norm": 1.0063025951385498, "learning_rate": 9.942612387913501e-05, "loss": 0.8559, "step": 15120 }, { "epoch": 0.09666125755465546, "grad_norm": 0.737177848815918, "learning_rate": 9.94253655892098e-05, "loss": 1.0731, "step": 15130 }, { "epoch": 0.09672514470439415, "grad_norm": 0.8199975490570068, "learning_rate": 9.942460680152842e-05, "loss": 0.8919, "step": 15140 }, { "epoch": 0.09678903185413286, "grad_norm": 0.9995172023773193, "learning_rate": 9.942384751609848e-05, "loss": 0.9533, "step": 15150 }, { "epoch": 0.09685291900387157, "grad_norm": 1.6807196140289307, "learning_rate": 9.942308773292764e-05, "loss": 1.2186, "step": 15160 }, { "epoch": 0.09691680615361026, "grad_norm": 0.6781327724456787, "learning_rate": 9.942232745202353e-05, "loss": 0.9126, "step": 15170 }, { "epoch": 0.09698069330334896, "grad_norm": 0.8096178770065308, "learning_rate": 9.942156667339385e-05, "loss": 0.8445, "step": 15180 }, { "epoch": 0.09704458045308767, "grad_norm": 0.4493632912635803, "learning_rate": 9.942080539704621e-05, "loss": 0.9263, "step": 15190 }, { "epoch": 0.09710846760282636, "grad_norm": 1.0077593326568604, "learning_rate": 9.942004362298834e-05, "loss": 0.8551, "step": 15200 }, { "epoch": 0.09717235475256507, "grad_norm": 0.7614121437072754, "learning_rate": 9.941928135122784e-05, "loss": 0.9088, "step": 15210 }, { "epoch": 0.09723624190230377, "grad_norm": 1.770782470703125, "learning_rate": 9.941851858177244e-05, "loss": 0.8671, "step": 15220 }, { "epoch": 0.09730012905204247, "grad_norm": 0.8057569861412048, "learning_rate": 9.941775531462982e-05, "loss": 0.8172, "step": 15230 }, { "epoch": 0.09736401620178117, "grad_norm": 0.6936876177787781, "learning_rate": 9.941699154980763e-05, "loss": 0.8575, "step": 15240 }, { "epoch": 0.09742790335151988, "grad_norm": 0.6702722311019897, "learning_rate": 9.941622728731359e-05, "loss": 1.004, "step": 15250 }, { "epoch": 0.09749179050125857, "grad_norm": 1.0262168645858765, "learning_rate": 9.94154625271554e-05, "loss": 0.9267, "step": 15260 }, { "epoch": 0.09755567765099728, "grad_norm": 1.287480115890503, "learning_rate": 9.941469726934074e-05, "loss": 0.8412, "step": 15270 }, { "epoch": 0.09761956480073598, "grad_norm": 1.0471506118774414, "learning_rate": 9.941393151387734e-05, "loss": 0.9556, "step": 15280 }, { "epoch": 0.09768345195047468, "grad_norm": 0.948810875415802, "learning_rate": 9.941316526077289e-05, "loss": 1.0511, "step": 15290 }, { "epoch": 0.09774733910021338, "grad_norm": 0.6042103171348572, "learning_rate": 9.941239851003511e-05, "loss": 0.858, "step": 15300 }, { "epoch": 0.09781122624995209, "grad_norm": 0.7108423113822937, "learning_rate": 9.941163126167175e-05, "loss": 1.0698, "step": 15310 }, { "epoch": 0.09787511339969078, "grad_norm": 0.8583425283432007, "learning_rate": 9.94108635156905e-05, "loss": 0.9262, "step": 15320 }, { "epoch": 0.09793900054942949, "grad_norm": 1.3478715419769287, "learning_rate": 9.941009527209911e-05, "loss": 0.8279, "step": 15330 }, { "epoch": 0.09800288769916819, "grad_norm": 0.7297415137290955, "learning_rate": 9.940932653090532e-05, "loss": 0.7739, "step": 15340 }, { "epoch": 0.09806677484890688, "grad_norm": 0.6165359616279602, "learning_rate": 9.940855729211687e-05, "loss": 0.9152, "step": 15350 }, { "epoch": 0.09813066199864559, "grad_norm": 0.6644479632377625, "learning_rate": 9.940778755574149e-05, "loss": 0.8523, "step": 15360 }, { "epoch": 0.0981945491483843, "grad_norm": 0.9046561121940613, "learning_rate": 9.940701732178695e-05, "loss": 0.9418, "step": 15370 }, { "epoch": 0.09825843629812299, "grad_norm": 0.6211059093475342, "learning_rate": 9.9406246590261e-05, "loss": 0.7587, "step": 15380 }, { "epoch": 0.0983223234478617, "grad_norm": 1.164886236190796, "learning_rate": 9.940547536117142e-05, "loss": 1.2, "step": 15390 }, { "epoch": 0.0983862105976004, "grad_norm": 0.9881723523139954, "learning_rate": 9.940470363452596e-05, "loss": 0.7596, "step": 15400 }, { "epoch": 0.0984500977473391, "grad_norm": 6.108283042907715, "learning_rate": 9.940393141033238e-05, "loss": 1.05, "step": 15410 }, { "epoch": 0.0985139848970778, "grad_norm": 0.5831863880157471, "learning_rate": 9.940315868859847e-05, "loss": 1.2292, "step": 15420 }, { "epoch": 0.0985778720468165, "grad_norm": 1.4908435344696045, "learning_rate": 9.940238546933203e-05, "loss": 0.99, "step": 15430 }, { "epoch": 0.0986417591965552, "grad_norm": 0.80536949634552, "learning_rate": 9.940161175254082e-05, "loss": 0.9417, "step": 15440 }, { "epoch": 0.0987056463462939, "grad_norm": 0.6706516146659851, "learning_rate": 9.940083753823263e-05, "loss": 1.249, "step": 15450 }, { "epoch": 0.09876953349603261, "grad_norm": 0.6131950616836548, "learning_rate": 9.940006282641527e-05, "loss": 0.7975, "step": 15460 }, { "epoch": 0.0988334206457713, "grad_norm": 0.9210124611854553, "learning_rate": 9.939928761709655e-05, "loss": 0.7322, "step": 15470 }, { "epoch": 0.09889730779551001, "grad_norm": 0.8976283669471741, "learning_rate": 9.939851191028426e-05, "loss": 0.9391, "step": 15480 }, { "epoch": 0.09896119494524871, "grad_norm": 0.7244909405708313, "learning_rate": 9.939773570598623e-05, "loss": 0.7818, "step": 15490 }, { "epoch": 0.09902508209498742, "grad_norm": 1.1001940965652466, "learning_rate": 9.939695900421024e-05, "loss": 0.9527, "step": 15500 }, { "epoch": 0.09908896924472611, "grad_norm": 0.7406299114227295, "learning_rate": 9.939618180496417e-05, "loss": 0.8922, "step": 15510 }, { "epoch": 0.09915285639446482, "grad_norm": 1.2300517559051514, "learning_rate": 9.93954041082558e-05, "loss": 1.2021, "step": 15520 }, { "epoch": 0.09921674354420353, "grad_norm": 0.9667423963546753, "learning_rate": 9.9394625914093e-05, "loss": 1.1348, "step": 15530 }, { "epoch": 0.09928063069394222, "grad_norm": 0.8901247382164001, "learning_rate": 9.939384722248355e-05, "loss": 1.2461, "step": 15540 }, { "epoch": 0.09934451784368092, "grad_norm": 0.8347676992416382, "learning_rate": 9.939306803343533e-05, "loss": 0.7845, "step": 15550 }, { "epoch": 0.09940840499341963, "grad_norm": 0.9552205801010132, "learning_rate": 9.93922883469562e-05, "loss": 0.6641, "step": 15560 }, { "epoch": 0.09947229214315832, "grad_norm": 0.8416782021522522, "learning_rate": 9.939150816305399e-05, "loss": 0.9133, "step": 15570 }, { "epoch": 0.09953617929289703, "grad_norm": 1.2031623125076294, "learning_rate": 9.939072748173656e-05, "loss": 0.7874, "step": 15580 }, { "epoch": 0.09960006644263573, "grad_norm": 1.0405542850494385, "learning_rate": 9.938994630301179e-05, "loss": 1.0763, "step": 15590 }, { "epoch": 0.09966395359237443, "grad_norm": 0.7080594301223755, "learning_rate": 9.938916462688753e-05, "loss": 1.1229, "step": 15600 }, { "epoch": 0.09972784074211313, "grad_norm": 0.6351432204246521, "learning_rate": 9.938838245337163e-05, "loss": 0.8626, "step": 15610 }, { "epoch": 0.09979172789185184, "grad_norm": 1.3848146200180054, "learning_rate": 9.938759978247201e-05, "loss": 0.8473, "step": 15620 }, { "epoch": 0.09985561504159053, "grad_norm": 0.9175819754600525, "learning_rate": 9.938681661419654e-05, "loss": 0.8902, "step": 15630 }, { "epoch": 0.09991950219132924, "grad_norm": 0.729713499546051, "learning_rate": 9.938603294855309e-05, "loss": 0.8599, "step": 15640 }, { "epoch": 0.09998338934106794, "grad_norm": 0.8896664381027222, "learning_rate": 9.938524878554956e-05, "loss": 1.2631, "step": 15650 }, { "epoch": 0.10004727649080664, "grad_norm": 1.1083167791366577, "learning_rate": 9.938446412519387e-05, "loss": 0.8752, "step": 15660 }, { "epoch": 0.10011116364054534, "grad_norm": 0.9350288510322571, "learning_rate": 9.938367896749388e-05, "loss": 0.891, "step": 15670 }, { "epoch": 0.10017505079028405, "grad_norm": 0.8491414785385132, "learning_rate": 9.938289331245753e-05, "loss": 0.9962, "step": 15680 }, { "epoch": 0.10023893794002274, "grad_norm": 1.3653219938278198, "learning_rate": 9.938210716009272e-05, "loss": 0.8964, "step": 15690 }, { "epoch": 0.10030282508976145, "grad_norm": 1.137112021446228, "learning_rate": 9.938132051040736e-05, "loss": 0.9982, "step": 15700 }, { "epoch": 0.10036671223950015, "grad_norm": 0.8561280965805054, "learning_rate": 9.93805333634094e-05, "loss": 0.7421, "step": 15710 }, { "epoch": 0.10043059938923884, "grad_norm": 1.1215713024139404, "learning_rate": 9.937974571910674e-05, "loss": 0.926, "step": 15720 }, { "epoch": 0.10049448653897755, "grad_norm": 0.6843059659004211, "learning_rate": 9.937895757750733e-05, "loss": 0.934, "step": 15730 }, { "epoch": 0.10055837368871626, "grad_norm": 0.8098707795143127, "learning_rate": 9.937816893861909e-05, "loss": 0.8128, "step": 15740 }, { "epoch": 0.10062226083845495, "grad_norm": 0.8894488215446472, "learning_rate": 9.937737980244997e-05, "loss": 0.9038, "step": 15750 }, { "epoch": 0.10068614798819366, "grad_norm": 1.0936787128448486, "learning_rate": 9.937659016900791e-05, "loss": 0.8245, "step": 15760 }, { "epoch": 0.10075003513793236, "grad_norm": 1.0727956295013428, "learning_rate": 9.937580003830088e-05, "loss": 0.8693, "step": 15770 }, { "epoch": 0.10081392228767105, "grad_norm": 1.4079822301864624, "learning_rate": 9.937500941033682e-05, "loss": 0.6185, "step": 15780 }, { "epoch": 0.10087780943740976, "grad_norm": 1.4234700202941895, "learning_rate": 9.937421828512371e-05, "loss": 0.7397, "step": 15790 }, { "epoch": 0.10094169658714847, "grad_norm": 0.8071795701980591, "learning_rate": 9.937342666266951e-05, "loss": 1.1495, "step": 15800 }, { "epoch": 0.10100558373688716, "grad_norm": 2.0237574577331543, "learning_rate": 9.937263454298217e-05, "loss": 1.0899, "step": 15810 }, { "epoch": 0.10106947088662586, "grad_norm": 0.6750722527503967, "learning_rate": 9.93718419260697e-05, "loss": 0.7826, "step": 15820 }, { "epoch": 0.10113335803636457, "grad_norm": 0.9621725678443909, "learning_rate": 9.937104881194008e-05, "loss": 1.0047, "step": 15830 }, { "epoch": 0.10119724518610326, "grad_norm": 0.7849874496459961, "learning_rate": 9.937025520060127e-05, "loss": 0.8557, "step": 15840 }, { "epoch": 0.10126113233584197, "grad_norm": 0.6543164253234863, "learning_rate": 9.936946109206129e-05, "loss": 0.8556, "step": 15850 }, { "epoch": 0.10132501948558068, "grad_norm": 0.8516491651535034, "learning_rate": 9.936866648632811e-05, "loss": 0.8175, "step": 15860 }, { "epoch": 0.10138890663531937, "grad_norm": 0.7480735778808594, "learning_rate": 9.936787138340976e-05, "loss": 0.9132, "step": 15870 }, { "epoch": 0.10145279378505807, "grad_norm": 0.7891073822975159, "learning_rate": 9.936707578331423e-05, "loss": 0.9786, "step": 15880 }, { "epoch": 0.10151668093479678, "grad_norm": 0.6075239181518555, "learning_rate": 9.936627968604955e-05, "loss": 0.7032, "step": 15890 }, { "epoch": 0.10158056808453547, "grad_norm": 2.8596110343933105, "learning_rate": 9.93654830916237e-05, "loss": 0.9138, "step": 15900 }, { "epoch": 0.10164445523427418, "grad_norm": 1.1982015371322632, "learning_rate": 9.936468600004477e-05, "loss": 0.7194, "step": 15910 }, { "epoch": 0.10170834238401288, "grad_norm": 0.6473510265350342, "learning_rate": 9.936388841132071e-05, "loss": 0.7809, "step": 15920 }, { "epoch": 0.10177222953375158, "grad_norm": 1.089911937713623, "learning_rate": 9.936309032545961e-05, "loss": 0.846, "step": 15930 }, { "epoch": 0.10183611668349028, "grad_norm": 0.9146657586097717, "learning_rate": 9.936229174246947e-05, "loss": 0.8369, "step": 15940 }, { "epoch": 0.10190000383322899, "grad_norm": 0.599389374256134, "learning_rate": 9.936149266235835e-05, "loss": 0.9457, "step": 15950 }, { "epoch": 0.10196389098296768, "grad_norm": 0.5718626976013184, "learning_rate": 9.93606930851343e-05, "loss": 1.1508, "step": 15960 }, { "epoch": 0.10202777813270639, "grad_norm": 0.5820611715316772, "learning_rate": 9.935989301080535e-05, "loss": 0.5636, "step": 15970 }, { "epoch": 0.1020916652824451, "grad_norm": 0.9194528460502625, "learning_rate": 9.935909243937959e-05, "loss": 0.9002, "step": 15980 }, { "epoch": 0.10215555243218379, "grad_norm": 1.094212293624878, "learning_rate": 9.935829137086508e-05, "loss": 1.2759, "step": 15990 }, { "epoch": 0.10221943958192249, "grad_norm": 0.8695144653320312, "learning_rate": 9.935748980526986e-05, "loss": 0.6543, "step": 16000 }, { "epoch": 0.1022833267316612, "grad_norm": 1.7058948278427124, "learning_rate": 9.935668774260202e-05, "loss": 0.8703, "step": 16010 }, { "epoch": 0.10234721388139989, "grad_norm": 2.493241310119629, "learning_rate": 9.935588518286963e-05, "loss": 1.282, "step": 16020 }, { "epoch": 0.1024111010311386, "grad_norm": 0.7929388284683228, "learning_rate": 9.935508212608078e-05, "loss": 0.6585, "step": 16030 }, { "epoch": 0.1024749881808773, "grad_norm": 0.8106563091278076, "learning_rate": 9.935427857224356e-05, "loss": 0.9955, "step": 16040 }, { "epoch": 0.102538875330616, "grad_norm": 2.413360357284546, "learning_rate": 9.935347452136606e-05, "loss": 1.0771, "step": 16050 }, { "epoch": 0.1026027624803547, "grad_norm": 0.7023759484291077, "learning_rate": 9.935266997345636e-05, "loss": 1.0192, "step": 16060 }, { "epoch": 0.1026666496300934, "grad_norm": 1.3818843364715576, "learning_rate": 9.935186492852258e-05, "loss": 1.1104, "step": 16070 }, { "epoch": 0.1027305367798321, "grad_norm": 1.1903809309005737, "learning_rate": 9.935105938657283e-05, "loss": 0.8756, "step": 16080 }, { "epoch": 0.1027944239295708, "grad_norm": 0.610237181186676, "learning_rate": 9.935025334761523e-05, "loss": 0.9345, "step": 16090 }, { "epoch": 0.10285831107930951, "grad_norm": 0.8631981015205383, "learning_rate": 9.934944681165786e-05, "loss": 1.0976, "step": 16100 }, { "epoch": 0.1029221982290482, "grad_norm": 0.5845250487327576, "learning_rate": 9.934863977870889e-05, "loss": 0.8406, "step": 16110 }, { "epoch": 0.10298608537878691, "grad_norm": 0.5269205570220947, "learning_rate": 9.93478322487764e-05, "loss": 0.9238, "step": 16120 }, { "epoch": 0.10304997252852562, "grad_norm": 0.6796483397483826, "learning_rate": 9.934702422186857e-05, "loss": 0.9912, "step": 16130 }, { "epoch": 0.10311385967826431, "grad_norm": 0.9061000347137451, "learning_rate": 9.93462156979935e-05, "loss": 1.0622, "step": 16140 }, { "epoch": 0.10317774682800301, "grad_norm": 0.5684584379196167, "learning_rate": 9.934540667715936e-05, "loss": 0.8797, "step": 16150 }, { "epoch": 0.10324163397774172, "grad_norm": 0.8343471884727478, "learning_rate": 9.934459715937428e-05, "loss": 0.8628, "step": 16160 }, { "epoch": 0.10330552112748041, "grad_norm": 0.9811477065086365, "learning_rate": 9.934378714464642e-05, "loss": 1.1671, "step": 16170 }, { "epoch": 0.10336940827721912, "grad_norm": 0.9283135533332825, "learning_rate": 9.934297663298393e-05, "loss": 0.7027, "step": 16180 }, { "epoch": 0.10343329542695782, "grad_norm": 0.7332042455673218, "learning_rate": 9.934216562439498e-05, "loss": 0.8026, "step": 16190 }, { "epoch": 0.10349718257669652, "grad_norm": 3.353732109069824, "learning_rate": 9.934135411888773e-05, "loss": 1.1843, "step": 16200 }, { "epoch": 0.10356106972643522, "grad_norm": 1.056642770767212, "learning_rate": 9.934054211647036e-05, "loss": 0.8445, "step": 16210 }, { "epoch": 0.10362495687617393, "grad_norm": 0.6340813636779785, "learning_rate": 9.933972961715104e-05, "loss": 1.0407, "step": 16220 }, { "epoch": 0.10368884402591262, "grad_norm": 0.823939859867096, "learning_rate": 9.933891662093797e-05, "loss": 0.9409, "step": 16230 }, { "epoch": 0.10375273117565133, "grad_norm": 1.3675154447555542, "learning_rate": 9.933810312783932e-05, "loss": 0.7627, "step": 16240 }, { "epoch": 0.10381661832539003, "grad_norm": 2.952162742614746, "learning_rate": 9.933728913786328e-05, "loss": 0.7343, "step": 16250 }, { "epoch": 0.10388050547512873, "grad_norm": 0.5602843165397644, "learning_rate": 9.933647465101807e-05, "loss": 0.9949, "step": 16260 }, { "epoch": 0.10394439262486743, "grad_norm": 1.035836935043335, "learning_rate": 9.933565966731187e-05, "loss": 0.733, "step": 16270 }, { "epoch": 0.10400827977460614, "grad_norm": 0.599962055683136, "learning_rate": 9.93348441867529e-05, "loss": 0.8972, "step": 16280 }, { "epoch": 0.10407216692434483, "grad_norm": 1.3323990106582642, "learning_rate": 9.933402820934936e-05, "loss": 0.8854, "step": 16290 }, { "epoch": 0.10413605407408354, "grad_norm": 1.9497777223587036, "learning_rate": 9.933321173510949e-05, "loss": 0.9189, "step": 16300 }, { "epoch": 0.10419994122382224, "grad_norm": 1.2453469038009644, "learning_rate": 9.933239476404149e-05, "loss": 0.9895, "step": 16310 }, { "epoch": 0.10426382837356094, "grad_norm": 1.1778478622436523, "learning_rate": 9.933157729615359e-05, "loss": 0.9034, "step": 16320 }, { "epoch": 0.10432771552329964, "grad_norm": 0.7370180487632751, "learning_rate": 9.933075933145404e-05, "loss": 0.9827, "step": 16330 }, { "epoch": 0.10439160267303835, "grad_norm": 0.992669403553009, "learning_rate": 9.932994086995107e-05, "loss": 0.696, "step": 16340 }, { "epoch": 0.10445548982277705, "grad_norm": 0.8469734191894531, "learning_rate": 9.93291219116529e-05, "loss": 0.7605, "step": 16350 }, { "epoch": 0.10451937697251575, "grad_norm": 1.4844669103622437, "learning_rate": 9.932830245656782e-05, "loss": 0.8848, "step": 16360 }, { "epoch": 0.10458326412225445, "grad_norm": 0.7089157104492188, "learning_rate": 9.932748250470403e-05, "loss": 0.8722, "step": 16370 }, { "epoch": 0.10464715127199316, "grad_norm": 0.6361833214759827, "learning_rate": 9.932666205606984e-05, "loss": 1.0907, "step": 16380 }, { "epoch": 0.10471103842173185, "grad_norm": 1.060922384262085, "learning_rate": 9.932584111067348e-05, "loss": 0.9377, "step": 16390 }, { "epoch": 0.10477492557147056, "grad_norm": 1.2127258777618408, "learning_rate": 9.932501966852323e-05, "loss": 1.1433, "step": 16400 }, { "epoch": 0.10483881272120926, "grad_norm": 0.6231849193572998, "learning_rate": 9.932419772962735e-05, "loss": 0.925, "step": 16410 }, { "epoch": 0.10490269987094795, "grad_norm": 0.5481915473937988, "learning_rate": 9.932337529399415e-05, "loss": 0.8031, "step": 16420 }, { "epoch": 0.10496658702068666, "grad_norm": 0.5232637524604797, "learning_rate": 9.932255236163187e-05, "loss": 0.8512, "step": 16430 }, { "epoch": 0.10503047417042537, "grad_norm": 0.6596049666404724, "learning_rate": 9.932172893254884e-05, "loss": 0.7366, "step": 16440 }, { "epoch": 0.10509436132016406, "grad_norm": 0.826575517654419, "learning_rate": 9.932090500675331e-05, "loss": 0.7942, "step": 16450 }, { "epoch": 0.10515824846990277, "grad_norm": 0.6646784543991089, "learning_rate": 9.932008058425359e-05, "loss": 1.1065, "step": 16460 }, { "epoch": 0.10522213561964147, "grad_norm": 0.6288832426071167, "learning_rate": 9.931925566505802e-05, "loss": 0.9242, "step": 16470 }, { "epoch": 0.10528602276938016, "grad_norm": 0.7876302003860474, "learning_rate": 9.931843024917484e-05, "loss": 0.9227, "step": 16480 }, { "epoch": 0.10534990991911887, "grad_norm": 0.6333622336387634, "learning_rate": 9.931760433661244e-05, "loss": 0.9783, "step": 16490 }, { "epoch": 0.10541379706885758, "grad_norm": 1.2118867635726929, "learning_rate": 9.931677792737907e-05, "loss": 0.727, "step": 16500 }, { "epoch": 0.10547768421859627, "grad_norm": 0.8063325881958008, "learning_rate": 9.931595102148309e-05, "loss": 1.2654, "step": 16510 }, { "epoch": 0.10554157136833497, "grad_norm": 0.5137673020362854, "learning_rate": 9.931512361893283e-05, "loss": 0.7905, "step": 16520 }, { "epoch": 0.10560545851807368, "grad_norm": 1.0696414709091187, "learning_rate": 9.93142957197366e-05, "loss": 1.0821, "step": 16530 }, { "epoch": 0.10566934566781237, "grad_norm": 1.1155736446380615, "learning_rate": 9.931346732390274e-05, "loss": 0.7375, "step": 16540 }, { "epoch": 0.10573323281755108, "grad_norm": 0.784761369228363, "learning_rate": 9.931263843143962e-05, "loss": 0.7859, "step": 16550 }, { "epoch": 0.10579711996728978, "grad_norm": 0.9071635007858276, "learning_rate": 9.931180904235557e-05, "loss": 1.0189, "step": 16560 }, { "epoch": 0.10586100711702848, "grad_norm": 0.6615142822265625, "learning_rate": 9.931097915665892e-05, "loss": 0.9826, "step": 16570 }, { "epoch": 0.10592489426676718, "grad_norm": 1.0913355350494385, "learning_rate": 9.931014877435806e-05, "loss": 1.2501, "step": 16580 }, { "epoch": 0.10598878141650589, "grad_norm": 0.7185521125793457, "learning_rate": 9.930931789546136e-05, "loss": 0.9584, "step": 16590 }, { "epoch": 0.10605266856624458, "grad_norm": 0.9962629079818726, "learning_rate": 9.930848651997716e-05, "loss": 1.2084, "step": 16600 }, { "epoch": 0.10611655571598329, "grad_norm": 0.5388261079788208, "learning_rate": 9.930765464791383e-05, "loss": 0.7474, "step": 16610 }, { "epoch": 0.106180442865722, "grad_norm": 0.963033139705658, "learning_rate": 9.930682227927978e-05, "loss": 0.8856, "step": 16620 }, { "epoch": 0.10624433001546069, "grad_norm": 0.8740180730819702, "learning_rate": 9.930598941408335e-05, "loss": 0.9665, "step": 16630 }, { "epoch": 0.10630821716519939, "grad_norm": 0.7706631422042847, "learning_rate": 9.930515605233297e-05, "loss": 0.9538, "step": 16640 }, { "epoch": 0.1063721043149381, "grad_norm": 1.0172282457351685, "learning_rate": 9.930432219403702e-05, "loss": 0.9451, "step": 16650 }, { "epoch": 0.10643599146467679, "grad_norm": 1.1416665315628052, "learning_rate": 9.930348783920387e-05, "loss": 0.812, "step": 16660 }, { "epoch": 0.1064998786144155, "grad_norm": 1.248719573020935, "learning_rate": 9.930265298784196e-05, "loss": 1.0079, "step": 16670 }, { "epoch": 0.1065637657641542, "grad_norm": 0.8804942965507507, "learning_rate": 9.930181763995968e-05, "loss": 1.0038, "step": 16680 }, { "epoch": 0.1066276529138929, "grad_norm": 0.9898728132247925, "learning_rate": 9.930098179556543e-05, "loss": 0.9694, "step": 16690 }, { "epoch": 0.1066915400636316, "grad_norm": 1.1314060688018799, "learning_rate": 9.930014545466765e-05, "loss": 1.0318, "step": 16700 }, { "epoch": 0.10675542721337031, "grad_norm": 1.0899930000305176, "learning_rate": 9.929930861727476e-05, "loss": 1.1298, "step": 16710 }, { "epoch": 0.106819314363109, "grad_norm": 1.2332922220230103, "learning_rate": 9.929847128339517e-05, "loss": 0.9744, "step": 16720 }, { "epoch": 0.1068832015128477, "grad_norm": 1.1803171634674072, "learning_rate": 9.929763345303733e-05, "loss": 0.9733, "step": 16730 }, { "epoch": 0.10694708866258641, "grad_norm": 0.8435320854187012, "learning_rate": 9.929679512620969e-05, "loss": 0.8418, "step": 16740 }, { "epoch": 0.1070109758123251, "grad_norm": 0.68702632188797, "learning_rate": 9.929595630292066e-05, "loss": 1.0078, "step": 16750 }, { "epoch": 0.10707486296206381, "grad_norm": 0.8807457089424133, "learning_rate": 9.92951169831787e-05, "loss": 0.8933, "step": 16760 }, { "epoch": 0.10713875011180252, "grad_norm": 0.922346293926239, "learning_rate": 9.929427716699227e-05, "loss": 0.765, "step": 16770 }, { "epoch": 0.10720263726154121, "grad_norm": 0.6668721437454224, "learning_rate": 9.929343685436982e-05, "loss": 0.7723, "step": 16780 }, { "epoch": 0.10726652441127991, "grad_norm": 1.0509366989135742, "learning_rate": 9.929259604531981e-05, "loss": 0.9128, "step": 16790 }, { "epoch": 0.10733041156101862, "grad_norm": 0.9233303070068359, "learning_rate": 9.929175473985073e-05, "loss": 0.8772, "step": 16800 }, { "epoch": 0.10739429871075731, "grad_norm": 0.5858426094055176, "learning_rate": 9.929091293797102e-05, "loss": 0.9377, "step": 16810 }, { "epoch": 0.10745818586049602, "grad_norm": 0.7452363967895508, "learning_rate": 9.929007063968919e-05, "loss": 0.9821, "step": 16820 }, { "epoch": 0.10752207301023473, "grad_norm": 0.8996424078941345, "learning_rate": 9.92892278450137e-05, "loss": 0.969, "step": 16830 }, { "epoch": 0.10758596015997342, "grad_norm": 0.9038456082344055, "learning_rate": 9.928838455395304e-05, "loss": 0.9136, "step": 16840 }, { "epoch": 0.10764984730971212, "grad_norm": 2.0651540756225586, "learning_rate": 9.928754076651571e-05, "loss": 0.9447, "step": 16850 }, { "epoch": 0.10771373445945083, "grad_norm": 0.9123902916908264, "learning_rate": 9.928669648271021e-05, "loss": 1.0723, "step": 16860 }, { "epoch": 0.10777762160918952, "grad_norm": 0.7702105641365051, "learning_rate": 9.928585170254503e-05, "loss": 1.0555, "step": 16870 }, { "epoch": 0.10784150875892823, "grad_norm": 0.8191667795181274, "learning_rate": 9.928500642602869e-05, "loss": 0.952, "step": 16880 }, { "epoch": 0.10790539590866693, "grad_norm": 0.8521249890327454, "learning_rate": 9.928416065316969e-05, "loss": 0.9182, "step": 16890 }, { "epoch": 0.10796928305840563, "grad_norm": 0.4355503022670746, "learning_rate": 9.928331438397655e-05, "loss": 0.7828, "step": 16900 }, { "epoch": 0.10803317020814433, "grad_norm": 0.8053306937217712, "learning_rate": 9.928246761845782e-05, "loss": 1.073, "step": 16910 }, { "epoch": 0.10809705735788304, "grad_norm": 1.1718153953552246, "learning_rate": 9.928162035662199e-05, "loss": 0.8979, "step": 16920 }, { "epoch": 0.10816094450762173, "grad_norm": 0.8112810850143433, "learning_rate": 9.928077259847761e-05, "loss": 1.0277, "step": 16930 }, { "epoch": 0.10822483165736044, "grad_norm": 0.5165520906448364, "learning_rate": 9.927992434403322e-05, "loss": 1.0714, "step": 16940 }, { "epoch": 0.10828871880709914, "grad_norm": 0.9523488283157349, "learning_rate": 9.927907559329736e-05, "loss": 0.9623, "step": 16950 }, { "epoch": 0.10835260595683784, "grad_norm": 0.5549238324165344, "learning_rate": 9.927822634627857e-05, "loss": 0.7777, "step": 16960 }, { "epoch": 0.10841649310657654, "grad_norm": 0.8362735509872437, "learning_rate": 9.927737660298541e-05, "loss": 0.9429, "step": 16970 }, { "epoch": 0.10848038025631525, "grad_norm": 1.324947714805603, "learning_rate": 9.927652636342645e-05, "loss": 0.9626, "step": 16980 }, { "epoch": 0.10854426740605394, "grad_norm": 0.8219287395477295, "learning_rate": 9.927567562761021e-05, "loss": 1.1411, "step": 16990 }, { "epoch": 0.10860815455579265, "grad_norm": 0.7673150897026062, "learning_rate": 9.927482439554532e-05, "loss": 0.9758, "step": 17000 }, { "epoch": 0.10867204170553135, "grad_norm": 0.7057496905326843, "learning_rate": 9.92739726672403e-05, "loss": 0.8181, "step": 17010 }, { "epoch": 0.10873592885527004, "grad_norm": 1.2595868110656738, "learning_rate": 9.927312044270375e-05, "loss": 0.9396, "step": 17020 }, { "epoch": 0.10879981600500875, "grad_norm": 0.8270642161369324, "learning_rate": 9.927226772194426e-05, "loss": 0.9074, "step": 17030 }, { "epoch": 0.10886370315474746, "grad_norm": 0.5199679732322693, "learning_rate": 9.927141450497039e-05, "loss": 0.9427, "step": 17040 }, { "epoch": 0.10892759030448615, "grad_norm": 0.7724682688713074, "learning_rate": 9.927056079179076e-05, "loss": 0.8286, "step": 17050 }, { "epoch": 0.10899147745422486, "grad_norm": 0.614035964012146, "learning_rate": 9.926970658241397e-05, "loss": 0.8915, "step": 17060 }, { "epoch": 0.10905536460396356, "grad_norm": 1.045047402381897, "learning_rate": 9.926885187684859e-05, "loss": 0.8422, "step": 17070 }, { "epoch": 0.10911925175370225, "grad_norm": 0.7779353857040405, "learning_rate": 9.926799667510326e-05, "loss": 0.8882, "step": 17080 }, { "epoch": 0.10918313890344096, "grad_norm": 0.9462752938270569, "learning_rate": 9.926714097718657e-05, "loss": 0.923, "step": 17090 }, { "epoch": 0.10924702605317967, "grad_norm": 0.5807504057884216, "learning_rate": 9.926628478310715e-05, "loss": 0.8799, "step": 17100 }, { "epoch": 0.10931091320291836, "grad_norm": 0.8692427277565002, "learning_rate": 9.926542809287364e-05, "loss": 0.9051, "step": 17110 }, { "epoch": 0.10937480035265706, "grad_norm": 0.8406835794448853, "learning_rate": 9.926457090649462e-05, "loss": 0.7788, "step": 17120 }, { "epoch": 0.10943868750239577, "grad_norm": 0.8116185665130615, "learning_rate": 9.926371322397877e-05, "loss": 0.7086, "step": 17130 }, { "epoch": 0.10950257465213446, "grad_norm": 1.1767171621322632, "learning_rate": 9.92628550453347e-05, "loss": 0.9115, "step": 17140 }, { "epoch": 0.10956646180187317, "grad_norm": 0.8342850804328918, "learning_rate": 9.926199637057108e-05, "loss": 0.8859, "step": 17150 }, { "epoch": 0.10963034895161188, "grad_norm": 1.0540907382965088, "learning_rate": 9.926113719969652e-05, "loss": 1.1537, "step": 17160 }, { "epoch": 0.10969423610135058, "grad_norm": 1.0921380519866943, "learning_rate": 9.926027753271969e-05, "loss": 0.6981, "step": 17170 }, { "epoch": 0.10975812325108927, "grad_norm": 1.9400385618209839, "learning_rate": 9.925941736964925e-05, "loss": 0.953, "step": 17180 }, { "epoch": 0.10982201040082798, "grad_norm": 0.9200677275657654, "learning_rate": 9.925855671049387e-05, "loss": 1.0603, "step": 17190 }, { "epoch": 0.10988589755056669, "grad_norm": 0.8739213347434998, "learning_rate": 9.92576955552622e-05, "loss": 0.9993, "step": 17200 }, { "epoch": 0.10994978470030538, "grad_norm": 0.4887886345386505, "learning_rate": 9.925683390396292e-05, "loss": 0.9623, "step": 17210 }, { "epoch": 0.11001367185004408, "grad_norm": 0.7912802696228027, "learning_rate": 9.925597175660472e-05, "loss": 0.9074, "step": 17220 }, { "epoch": 0.11007755899978279, "grad_norm": 0.8125321865081787, "learning_rate": 9.925510911319626e-05, "loss": 0.8537, "step": 17230 }, { "epoch": 0.11014144614952148, "grad_norm": 0.5782091617584229, "learning_rate": 9.925424597374626e-05, "loss": 0.8458, "step": 17240 }, { "epoch": 0.11020533329926019, "grad_norm": 0.777730405330658, "learning_rate": 9.925338233826338e-05, "loss": 0.8778, "step": 17250 }, { "epoch": 0.1102692204489989, "grad_norm": 0.8471282124519348, "learning_rate": 9.925251820675633e-05, "loss": 0.8727, "step": 17260 }, { "epoch": 0.11033310759873759, "grad_norm": 0.7456023097038269, "learning_rate": 9.92516535792338e-05, "loss": 0.9105, "step": 17270 }, { "epoch": 0.1103969947484763, "grad_norm": 1.1746282577514648, "learning_rate": 9.925078845570452e-05, "loss": 0.9972, "step": 17280 }, { "epoch": 0.110460881898215, "grad_norm": 1.0486959218978882, "learning_rate": 9.92499228361772e-05, "loss": 0.923, "step": 17290 }, { "epoch": 0.11052476904795369, "grad_norm": 0.8411831259727478, "learning_rate": 9.924905672066054e-05, "loss": 0.8309, "step": 17300 }, { "epoch": 0.1105886561976924, "grad_norm": 0.8965527415275574, "learning_rate": 9.924819010916328e-05, "loss": 0.66, "step": 17310 }, { "epoch": 0.1106525433474311, "grad_norm": 0.8932517170906067, "learning_rate": 9.924732300169414e-05, "loss": 0.9388, "step": 17320 }, { "epoch": 0.1107164304971698, "grad_norm": 0.4896504878997803, "learning_rate": 9.924645539826184e-05, "loss": 0.9554, "step": 17330 }, { "epoch": 0.1107803176469085, "grad_norm": 1.2872638702392578, "learning_rate": 9.924558729887514e-05, "loss": 1.0668, "step": 17340 }, { "epoch": 0.11084420479664721, "grad_norm": 0.7142483592033386, "learning_rate": 9.924471870354277e-05, "loss": 0.9363, "step": 17350 }, { "epoch": 0.1109080919463859, "grad_norm": 0.8296705484390259, "learning_rate": 9.924384961227348e-05, "loss": 0.7258, "step": 17360 }, { "epoch": 0.1109719790961246, "grad_norm": 0.609883189201355, "learning_rate": 9.924298002507602e-05, "loss": 1.0268, "step": 17370 }, { "epoch": 0.11103586624586331, "grad_norm": 1.5724817514419556, "learning_rate": 9.924210994195915e-05, "loss": 0.9622, "step": 17380 }, { "epoch": 0.111099753395602, "grad_norm": 0.8032723069190979, "learning_rate": 9.924123936293164e-05, "loss": 0.7995, "step": 17390 }, { "epoch": 0.11116364054534071, "grad_norm": 0.6989961266517639, "learning_rate": 9.924036828800223e-05, "loss": 0.8276, "step": 17400 }, { "epoch": 0.11122752769507942, "grad_norm": 0.6586987972259521, "learning_rate": 9.923949671717973e-05, "loss": 0.9443, "step": 17410 }, { "epoch": 0.11129141484481811, "grad_norm": 1.005934715270996, "learning_rate": 9.923862465047291e-05, "loss": 0.7672, "step": 17420 }, { "epoch": 0.11135530199455682, "grad_norm": 0.7518536448478699, "learning_rate": 9.923775208789053e-05, "loss": 0.9946, "step": 17430 }, { "epoch": 0.11141918914429552, "grad_norm": 0.8329457640647888, "learning_rate": 9.923687902944138e-05, "loss": 0.8019, "step": 17440 }, { "epoch": 0.11148307629403421, "grad_norm": 0.5968138575553894, "learning_rate": 9.923600547513427e-05, "loss": 0.779, "step": 17450 }, { "epoch": 0.11154696344377292, "grad_norm": 0.5740717053413391, "learning_rate": 9.9235131424978e-05, "loss": 1.1269, "step": 17460 }, { "epoch": 0.11161085059351163, "grad_norm": 0.4525648355484009, "learning_rate": 9.923425687898135e-05, "loss": 0.9606, "step": 17470 }, { "epoch": 0.11167473774325032, "grad_norm": 0.7562941312789917, "learning_rate": 9.923338183715314e-05, "loss": 1.3565, "step": 17480 }, { "epoch": 0.11173862489298902, "grad_norm": 0.8217481970787048, "learning_rate": 9.923250629950218e-05, "loss": 0.9787, "step": 17490 }, { "epoch": 0.11180251204272773, "grad_norm": 1.1421339511871338, "learning_rate": 9.92316302660373e-05, "loss": 1.118, "step": 17500 }, { "epoch": 0.11186639919246642, "grad_norm": 1.1751115322113037, "learning_rate": 9.92307537367673e-05, "loss": 0.8283, "step": 17510 }, { "epoch": 0.11193028634220513, "grad_norm": 0.8353852033615112, "learning_rate": 9.922987671170103e-05, "loss": 1.1464, "step": 17520 }, { "epoch": 0.11199417349194384, "grad_norm": 0.6148945689201355, "learning_rate": 9.92289991908473e-05, "loss": 0.8993, "step": 17530 }, { "epoch": 0.11205806064168253, "grad_norm": 0.7143790125846863, "learning_rate": 9.922812117421496e-05, "loss": 0.8293, "step": 17540 }, { "epoch": 0.11212194779142123, "grad_norm": 0.6704200506210327, "learning_rate": 9.922724266181286e-05, "loss": 0.9002, "step": 17550 }, { "epoch": 0.11218583494115994, "grad_norm": 0.6758965253829956, "learning_rate": 9.922636365364984e-05, "loss": 0.868, "step": 17560 }, { "epoch": 0.11224972209089863, "grad_norm": 1.4119186401367188, "learning_rate": 9.922548414973473e-05, "loss": 0.8967, "step": 17570 }, { "epoch": 0.11231360924063734, "grad_norm": 0.7103084921836853, "learning_rate": 9.922460415007644e-05, "loss": 0.7774, "step": 17580 }, { "epoch": 0.11237749639037604, "grad_norm": 1.5748227834701538, "learning_rate": 9.922372365468378e-05, "loss": 0.8543, "step": 17590 }, { "epoch": 0.11244138354011474, "grad_norm": 0.8554244637489319, "learning_rate": 9.922284266356565e-05, "loss": 0.9862, "step": 17600 }, { "epoch": 0.11250527068985344, "grad_norm": 0.9203200936317444, "learning_rate": 9.92219611767309e-05, "loss": 0.8462, "step": 17610 }, { "epoch": 0.11256915783959215, "grad_norm": 0.8570156097412109, "learning_rate": 9.922107919418842e-05, "loss": 0.9768, "step": 17620 }, { "epoch": 0.11263304498933084, "grad_norm": 0.8079208135604858, "learning_rate": 9.92201967159471e-05, "loss": 0.7745, "step": 17630 }, { "epoch": 0.11269693213906955, "grad_norm": 0.8128913640975952, "learning_rate": 9.92193137420158e-05, "loss": 0.7183, "step": 17640 }, { "epoch": 0.11276081928880825, "grad_norm": 1.0222535133361816, "learning_rate": 9.921843027240345e-05, "loss": 0.762, "step": 17650 }, { "epoch": 0.11282470643854695, "grad_norm": 0.782536506652832, "learning_rate": 9.921754630711891e-05, "loss": 1.0573, "step": 17660 }, { "epoch": 0.11288859358828565, "grad_norm": 0.7294056415557861, "learning_rate": 9.921666184617111e-05, "loss": 1.1262, "step": 17670 }, { "epoch": 0.11295248073802436, "grad_norm": 0.7423584461212158, "learning_rate": 9.921577688956893e-05, "loss": 0.9985, "step": 17680 }, { "epoch": 0.11301636788776305, "grad_norm": 0.7123269438743591, "learning_rate": 9.921489143732133e-05, "loss": 0.9849, "step": 17690 }, { "epoch": 0.11308025503750176, "grad_norm": 0.9806658625602722, "learning_rate": 9.921400548943718e-05, "loss": 0.8499, "step": 17700 }, { "epoch": 0.11314414218724046, "grad_norm": 2.702582359313965, "learning_rate": 9.921311904592541e-05, "loss": 0.9368, "step": 17710 }, { "epoch": 0.11320802933697915, "grad_norm": 0.627751350402832, "learning_rate": 9.921223210679495e-05, "loss": 1.0154, "step": 17720 }, { "epoch": 0.11327191648671786, "grad_norm": 1.1272038221359253, "learning_rate": 9.921134467205477e-05, "loss": 1.0128, "step": 17730 }, { "epoch": 0.11333580363645657, "grad_norm": 1.0452537536621094, "learning_rate": 9.921045674171374e-05, "loss": 0.9581, "step": 17740 }, { "epoch": 0.11339969078619526, "grad_norm": 0.6000169515609741, "learning_rate": 9.920956831578086e-05, "loss": 1.0127, "step": 17750 }, { "epoch": 0.11346357793593397, "grad_norm": 0.8441605567932129, "learning_rate": 9.920867939426505e-05, "loss": 1.0766, "step": 17760 }, { "epoch": 0.11352746508567267, "grad_norm": 1.0325100421905518, "learning_rate": 9.920778997717527e-05, "loss": 0.673, "step": 17770 }, { "epoch": 0.11359135223541136, "grad_norm": 0.8646054863929749, "learning_rate": 9.920690006452047e-05, "loss": 0.8475, "step": 17780 }, { "epoch": 0.11365523938515007, "grad_norm": 1.1158571243286133, "learning_rate": 9.920600965630962e-05, "loss": 0.7743, "step": 17790 }, { "epoch": 0.11371912653488878, "grad_norm": 0.790447473526001, "learning_rate": 9.920511875255168e-05, "loss": 0.8564, "step": 17800 }, { "epoch": 0.11378301368462747, "grad_norm": 0.6469011902809143, "learning_rate": 9.920422735325561e-05, "loss": 0.9071, "step": 17810 }, { "epoch": 0.11384690083436617, "grad_norm": 0.8129775524139404, "learning_rate": 9.920333545843042e-05, "loss": 0.9754, "step": 17820 }, { "epoch": 0.11391078798410488, "grad_norm": 1.0118224620819092, "learning_rate": 9.920244306808509e-05, "loss": 0.8034, "step": 17830 }, { "epoch": 0.11397467513384357, "grad_norm": 0.8558486104011536, "learning_rate": 9.920155018222857e-05, "loss": 1.0181, "step": 17840 }, { "epoch": 0.11403856228358228, "grad_norm": 1.0910837650299072, "learning_rate": 9.920065680086988e-05, "loss": 0.9216, "step": 17850 }, { "epoch": 0.11410244943332098, "grad_norm": 0.6649434566497803, "learning_rate": 9.9199762924018e-05, "loss": 0.7545, "step": 17860 }, { "epoch": 0.11416633658305968, "grad_norm": 2.092512369155884, "learning_rate": 9.919886855168196e-05, "loss": 1.0409, "step": 17870 }, { "epoch": 0.11423022373279838, "grad_norm": 1.0226621627807617, "learning_rate": 9.919797368387073e-05, "loss": 0.9839, "step": 17880 }, { "epoch": 0.11429411088253709, "grad_norm": 0.9362402558326721, "learning_rate": 9.919707832059337e-05, "loss": 0.9349, "step": 17890 }, { "epoch": 0.11435799803227578, "grad_norm": 0.6043878793716431, "learning_rate": 9.919618246185886e-05, "loss": 0.9667, "step": 17900 }, { "epoch": 0.11442188518201449, "grad_norm": 0.7030009031295776, "learning_rate": 9.919528610767622e-05, "loss": 0.8868, "step": 17910 }, { "epoch": 0.1144857723317532, "grad_norm": 0.54000324010849, "learning_rate": 9.919438925805451e-05, "loss": 0.9966, "step": 17920 }, { "epoch": 0.11454965948149189, "grad_norm": 0.7529541254043579, "learning_rate": 9.919349191300272e-05, "loss": 0.8292, "step": 17930 }, { "epoch": 0.11461354663123059, "grad_norm": 1.047979712486267, "learning_rate": 9.919259407252992e-05, "loss": 0.722, "step": 17940 }, { "epoch": 0.1146774337809693, "grad_norm": 0.6364821195602417, "learning_rate": 9.919169573664513e-05, "loss": 1.0382, "step": 17950 }, { "epoch": 0.11474132093070799, "grad_norm": 1.298886775970459, "learning_rate": 9.919079690535742e-05, "loss": 1.0275, "step": 17960 }, { "epoch": 0.1148052080804467, "grad_norm": 0.9060257077217102, "learning_rate": 9.918989757867583e-05, "loss": 0.8959, "step": 17970 }, { "epoch": 0.1148690952301854, "grad_norm": 1.0557827949523926, "learning_rate": 9.91889977566094e-05, "loss": 0.8224, "step": 17980 }, { "epoch": 0.1149329823799241, "grad_norm": 1.0880374908447266, "learning_rate": 9.918809743916722e-05, "loss": 1.0926, "step": 17990 }, { "epoch": 0.1149968695296628, "grad_norm": 0.9131140112876892, "learning_rate": 9.918719662635834e-05, "loss": 0.8125, "step": 18000 }, { "epoch": 0.11506075667940151, "grad_norm": 0.956883430480957, "learning_rate": 9.918629531819184e-05, "loss": 0.7358, "step": 18010 }, { "epoch": 0.11512464382914021, "grad_norm": 1.1593812704086304, "learning_rate": 9.91853935146768e-05, "loss": 0.9325, "step": 18020 }, { "epoch": 0.1151885309788789, "grad_norm": 0.8647767901420593, "learning_rate": 9.918449121582228e-05, "loss": 0.889, "step": 18030 }, { "epoch": 0.11525241812861761, "grad_norm": 0.875560462474823, "learning_rate": 9.91835884216374e-05, "loss": 0.7376, "step": 18040 }, { "epoch": 0.11531630527835632, "grad_norm": 1.0609110593795776, "learning_rate": 9.918268513213123e-05, "loss": 0.9935, "step": 18050 }, { "epoch": 0.11538019242809501, "grad_norm": 0.7033603191375732, "learning_rate": 9.918178134731286e-05, "loss": 0.9307, "step": 18060 }, { "epoch": 0.11544407957783372, "grad_norm": 0.7909555435180664, "learning_rate": 9.918087706719141e-05, "loss": 1.0967, "step": 18070 }, { "epoch": 0.11550796672757242, "grad_norm": 1.5477937459945679, "learning_rate": 9.917997229177597e-05, "loss": 0.921, "step": 18080 }, { "epoch": 0.11557185387731111, "grad_norm": 1.373567819595337, "learning_rate": 9.91790670210757e-05, "loss": 0.7096, "step": 18090 }, { "epoch": 0.11563574102704982, "grad_norm": 0.5353577136993408, "learning_rate": 9.917816125509965e-05, "loss": 0.8476, "step": 18100 }, { "epoch": 0.11569962817678853, "grad_norm": 0.6826961040496826, "learning_rate": 9.917725499385698e-05, "loss": 1.0802, "step": 18110 }, { "epoch": 0.11576351532652722, "grad_norm": 0.9268578290939331, "learning_rate": 9.917634823735678e-05, "loss": 1.0728, "step": 18120 }, { "epoch": 0.11582740247626593, "grad_norm": 0.9943346381187439, "learning_rate": 9.917544098560824e-05, "loss": 1.2018, "step": 18130 }, { "epoch": 0.11589128962600463, "grad_norm": 1.2347413301467896, "learning_rate": 9.917453323862046e-05, "loss": 0.8933, "step": 18140 }, { "epoch": 0.11595517677574332, "grad_norm": 0.7425234913825989, "learning_rate": 9.91736249964026e-05, "loss": 0.7152, "step": 18150 }, { "epoch": 0.11601906392548203, "grad_norm": 1.068671703338623, "learning_rate": 9.917271625896377e-05, "loss": 0.9737, "step": 18160 }, { "epoch": 0.11608295107522074, "grad_norm": 0.9823939204216003, "learning_rate": 9.917180702631316e-05, "loss": 0.9365, "step": 18170 }, { "epoch": 0.11614683822495943, "grad_norm": 0.7012134194374084, "learning_rate": 9.917089729845991e-05, "loss": 0.9741, "step": 18180 }, { "epoch": 0.11621072537469813, "grad_norm": 0.8662933111190796, "learning_rate": 9.916998707541319e-05, "loss": 0.9238, "step": 18190 }, { "epoch": 0.11627461252443684, "grad_norm": 0.5047873258590698, "learning_rate": 9.916907635718216e-05, "loss": 0.772, "step": 18200 }, { "epoch": 0.11633849967417553, "grad_norm": 1.2148154973983765, "learning_rate": 9.916816514377598e-05, "loss": 0.8872, "step": 18210 }, { "epoch": 0.11640238682391424, "grad_norm": 0.6862503886222839, "learning_rate": 9.916725343520386e-05, "loss": 0.9914, "step": 18220 }, { "epoch": 0.11646627397365295, "grad_norm": 0.7228761315345764, "learning_rate": 9.916634123147495e-05, "loss": 1.3034, "step": 18230 }, { "epoch": 0.11653016112339164, "grad_norm": 0.5457968711853027, "learning_rate": 9.916542853259848e-05, "loss": 0.8272, "step": 18240 }, { "epoch": 0.11659404827313034, "grad_norm": 2.023207187652588, "learning_rate": 9.916451533858358e-05, "loss": 0.7746, "step": 18250 }, { "epoch": 0.11665793542286905, "grad_norm": 0.9167050123214722, "learning_rate": 9.916360164943947e-05, "loss": 0.9439, "step": 18260 }, { "epoch": 0.11672182257260774, "grad_norm": 0.7956591248512268, "learning_rate": 9.916268746517537e-05, "loss": 1.0798, "step": 18270 }, { "epoch": 0.11678570972234645, "grad_norm": 0.8357956409454346, "learning_rate": 9.916177278580047e-05, "loss": 1.0405, "step": 18280 }, { "epoch": 0.11684959687208515, "grad_norm": 0.7955309152603149, "learning_rate": 9.9160857611324e-05, "loss": 0.7501, "step": 18290 }, { "epoch": 0.11691348402182385, "grad_norm": 0.8821001052856445, "learning_rate": 9.915994194175516e-05, "loss": 0.916, "step": 18300 }, { "epoch": 0.11697737117156255, "grad_norm": 0.7497395873069763, "learning_rate": 9.915902577710318e-05, "loss": 0.9209, "step": 18310 }, { "epoch": 0.11704125832130126, "grad_norm": 0.7040755152702332, "learning_rate": 9.915810911737727e-05, "loss": 0.807, "step": 18320 }, { "epoch": 0.11710514547103995, "grad_norm": 0.640442430973053, "learning_rate": 9.915719196258668e-05, "loss": 0.8374, "step": 18330 }, { "epoch": 0.11716903262077866, "grad_norm": 0.8393665552139282, "learning_rate": 9.915627431274064e-05, "loss": 1.118, "step": 18340 }, { "epoch": 0.11723291977051736, "grad_norm": 0.9538019895553589, "learning_rate": 9.915535616784838e-05, "loss": 0.6021, "step": 18350 }, { "epoch": 0.11729680692025606, "grad_norm": 0.5672876238822937, "learning_rate": 9.915443752791917e-05, "loss": 0.7703, "step": 18360 }, { "epoch": 0.11736069406999476, "grad_norm": 0.6178574562072754, "learning_rate": 9.915351839296225e-05, "loss": 1.1465, "step": 18370 }, { "epoch": 0.11742458121973347, "grad_norm": 0.9924026131629944, "learning_rate": 9.915259876298688e-05, "loss": 0.9693, "step": 18380 }, { "epoch": 0.11748846836947216, "grad_norm": 0.9154996275901794, "learning_rate": 9.91516786380023e-05, "loss": 1.5368, "step": 18390 }, { "epoch": 0.11755235551921087, "grad_norm": 0.8077566623687744, "learning_rate": 9.91507580180178e-05, "loss": 0.9521, "step": 18400 }, { "epoch": 0.11761624266894957, "grad_norm": 0.8165660500526428, "learning_rate": 9.914983690304266e-05, "loss": 0.977, "step": 18410 }, { "epoch": 0.11768012981868826, "grad_norm": 0.46091389656066895, "learning_rate": 9.914891529308614e-05, "loss": 0.9916, "step": 18420 }, { "epoch": 0.11774401696842697, "grad_norm": 0.8578134775161743, "learning_rate": 9.914799318815751e-05, "loss": 0.8512, "step": 18430 }, { "epoch": 0.11780790411816568, "grad_norm": 1.149581789970398, "learning_rate": 9.914707058826607e-05, "loss": 1.0471, "step": 18440 }, { "epoch": 0.11787179126790437, "grad_norm": 1.0105202198028564, "learning_rate": 9.91461474934211e-05, "loss": 0.8365, "step": 18450 }, { "epoch": 0.11793567841764308, "grad_norm": 0.5020955801010132, "learning_rate": 9.914522390363194e-05, "loss": 0.9565, "step": 18460 }, { "epoch": 0.11799956556738178, "grad_norm": 0.5407631993293762, "learning_rate": 9.914429981890783e-05, "loss": 0.885, "step": 18470 }, { "epoch": 0.11806345271712047, "grad_norm": 0.5676096081733704, "learning_rate": 9.914337523925812e-05, "loss": 0.9684, "step": 18480 }, { "epoch": 0.11812733986685918, "grad_norm": 0.7046330571174622, "learning_rate": 9.91424501646921e-05, "loss": 1.2259, "step": 18490 }, { "epoch": 0.11819122701659789, "grad_norm": 0.501208484172821, "learning_rate": 9.914152459521909e-05, "loss": 0.8909, "step": 18500 }, { "epoch": 0.11825511416633658, "grad_norm": 1.0521641969680786, "learning_rate": 9.914059853084842e-05, "loss": 0.8647, "step": 18510 }, { "epoch": 0.11831900131607528, "grad_norm": 1.0477256774902344, "learning_rate": 9.913967197158942e-05, "loss": 0.8122, "step": 18520 }, { "epoch": 0.11838288846581399, "grad_norm": 1.0611625909805298, "learning_rate": 9.913874491745138e-05, "loss": 1.0057, "step": 18530 }, { "epoch": 0.11844677561555268, "grad_norm": 0.833010733127594, "learning_rate": 9.91378173684437e-05, "loss": 0.8437, "step": 18540 }, { "epoch": 0.11851066276529139, "grad_norm": 0.7416166067123413, "learning_rate": 9.913688932457567e-05, "loss": 0.9933, "step": 18550 }, { "epoch": 0.1185745499150301, "grad_norm": 1.7045838832855225, "learning_rate": 9.913596078585667e-05, "loss": 0.7402, "step": 18560 }, { "epoch": 0.11863843706476879, "grad_norm": 1.204579472541809, "learning_rate": 9.913503175229603e-05, "loss": 0.73, "step": 18570 }, { "epoch": 0.11870232421450749, "grad_norm": 2.575094223022461, "learning_rate": 9.91341022239031e-05, "loss": 1.0376, "step": 18580 }, { "epoch": 0.1187662113642462, "grad_norm": 1.0583864450454712, "learning_rate": 9.913317220068728e-05, "loss": 0.9919, "step": 18590 }, { "epoch": 0.11883009851398489, "grad_norm": 1.465122938156128, "learning_rate": 9.913224168265788e-05, "loss": 0.8039, "step": 18600 }, { "epoch": 0.1188939856637236, "grad_norm": 0.8531835675239563, "learning_rate": 9.913131066982431e-05, "loss": 0.846, "step": 18610 }, { "epoch": 0.1189578728134623, "grad_norm": 0.6930166482925415, "learning_rate": 9.913037916219594e-05, "loss": 1.1698, "step": 18620 }, { "epoch": 0.119021759963201, "grad_norm": 0.8985093832015991, "learning_rate": 9.912944715978215e-05, "loss": 1.0585, "step": 18630 }, { "epoch": 0.1190856471129397, "grad_norm": 0.7134751677513123, "learning_rate": 9.912851466259232e-05, "loss": 0.8098, "step": 18640 }, { "epoch": 0.11914953426267841, "grad_norm": 1.5828766822814941, "learning_rate": 9.912758167063585e-05, "loss": 1.116, "step": 18650 }, { "epoch": 0.1192134214124171, "grad_norm": 0.602565586566925, "learning_rate": 9.912664818392213e-05, "loss": 1.0292, "step": 18660 }, { "epoch": 0.1192773085621558, "grad_norm": 0.9910022616386414, "learning_rate": 9.912571420246057e-05, "loss": 1.0432, "step": 18670 }, { "epoch": 0.11934119571189451, "grad_norm": 0.8652639389038086, "learning_rate": 9.912477972626055e-05, "loss": 0.941, "step": 18680 }, { "epoch": 0.1194050828616332, "grad_norm": 0.6660580039024353, "learning_rate": 9.912384475533152e-05, "loss": 0.8312, "step": 18690 }, { "epoch": 0.11946897001137191, "grad_norm": 1.2698357105255127, "learning_rate": 9.912290928968286e-05, "loss": 0.6955, "step": 18700 }, { "epoch": 0.11953285716111062, "grad_norm": 0.7728399634361267, "learning_rate": 9.9121973329324e-05, "loss": 0.6761, "step": 18710 }, { "epoch": 0.11959674431084931, "grad_norm": 1.1762244701385498, "learning_rate": 9.91210368742644e-05, "loss": 0.9973, "step": 18720 }, { "epoch": 0.11966063146058802, "grad_norm": 0.9727983474731445, "learning_rate": 9.912009992451343e-05, "loss": 1.0287, "step": 18730 }, { "epoch": 0.11972451861032672, "grad_norm": 1.166279673576355, "learning_rate": 9.911916248008058e-05, "loss": 0.9455, "step": 18740 }, { "epoch": 0.11978840576006541, "grad_norm": 1.3871594667434692, "learning_rate": 9.911822454097526e-05, "loss": 0.8691, "step": 18750 }, { "epoch": 0.11985229290980412, "grad_norm": 0.7483668923377991, "learning_rate": 9.911728610720693e-05, "loss": 0.7596, "step": 18760 }, { "epoch": 0.11991618005954283, "grad_norm": 0.7471362352371216, "learning_rate": 9.911634717878505e-05, "loss": 0.7925, "step": 18770 }, { "epoch": 0.11998006720928152, "grad_norm": 0.7331792712211609, "learning_rate": 9.911540775571903e-05, "loss": 0.7732, "step": 18780 }, { "epoch": 0.12004395435902022, "grad_norm": 0.8485783934593201, "learning_rate": 9.911446783801839e-05, "loss": 1.0558, "step": 18790 }, { "epoch": 0.12010784150875893, "grad_norm": 0.63601154088974, "learning_rate": 9.911352742569255e-05, "loss": 0.8409, "step": 18800 }, { "epoch": 0.12017172865849762, "grad_norm": 1.0364725589752197, "learning_rate": 9.911258651875102e-05, "loss": 1.1726, "step": 18810 }, { "epoch": 0.12023561580823633, "grad_norm": 1.1578558683395386, "learning_rate": 9.911164511720324e-05, "loss": 1.0072, "step": 18820 }, { "epoch": 0.12029950295797504, "grad_norm": 0.622075617313385, "learning_rate": 9.911070322105871e-05, "loss": 0.7986, "step": 18830 }, { "epoch": 0.12036339010771374, "grad_norm": 0.9480080604553223, "learning_rate": 9.91097608303269e-05, "loss": 0.7835, "step": 18840 }, { "epoch": 0.12042727725745243, "grad_norm": 0.6373130679130554, "learning_rate": 9.910881794501734e-05, "loss": 1.2013, "step": 18850 }, { "epoch": 0.12049116440719114, "grad_norm": 1.1628334522247314, "learning_rate": 9.910787456513948e-05, "loss": 0.8801, "step": 18860 }, { "epoch": 0.12055505155692985, "grad_norm": 1.2941060066223145, "learning_rate": 9.910693069070285e-05, "loss": 0.8426, "step": 18870 }, { "epoch": 0.12061893870666854, "grad_norm": 1.0892646312713623, "learning_rate": 9.910598632171692e-05, "loss": 0.7019, "step": 18880 }, { "epoch": 0.12068282585640724, "grad_norm": 1.0153416395187378, "learning_rate": 9.910504145819124e-05, "loss": 0.9361, "step": 18890 }, { "epoch": 0.12074671300614595, "grad_norm": 0.8913525342941284, "learning_rate": 9.910409610013531e-05, "loss": 1.2171, "step": 18900 }, { "epoch": 0.12081060015588464, "grad_norm": 1.518178105354309, "learning_rate": 9.910315024755866e-05, "loss": 0.8538, "step": 18910 }, { "epoch": 0.12087448730562335, "grad_norm": 0.8142111301422119, "learning_rate": 9.910220390047081e-05, "loss": 0.9446, "step": 18920 }, { "epoch": 0.12093837445536205, "grad_norm": 0.6663020849227905, "learning_rate": 9.910125705888127e-05, "loss": 0.9821, "step": 18930 }, { "epoch": 0.12100226160510075, "grad_norm": 0.7732610106468201, "learning_rate": 9.91003097227996e-05, "loss": 0.7072, "step": 18940 }, { "epoch": 0.12106614875483945, "grad_norm": 0.8097338080406189, "learning_rate": 9.909936189223533e-05, "loss": 1.0208, "step": 18950 }, { "epoch": 0.12113003590457816, "grad_norm": 1.0220088958740234, "learning_rate": 9.909841356719802e-05, "loss": 0.7898, "step": 18960 }, { "epoch": 0.12119392305431685, "grad_norm": 2.5410892963409424, "learning_rate": 9.909746474769718e-05, "loss": 0.884, "step": 18970 }, { "epoch": 0.12125781020405556, "grad_norm": 1.1213639974594116, "learning_rate": 9.909651543374243e-05, "loss": 0.9554, "step": 18980 }, { "epoch": 0.12132169735379426, "grad_norm": 0.8598119020462036, "learning_rate": 9.909556562534327e-05, "loss": 1.0724, "step": 18990 }, { "epoch": 0.12138558450353296, "grad_norm": 0.89163738489151, "learning_rate": 9.90946153225093e-05, "loss": 0.9145, "step": 19000 }, { "epoch": 0.12144947165327166, "grad_norm": 0.8153218030929565, "learning_rate": 9.909366452525009e-05, "loss": 0.8033, "step": 19010 }, { "epoch": 0.12151335880301037, "grad_norm": 0.8267776966094971, "learning_rate": 9.90927132335752e-05, "loss": 0.9408, "step": 19020 }, { "epoch": 0.12157724595274906, "grad_norm": 0.794154942035675, "learning_rate": 9.909176144749421e-05, "loss": 0.9167, "step": 19030 }, { "epoch": 0.12164113310248777, "grad_norm": 0.9239640831947327, "learning_rate": 9.909080916701672e-05, "loss": 0.9062, "step": 19040 }, { "epoch": 0.12170502025222647, "grad_norm": 1.426063060760498, "learning_rate": 9.908995169188589e-05, "loss": 0.9635, "step": 19050 }, { "epoch": 0.12176890740196517, "grad_norm": 0.840755820274353, "learning_rate": 9.908899847208145e-05, "loss": 0.6732, "step": 19060 }, { "epoch": 0.12183279455170387, "grad_norm": 1.245961308479309, "learning_rate": 9.908804475790834e-05, "loss": 1.0316, "step": 19070 }, { "epoch": 0.12189668170144258, "grad_norm": 0.5957521796226501, "learning_rate": 9.908709054937615e-05, "loss": 0.7994, "step": 19080 }, { "epoch": 0.12196056885118127, "grad_norm": 0.753171980381012, "learning_rate": 9.908613584649447e-05, "loss": 0.7903, "step": 19090 }, { "epoch": 0.12202445600091998, "grad_norm": 0.5334873199462891, "learning_rate": 9.908518064927297e-05, "loss": 0.8806, "step": 19100 }, { "epoch": 0.12208834315065868, "grad_norm": 0.7774950265884399, "learning_rate": 9.908422495772121e-05, "loss": 0.785, "step": 19110 }, { "epoch": 0.12215223030039737, "grad_norm": 1.0679373741149902, "learning_rate": 9.908326877184885e-05, "loss": 1.1829, "step": 19120 }, { "epoch": 0.12221611745013608, "grad_norm": 0.9180088043212891, "learning_rate": 9.908231209166552e-05, "loss": 0.943, "step": 19130 }, { "epoch": 0.12228000459987479, "grad_norm": 2.2565629482269287, "learning_rate": 9.908135491718082e-05, "loss": 0.7051, "step": 19140 }, { "epoch": 0.12234389174961348, "grad_norm": 0.5851088762283325, "learning_rate": 9.908039724840444e-05, "loss": 0.8, "step": 19150 }, { "epoch": 0.12240777889935218, "grad_norm": 1.1300508975982666, "learning_rate": 9.9079439085346e-05, "loss": 1.0469, "step": 19160 }, { "epoch": 0.12247166604909089, "grad_norm": 1.3692076206207275, "learning_rate": 9.907848042801514e-05, "loss": 0.8056, "step": 19170 }, { "epoch": 0.12253555319882958, "grad_norm": 0.7391330599784851, "learning_rate": 9.907752127642151e-05, "loss": 1.0543, "step": 19180 }, { "epoch": 0.12259944034856829, "grad_norm": 1.7373781204223633, "learning_rate": 9.90765616305748e-05, "loss": 1.1021, "step": 19190 }, { "epoch": 0.122663327498307, "grad_norm": 1.2597390413284302, "learning_rate": 9.907560149048465e-05, "loss": 1.0209, "step": 19200 }, { "epoch": 0.12272721464804569, "grad_norm": 0.7740830183029175, "learning_rate": 9.907464085616073e-05, "loss": 0.8195, "step": 19210 }, { "epoch": 0.1227911017977844, "grad_norm": 0.8929482698440552, "learning_rate": 9.907367972761273e-05, "loss": 0.8193, "step": 19220 }, { "epoch": 0.1228549889475231, "grad_norm": 0.854239821434021, "learning_rate": 9.907271810485033e-05, "loss": 0.9699, "step": 19230 }, { "epoch": 0.12291887609726179, "grad_norm": 1.0040228366851807, "learning_rate": 9.907175598788319e-05, "loss": 0.8653, "step": 19240 }, { "epoch": 0.1229827632470005, "grad_norm": 0.9501043558120728, "learning_rate": 9.907079337672102e-05, "loss": 1.2441, "step": 19250 }, { "epoch": 0.1230466503967392, "grad_norm": 0.9891424179077148, "learning_rate": 9.90698302713735e-05, "loss": 1.0697, "step": 19260 }, { "epoch": 0.1231105375464779, "grad_norm": 0.7450829148292542, "learning_rate": 9.906886667185034e-05, "loss": 0.883, "step": 19270 }, { "epoch": 0.1231744246962166, "grad_norm": 0.9859048128128052, "learning_rate": 9.906790257816125e-05, "loss": 1.0223, "step": 19280 }, { "epoch": 0.12323831184595531, "grad_norm": 0.6718336343765259, "learning_rate": 9.906693799031593e-05, "loss": 0.7721, "step": 19290 }, { "epoch": 0.123302198995694, "grad_norm": 0.9734120965003967, "learning_rate": 9.90659729083241e-05, "loss": 1.1092, "step": 19300 }, { "epoch": 0.12336608614543271, "grad_norm": 0.5610973238945007, "learning_rate": 9.906500733219545e-05, "loss": 0.8074, "step": 19310 }, { "epoch": 0.12342997329517141, "grad_norm": 0.9786707162857056, "learning_rate": 9.906404126193976e-05, "loss": 0.8548, "step": 19320 }, { "epoch": 0.1234938604449101, "grad_norm": 0.71066814661026, "learning_rate": 9.90630746975667e-05, "loss": 0.9489, "step": 19330 }, { "epoch": 0.12355774759464881, "grad_norm": 1.0802106857299805, "learning_rate": 9.906210763908606e-05, "loss": 1.0818, "step": 19340 }, { "epoch": 0.12362163474438752, "grad_norm": 0.8210603594779968, "learning_rate": 9.906114008650753e-05, "loss": 1.1651, "step": 19350 }, { "epoch": 0.12368552189412621, "grad_norm": 1.066074252128601, "learning_rate": 9.906017203984089e-05, "loss": 1.1113, "step": 19360 }, { "epoch": 0.12374940904386492, "grad_norm": 1.0652400255203247, "learning_rate": 9.905920349909587e-05, "loss": 0.8688, "step": 19370 }, { "epoch": 0.12381329619360362, "grad_norm": 0.6207056045532227, "learning_rate": 9.905823446428222e-05, "loss": 0.7867, "step": 19380 }, { "epoch": 0.12387718334334231, "grad_norm": 0.508903443813324, "learning_rate": 9.905726493540972e-05, "loss": 0.7805, "step": 19390 }, { "epoch": 0.12394107049308102, "grad_norm": 1.3334448337554932, "learning_rate": 9.905629491248812e-05, "loss": 1.1862, "step": 19400 }, { "epoch": 0.12400495764281973, "grad_norm": 0.6775515675544739, "learning_rate": 9.905532439552718e-05, "loss": 1.0348, "step": 19410 }, { "epoch": 0.12406884479255842, "grad_norm": 0.628044605255127, "learning_rate": 9.905435338453668e-05, "loss": 0.8879, "step": 19420 }, { "epoch": 0.12413273194229713, "grad_norm": 0.4216572940349579, "learning_rate": 9.905338187952642e-05, "loss": 0.9814, "step": 19430 }, { "epoch": 0.12419661909203583, "grad_norm": 0.9256001710891724, "learning_rate": 9.905240988050616e-05, "loss": 0.834, "step": 19440 }, { "epoch": 0.12426050624177452, "grad_norm": 1.2580517530441284, "learning_rate": 9.90514373874857e-05, "loss": 0.8597, "step": 19450 }, { "epoch": 0.12432439339151323, "grad_norm": 0.9285855889320374, "learning_rate": 9.905046440047483e-05, "loss": 0.8476, "step": 19460 }, { "epoch": 0.12438828054125194, "grad_norm": 4.265834808349609, "learning_rate": 9.904949091948335e-05, "loss": 0.7808, "step": 19470 }, { "epoch": 0.12445216769099063, "grad_norm": 1.1027454137802124, "learning_rate": 9.904851694452105e-05, "loss": 0.9509, "step": 19480 }, { "epoch": 0.12451605484072933, "grad_norm": 0.7222440838813782, "learning_rate": 9.904754247559776e-05, "loss": 1.19, "step": 19490 }, { "epoch": 0.12457994199046804, "grad_norm": 0.9820877909660339, "learning_rate": 9.904656751272328e-05, "loss": 1.1383, "step": 19500 }, { "epoch": 0.12464382914020673, "grad_norm": 0.521395742893219, "learning_rate": 9.904559205590744e-05, "loss": 0.7945, "step": 19510 }, { "epoch": 0.12470771628994544, "grad_norm": 0.819299042224884, "learning_rate": 9.904461610516006e-05, "loss": 0.9847, "step": 19520 }, { "epoch": 0.12477160343968415, "grad_norm": 0.7167036533355713, "learning_rate": 9.904363966049098e-05, "loss": 0.9058, "step": 19530 }, { "epoch": 0.12483549058942284, "grad_norm": 0.9135296940803528, "learning_rate": 9.904266272190999e-05, "loss": 1.0799, "step": 19540 }, { "epoch": 0.12489937773916154, "grad_norm": 0.9460045695304871, "learning_rate": 9.904168528942696e-05, "loss": 0.8938, "step": 19550 }, { "epoch": 0.12496326488890025, "grad_norm": 0.8096686601638794, "learning_rate": 9.904070736305176e-05, "loss": 0.9354, "step": 19560 }, { "epoch": 0.12502715203863896, "grad_norm": 0.8548075556755066, "learning_rate": 9.903972894279419e-05, "loss": 1.0604, "step": 19570 }, { "epoch": 0.12509103918837766, "grad_norm": 0.9655779600143433, "learning_rate": 9.903875002866412e-05, "loss": 0.9133, "step": 19580 }, { "epoch": 0.12515492633811634, "grad_norm": 0.6967488527297974, "learning_rate": 9.903777062067142e-05, "loss": 0.9566, "step": 19590 }, { "epoch": 0.12521881348785505, "grad_norm": 0.5470744967460632, "learning_rate": 9.903679071882594e-05, "loss": 1.1614, "step": 19600 }, { "epoch": 0.12528270063759375, "grad_norm": 0.44721782207489014, "learning_rate": 9.903581032313757e-05, "loss": 0.8072, "step": 19610 }, { "epoch": 0.12534658778733246, "grad_norm": 0.6788471937179565, "learning_rate": 9.903482943361616e-05, "loss": 0.7861, "step": 19620 }, { "epoch": 0.12541047493707116, "grad_norm": 1.1422935724258423, "learning_rate": 9.90338480502716e-05, "loss": 0.7205, "step": 19630 }, { "epoch": 0.12547436208680987, "grad_norm": 0.9505433440208435, "learning_rate": 9.903286617311375e-05, "loss": 0.9314, "step": 19640 }, { "epoch": 0.12553824923654855, "grad_norm": 1.1754378080368042, "learning_rate": 9.903188380215254e-05, "loss": 1.0777, "step": 19650 }, { "epoch": 0.12560213638628726, "grad_norm": 0.8856581449508667, "learning_rate": 9.903090093739784e-05, "loss": 0.8573, "step": 19660 }, { "epoch": 0.12566602353602596, "grad_norm": 0.9377738237380981, "learning_rate": 9.902991757885955e-05, "loss": 1.1693, "step": 19670 }, { "epoch": 0.12572991068576467, "grad_norm": 1.0631327629089355, "learning_rate": 9.902893372654755e-05, "loss": 1.0915, "step": 19680 }, { "epoch": 0.12579379783550337, "grad_norm": 0.9726115465164185, "learning_rate": 9.902794938047179e-05, "loss": 0.8837, "step": 19690 }, { "epoch": 0.12585768498524208, "grad_norm": 1.0716935396194458, "learning_rate": 9.902696454064218e-05, "loss": 0.9323, "step": 19700 }, { "epoch": 0.12592157213498076, "grad_norm": 0.6799229383468628, "learning_rate": 9.90259792070686e-05, "loss": 1.0071, "step": 19710 }, { "epoch": 0.12598545928471946, "grad_norm": 0.6276800632476807, "learning_rate": 9.9024993379761e-05, "loss": 0.7668, "step": 19720 }, { "epoch": 0.12604934643445817, "grad_norm": 0.6206457018852234, "learning_rate": 9.902400705872931e-05, "loss": 0.9062, "step": 19730 }, { "epoch": 0.12611323358419688, "grad_norm": 0.8407792448997498, "learning_rate": 9.902302024398344e-05, "loss": 0.9027, "step": 19740 }, { "epoch": 0.12617712073393558, "grad_norm": 1.0708434581756592, "learning_rate": 9.902203293553337e-05, "loss": 1.1047, "step": 19750 }, { "epoch": 0.1262410078836743, "grad_norm": 0.7790530920028687, "learning_rate": 9.902104513338901e-05, "loss": 0.9325, "step": 19760 }, { "epoch": 0.12630489503341297, "grad_norm": 0.8316869139671326, "learning_rate": 9.90200568375603e-05, "loss": 0.7061, "step": 19770 }, { "epoch": 0.12636878218315167, "grad_norm": 1.0187642574310303, "learning_rate": 9.901906804805723e-05, "loss": 1.0777, "step": 19780 }, { "epoch": 0.12643266933289038, "grad_norm": 0.5136988759040833, "learning_rate": 9.901807876488973e-05, "loss": 0.9242, "step": 19790 }, { "epoch": 0.12649655648262909, "grad_norm": 0.8000445365905762, "learning_rate": 9.901708898806777e-05, "loss": 0.9573, "step": 19800 }, { "epoch": 0.1265604436323678, "grad_norm": 0.6587111353874207, "learning_rate": 9.901609871760132e-05, "loss": 1.0622, "step": 19810 }, { "epoch": 0.1266243307821065, "grad_norm": 0.7772683501243591, "learning_rate": 9.901510795350035e-05, "loss": 0.9968, "step": 19820 }, { "epoch": 0.12668821793184518, "grad_norm": 0.578628659248352, "learning_rate": 9.901411669577484e-05, "loss": 0.8025, "step": 19830 }, { "epoch": 0.12675210508158388, "grad_norm": 0.5878568887710571, "learning_rate": 9.901312494443477e-05, "loss": 0.9639, "step": 19840 }, { "epoch": 0.1268159922313226, "grad_norm": 1.2923487424850464, "learning_rate": 9.901213269949013e-05, "loss": 0.8744, "step": 19850 }, { "epoch": 0.1268798793810613, "grad_norm": 0.8328975439071655, "learning_rate": 9.90111399609509e-05, "loss": 0.8774, "step": 19860 }, { "epoch": 0.1269437665308, "grad_norm": 0.5888987183570862, "learning_rate": 9.901024607425051e-05, "loss": 0.8943, "step": 19870 }, { "epoch": 0.1270076536805387, "grad_norm": 0.5450535416603088, "learning_rate": 9.900925239790913e-05, "loss": 0.8265, "step": 19880 }, { "epoch": 0.12707154083027739, "grad_norm": 1.1033037900924683, "learning_rate": 9.90082582280022e-05, "loss": 1.0133, "step": 19890 }, { "epoch": 0.1271354279800161, "grad_norm": 0.7691605687141418, "learning_rate": 9.90072635645397e-05, "loss": 1.0807, "step": 19900 }, { "epoch": 0.1271993151297548, "grad_norm": 0.5714837908744812, "learning_rate": 9.900626840753167e-05, "loss": 0.8473, "step": 19910 }, { "epoch": 0.1272632022794935, "grad_norm": 0.5955528020858765, "learning_rate": 9.90052727569881e-05, "loss": 0.9808, "step": 19920 }, { "epoch": 0.1273270894292322, "grad_norm": 0.6563436388969421, "learning_rate": 9.900427661291904e-05, "loss": 0.9406, "step": 19930 }, { "epoch": 0.12739097657897092, "grad_norm": 1.398422360420227, "learning_rate": 9.900327997533454e-05, "loss": 1.1866, "step": 19940 }, { "epoch": 0.1274548637287096, "grad_norm": 0.8855098485946655, "learning_rate": 9.900228284424459e-05, "loss": 1.167, "step": 19950 }, { "epoch": 0.1275187508784483, "grad_norm": 0.8752385973930359, "learning_rate": 9.900128521965927e-05, "loss": 0.9714, "step": 19960 }, { "epoch": 0.127582638028187, "grad_norm": 0.7587289810180664, "learning_rate": 9.900028710158865e-05, "loss": 1.1985, "step": 19970 }, { "epoch": 0.1276465251779257, "grad_norm": 0.683338463306427, "learning_rate": 9.899928849004269e-05, "loss": 1.0779, "step": 19980 }, { "epoch": 0.12771041232766442, "grad_norm": 0.738228440284729, "learning_rate": 9.899828938503155e-05, "loss": 0.8112, "step": 19990 }, { "epoch": 0.12777429947740312, "grad_norm": 1.1224406957626343, "learning_rate": 9.899728978656521e-05, "loss": 0.707, "step": 20000 }, { "epoch": 0.1278381866271418, "grad_norm": 1.0595028400421143, "learning_rate": 9.89962896946538e-05, "loss": 1.296, "step": 20010 }, { "epoch": 0.1279020737768805, "grad_norm": 0.972698986530304, "learning_rate": 9.899528910930736e-05, "loss": 0.9258, "step": 20020 }, { "epoch": 0.12796596092661922, "grad_norm": 0.7331506609916687, "learning_rate": 9.899428803053597e-05, "loss": 0.8608, "step": 20030 }, { "epoch": 0.12802984807635792, "grad_norm": 0.9206950664520264, "learning_rate": 9.899328645834971e-05, "loss": 0.9087, "step": 20040 }, { "epoch": 0.12809373522609663, "grad_norm": 2.2389299869537354, "learning_rate": 9.899228439275867e-05, "loss": 0.9422, "step": 20050 }, { "epoch": 0.12815762237583533, "grad_norm": 1.7067959308624268, "learning_rate": 9.899128183377294e-05, "loss": 0.8746, "step": 20060 }, { "epoch": 0.128221509525574, "grad_norm": 0.6370442509651184, "learning_rate": 9.899027878140264e-05, "loss": 1.1108, "step": 20070 }, { "epoch": 0.12828539667531272, "grad_norm": 0.7334869503974915, "learning_rate": 9.898927523565782e-05, "loss": 0.7668, "step": 20080 }, { "epoch": 0.12834928382505142, "grad_norm": 0.948521077632904, "learning_rate": 9.898827119654864e-05, "loss": 0.9522, "step": 20090 }, { "epoch": 0.12841317097479013, "grad_norm": 1.9327528476715088, "learning_rate": 9.898726666408516e-05, "loss": 0.8717, "step": 20100 }, { "epoch": 0.12847705812452884, "grad_norm": 0.8920581936836243, "learning_rate": 9.898626163827755e-05, "loss": 0.956, "step": 20110 }, { "epoch": 0.12854094527426754, "grad_norm": 0.7983399033546448, "learning_rate": 9.89852561191359e-05, "loss": 0.8899, "step": 20120 }, { "epoch": 0.12860483242400625, "grad_norm": 0.9559574723243713, "learning_rate": 9.898425010667035e-05, "loss": 0.9156, "step": 20130 }, { "epoch": 0.12866871957374493, "grad_norm": 0.5370156764984131, "learning_rate": 9.898324360089099e-05, "loss": 0.8623, "step": 20140 }, { "epoch": 0.12873260672348363, "grad_norm": 1.084375262260437, "learning_rate": 9.898223660180802e-05, "loss": 0.7424, "step": 20150 }, { "epoch": 0.12879649387322234, "grad_norm": 0.6435216069221497, "learning_rate": 9.898122910943155e-05, "loss": 0.7816, "step": 20160 }, { "epoch": 0.12886038102296105, "grad_norm": 0.8738903999328613, "learning_rate": 9.898022112377172e-05, "loss": 0.7824, "step": 20170 }, { "epoch": 0.12892426817269975, "grad_norm": 0.943022608757019, "learning_rate": 9.89792126448387e-05, "loss": 1.1974, "step": 20180 }, { "epoch": 0.12898815532243846, "grad_norm": 0.9258697032928467, "learning_rate": 9.897820367264262e-05, "loss": 0.9837, "step": 20190 }, { "epoch": 0.12905204247217714, "grad_norm": 0.8255495429039001, "learning_rate": 9.897719420719367e-05, "loss": 0.8147, "step": 20200 }, { "epoch": 0.12911592962191584, "grad_norm": 0.5483478307723999, "learning_rate": 9.897618424850199e-05, "loss": 0.9607, "step": 20210 }, { "epoch": 0.12917981677165455, "grad_norm": 0.976705014705658, "learning_rate": 9.897517379657778e-05, "loss": 0.9184, "step": 20220 }, { "epoch": 0.12924370392139325, "grad_norm": 0.66350257396698, "learning_rate": 9.89741628514312e-05, "loss": 0.8475, "step": 20230 }, { "epoch": 0.12930759107113196, "grad_norm": 0.9961204528808594, "learning_rate": 9.897315141307242e-05, "loss": 0.9149, "step": 20240 }, { "epoch": 0.12937147822087067, "grad_norm": 0.8872457146644592, "learning_rate": 9.897213948151165e-05, "loss": 0.8368, "step": 20250 }, { "epoch": 0.12943536537060935, "grad_norm": 1.1536744832992554, "learning_rate": 9.897112705675906e-05, "loss": 0.8775, "step": 20260 }, { "epoch": 0.12949925252034805, "grad_norm": 0.835328221321106, "learning_rate": 9.897011413882484e-05, "loss": 0.8357, "step": 20270 }, { "epoch": 0.12956313967008676, "grad_norm": 0.5641841292381287, "learning_rate": 9.896910072771924e-05, "loss": 0.9148, "step": 20280 }, { "epoch": 0.12962702681982546, "grad_norm": 0.9598913192749023, "learning_rate": 9.89680868234524e-05, "loss": 0.8754, "step": 20290 }, { "epoch": 0.12969091396956417, "grad_norm": 0.7789944410324097, "learning_rate": 9.896707242603457e-05, "loss": 0.8845, "step": 20300 }, { "epoch": 0.12975480111930288, "grad_norm": 1.1389309167861938, "learning_rate": 9.896605753547596e-05, "loss": 1.028, "step": 20310 }, { "epoch": 0.12981868826904155, "grad_norm": 0.8242889642715454, "learning_rate": 9.896504215178681e-05, "loss": 0.7889, "step": 20320 }, { "epoch": 0.12988257541878026, "grad_norm": 1.3238638639450073, "learning_rate": 9.89640262749773e-05, "loss": 0.8451, "step": 20330 }, { "epoch": 0.12994646256851897, "grad_norm": 1.0306720733642578, "learning_rate": 9.896300990505768e-05, "loss": 0.9655, "step": 20340 }, { "epoch": 0.13001034971825767, "grad_norm": 0.7990890145301819, "learning_rate": 9.896199304203821e-05, "loss": 0.9537, "step": 20350 }, { "epoch": 0.13007423686799638, "grad_norm": 0.8819360136985779, "learning_rate": 9.89609756859291e-05, "loss": 0.9703, "step": 20360 }, { "epoch": 0.13013812401773509, "grad_norm": 0.8472315669059753, "learning_rate": 9.895995783674061e-05, "loss": 1.1459, "step": 20370 }, { "epoch": 0.13020201116747376, "grad_norm": 0.8132781386375427, "learning_rate": 9.895893949448301e-05, "loss": 1.2826, "step": 20380 }, { "epoch": 0.13026589831721247, "grad_norm": 1.0438861846923828, "learning_rate": 9.89579206591665e-05, "loss": 0.846, "step": 20390 }, { "epoch": 0.13032978546695118, "grad_norm": 1.2839152812957764, "learning_rate": 9.89569013308014e-05, "loss": 0.798, "step": 20400 }, { "epoch": 0.13039367261668988, "grad_norm": 0.7642764449119568, "learning_rate": 9.895588150939794e-05, "loss": 1.2106, "step": 20410 }, { "epoch": 0.1304575597664286, "grad_norm": 1.4906141757965088, "learning_rate": 9.89548611949664e-05, "loss": 1.0197, "step": 20420 }, { "epoch": 0.1305214469161673, "grad_norm": 1.0365071296691895, "learning_rate": 9.895384038751705e-05, "loss": 0.793, "step": 20430 }, { "epoch": 0.13058533406590597, "grad_norm": 0.7034469842910767, "learning_rate": 9.895281908706018e-05, "loss": 1.0824, "step": 20440 }, { "epoch": 0.13064922121564468, "grad_norm": 0.8058176636695862, "learning_rate": 9.895179729360606e-05, "loss": 0.9053, "step": 20450 }, { "epoch": 0.13071310836538338, "grad_norm": 1.0343101024627686, "learning_rate": 9.8950775007165e-05, "loss": 1.0945, "step": 20460 }, { "epoch": 0.1307769955151221, "grad_norm": 0.7652077674865723, "learning_rate": 9.89497522277473e-05, "loss": 0.8568, "step": 20470 }, { "epoch": 0.1308408826648608, "grad_norm": 0.6593330502510071, "learning_rate": 9.894872895536325e-05, "loss": 0.9574, "step": 20480 }, { "epoch": 0.1309047698145995, "grad_norm": 1.9122685194015503, "learning_rate": 9.894770519002314e-05, "loss": 0.8306, "step": 20490 }, { "epoch": 0.13096865696433818, "grad_norm": 0.57440185546875, "learning_rate": 9.894668093173729e-05, "loss": 0.6806, "step": 20500 }, { "epoch": 0.1310325441140769, "grad_norm": 0.5228521227836609, "learning_rate": 9.894565618051603e-05, "loss": 0.9544, "step": 20510 }, { "epoch": 0.1310964312638156, "grad_norm": 0.6962705850601196, "learning_rate": 9.894463093636966e-05, "loss": 0.7487, "step": 20520 }, { "epoch": 0.1311603184135543, "grad_norm": 0.92603999376297, "learning_rate": 9.89436051993085e-05, "loss": 0.7536, "step": 20530 }, { "epoch": 0.131224205563293, "grad_norm": 1.5977349281311035, "learning_rate": 9.894257896934292e-05, "loss": 1.0637, "step": 20540 }, { "epoch": 0.1312880927130317, "grad_norm": 1.1071442365646362, "learning_rate": 9.894155224648322e-05, "loss": 0.7441, "step": 20550 }, { "epoch": 0.1313519798627704, "grad_norm": 0.576611340045929, "learning_rate": 9.894052503073973e-05, "loss": 0.7077, "step": 20560 }, { "epoch": 0.1314158670125091, "grad_norm": 0.7525666356086731, "learning_rate": 9.893949732212284e-05, "loss": 0.9049, "step": 20570 }, { "epoch": 0.1314797541622478, "grad_norm": 0.7113981246948242, "learning_rate": 9.893846912064287e-05, "loss": 1.0453, "step": 20580 }, { "epoch": 0.1315436413119865, "grad_norm": 0.9703547358512878, "learning_rate": 9.893744042631016e-05, "loss": 0.903, "step": 20590 }, { "epoch": 0.13160752846172522, "grad_norm": 0.8187039494514465, "learning_rate": 9.89364112391351e-05, "loss": 0.7462, "step": 20600 }, { "epoch": 0.13167141561146392, "grad_norm": 0.6756948232650757, "learning_rate": 9.893538155912804e-05, "loss": 0.8157, "step": 20610 }, { "epoch": 0.1317353027612026, "grad_norm": 1.0830146074295044, "learning_rate": 9.893435138629936e-05, "loss": 0.7643, "step": 20620 }, { "epoch": 0.1317991899109413, "grad_norm": 1.8327852487564087, "learning_rate": 9.893332072065942e-05, "loss": 0.9862, "step": 20630 }, { "epoch": 0.13186307706068, "grad_norm": 2.034275770187378, "learning_rate": 9.893228956221861e-05, "loss": 0.8296, "step": 20640 }, { "epoch": 0.13192696421041872, "grad_norm": 0.7762085795402527, "learning_rate": 9.893125791098729e-05, "loss": 0.8986, "step": 20650 }, { "epoch": 0.13199085136015742, "grad_norm": 1.0018727779388428, "learning_rate": 9.89302257669759e-05, "loss": 0.9179, "step": 20660 }, { "epoch": 0.13205473850989613, "grad_norm": 1.3458504676818848, "learning_rate": 9.89291931301948e-05, "loss": 0.7677, "step": 20670 }, { "epoch": 0.1321186256596348, "grad_norm": 0.7849268913269043, "learning_rate": 9.89281600006544e-05, "loss": 1.1385, "step": 20680 }, { "epoch": 0.13218251280937351, "grad_norm": 0.9244788289070129, "learning_rate": 9.892712637836507e-05, "loss": 0.8751, "step": 20690 }, { "epoch": 0.13224639995911222, "grad_norm": 0.7756919860839844, "learning_rate": 9.892609226333728e-05, "loss": 0.8581, "step": 20700 }, { "epoch": 0.13231028710885093, "grad_norm": 0.7075464129447937, "learning_rate": 9.89250576555814e-05, "loss": 0.8242, "step": 20710 }, { "epoch": 0.13237417425858963, "grad_norm": 0.8638562560081482, "learning_rate": 9.892402255510786e-05, "loss": 0.9992, "step": 20720 }, { "epoch": 0.13243806140832834, "grad_norm": 0.9571630954742432, "learning_rate": 9.89229869619271e-05, "loss": 0.9963, "step": 20730 }, { "epoch": 0.13250194855806702, "grad_norm": 1.0435787439346313, "learning_rate": 9.892195087604954e-05, "loss": 0.8632, "step": 20740 }, { "epoch": 0.13256583570780572, "grad_norm": 1.1710478067398071, "learning_rate": 9.89209142974856e-05, "loss": 0.8433, "step": 20750 }, { "epoch": 0.13262972285754443, "grad_norm": 0.6886267066001892, "learning_rate": 9.891987722624574e-05, "loss": 1.0012, "step": 20760 }, { "epoch": 0.13269361000728314, "grad_norm": 1.172371745109558, "learning_rate": 9.89188396623404e-05, "loss": 0.9275, "step": 20770 }, { "epoch": 0.13275749715702184, "grad_norm": 0.8536580204963684, "learning_rate": 9.891780160577999e-05, "loss": 1.0204, "step": 20780 }, { "epoch": 0.13282138430676055, "grad_norm": 0.8853366374969482, "learning_rate": 9.891676305657502e-05, "loss": 0.913, "step": 20790 }, { "epoch": 0.13288527145649923, "grad_norm": 0.9350702166557312, "learning_rate": 9.891572401473594e-05, "loss": 0.8241, "step": 20800 }, { "epoch": 0.13294915860623793, "grad_norm": 0.7683811783790588, "learning_rate": 9.891468448027318e-05, "loss": 0.7284, "step": 20810 }, { "epoch": 0.13301304575597664, "grad_norm": 1.978036880493164, "learning_rate": 9.891364445319723e-05, "loss": 0.9082, "step": 20820 }, { "epoch": 0.13307693290571534, "grad_norm": 0.828632652759552, "learning_rate": 9.891260393351858e-05, "loss": 0.7259, "step": 20830 }, { "epoch": 0.13314082005545405, "grad_norm": 0.8856496810913086, "learning_rate": 9.891156292124768e-05, "loss": 0.8231, "step": 20840 }, { "epoch": 0.13320470720519276, "grad_norm": 0.9500540494918823, "learning_rate": 9.891052141639505e-05, "loss": 0.8454, "step": 20850 }, { "epoch": 0.13326859435493144, "grad_norm": 0.6504539251327515, "learning_rate": 9.890947941897113e-05, "loss": 0.8924, "step": 20860 }, { "epoch": 0.13333248150467014, "grad_norm": 0.9036272168159485, "learning_rate": 9.890843692898644e-05, "loss": 1.2291, "step": 20870 }, { "epoch": 0.13339636865440885, "grad_norm": 0.487404465675354, "learning_rate": 9.890739394645149e-05, "loss": 0.9082, "step": 20880 }, { "epoch": 0.13346025580414755, "grad_norm": 0.6194189786911011, "learning_rate": 9.890635047137678e-05, "loss": 0.8234, "step": 20890 }, { "epoch": 0.13352414295388626, "grad_norm": 1.1986579895019531, "learning_rate": 9.890530650377279e-05, "loss": 1.0975, "step": 20900 }, { "epoch": 0.13358803010362497, "grad_norm": 0.5254888534545898, "learning_rate": 9.890426204365006e-05, "loss": 0.9337, "step": 20910 }, { "epoch": 0.13365191725336364, "grad_norm": 2.1500959396362305, "learning_rate": 9.890321709101911e-05, "loss": 0.9268, "step": 20920 }, { "epoch": 0.13371580440310235, "grad_norm": 1.653495192527771, "learning_rate": 9.890217164589044e-05, "loss": 0.805, "step": 20930 }, { "epoch": 0.13377969155284106, "grad_norm": 0.9262358546257019, "learning_rate": 9.890112570827461e-05, "loss": 0.7364, "step": 20940 }, { "epoch": 0.13384357870257976, "grad_norm": 0.7505791187286377, "learning_rate": 9.890007927818214e-05, "loss": 0.836, "step": 20950 }, { "epoch": 0.13390746585231847, "grad_norm": 0.7554075717926025, "learning_rate": 9.889903235562357e-05, "loss": 1.0677, "step": 20960 }, { "epoch": 0.13397135300205718, "grad_norm": 1.8679813146591187, "learning_rate": 9.889798494060942e-05, "loss": 0.818, "step": 20970 }, { "epoch": 0.13403524015179588, "grad_norm": 1.584902286529541, "learning_rate": 9.889693703315029e-05, "loss": 1.1151, "step": 20980 }, { "epoch": 0.13409912730153456, "grad_norm": 0.8589569330215454, "learning_rate": 9.889588863325667e-05, "loss": 0.9884, "step": 20990 }, { "epoch": 0.13416301445127327, "grad_norm": 0.9949905872344971, "learning_rate": 9.889483974093917e-05, "loss": 0.925, "step": 21000 }, { "epoch": 0.13422690160101197, "grad_norm": 0.6873974204063416, "learning_rate": 9.889379035620833e-05, "loss": 0.9067, "step": 21010 }, { "epoch": 0.13429078875075068, "grad_norm": 2.3519535064697266, "learning_rate": 9.889274047907472e-05, "loss": 0.9542, "step": 21020 }, { "epoch": 0.13435467590048938, "grad_norm": 0.6520812511444092, "learning_rate": 9.889169010954892e-05, "loss": 0.9918, "step": 21030 }, { "epoch": 0.1344185630502281, "grad_norm": 0.6458450555801392, "learning_rate": 9.88906392476415e-05, "loss": 1.0032, "step": 21040 }, { "epoch": 0.13448245019996677, "grad_norm": 1.0632940530776978, "learning_rate": 9.888958789336304e-05, "loss": 1.0281, "step": 21050 }, { "epoch": 0.13454633734970547, "grad_norm": 0.8738301992416382, "learning_rate": 9.888853604672415e-05, "loss": 0.8943, "step": 21060 }, { "epoch": 0.13461022449944418, "grad_norm": 0.8271169662475586, "learning_rate": 9.88874837077354e-05, "loss": 0.8891, "step": 21070 }, { "epoch": 0.1346741116491829, "grad_norm": 0.7805771827697754, "learning_rate": 9.888643087640739e-05, "loss": 0.9641, "step": 21080 }, { "epoch": 0.1347379987989216, "grad_norm": 1.1134415864944458, "learning_rate": 9.888537755275073e-05, "loss": 0.8162, "step": 21090 }, { "epoch": 0.1348018859486603, "grad_norm": 0.9039101004600525, "learning_rate": 9.888432373677602e-05, "loss": 1.0201, "step": 21100 }, { "epoch": 0.13486577309839898, "grad_norm": 0.8428747653961182, "learning_rate": 9.888326942849389e-05, "loss": 0.8404, "step": 21110 }, { "epoch": 0.13492966024813768, "grad_norm": 0.8015506267547607, "learning_rate": 9.888221462791493e-05, "loss": 1.0085, "step": 21120 }, { "epoch": 0.1349935473978764, "grad_norm": 1.1341489553451538, "learning_rate": 9.88811593350498e-05, "loss": 0.9912, "step": 21130 }, { "epoch": 0.1350574345476151, "grad_norm": 0.8308176398277283, "learning_rate": 9.888010354990911e-05, "loss": 0.7831, "step": 21140 }, { "epoch": 0.1351213216973538, "grad_norm": 0.8086538910865784, "learning_rate": 9.887904727250348e-05, "loss": 0.8645, "step": 21150 }, { "epoch": 0.1351852088470925, "grad_norm": 0.5411624908447266, "learning_rate": 9.887799050284355e-05, "loss": 1.0745, "step": 21160 }, { "epoch": 0.1352490959968312, "grad_norm": 0.8454309105873108, "learning_rate": 9.887693324093998e-05, "loss": 0.8502, "step": 21170 }, { "epoch": 0.1353129831465699, "grad_norm": 0.8707975149154663, "learning_rate": 9.88758754868034e-05, "loss": 0.9231, "step": 21180 }, { "epoch": 0.1353768702963086, "grad_norm": 0.819693386554718, "learning_rate": 9.887481724044447e-05, "loss": 0.8355, "step": 21190 }, { "epoch": 0.1354407574460473, "grad_norm": 0.7734857201576233, "learning_rate": 9.887375850187386e-05, "loss": 1.1568, "step": 21200 }, { "epoch": 0.135504644595786, "grad_norm": 0.6396207809448242, "learning_rate": 9.887269927110222e-05, "loss": 0.9182, "step": 21210 }, { "epoch": 0.13556853174552472, "grad_norm": 1.281610369682312, "learning_rate": 9.88716395481402e-05, "loss": 0.9132, "step": 21220 }, { "epoch": 0.1356324188952634, "grad_norm": 1.1592093706130981, "learning_rate": 9.88705793329985e-05, "loss": 0.9757, "step": 21230 }, { "epoch": 0.1356963060450021, "grad_norm": 0.7820732593536377, "learning_rate": 9.88695186256878e-05, "loss": 0.9314, "step": 21240 }, { "epoch": 0.1357601931947408, "grad_norm": 0.7652541399002075, "learning_rate": 9.886845742621876e-05, "loss": 1.0022, "step": 21250 }, { "epoch": 0.13582408034447951, "grad_norm": 0.7700982689857483, "learning_rate": 9.886739573460207e-05, "loss": 1.0373, "step": 21260 }, { "epoch": 0.13588796749421822, "grad_norm": 1.0912948846817017, "learning_rate": 9.886633355084843e-05, "loss": 0.9453, "step": 21270 }, { "epoch": 0.13595185464395693, "grad_norm": 0.6350242495536804, "learning_rate": 9.886527087496853e-05, "loss": 0.7426, "step": 21280 }, { "epoch": 0.1360157417936956, "grad_norm": 0.7051372528076172, "learning_rate": 9.886420770697309e-05, "loss": 0.823, "step": 21290 }, { "epoch": 0.1360796289434343, "grad_norm": 0.8976541757583618, "learning_rate": 9.88631440468728e-05, "loss": 0.9737, "step": 21300 }, { "epoch": 0.13614351609317302, "grad_norm": 2.363358974456787, "learning_rate": 9.886207989467837e-05, "loss": 0.9999, "step": 21310 }, { "epoch": 0.13620740324291172, "grad_norm": 1.8028829097747803, "learning_rate": 9.886101525040055e-05, "loss": 0.8832, "step": 21320 }, { "epoch": 0.13627129039265043, "grad_norm": 1.440885305404663, "learning_rate": 9.885995011405e-05, "loss": 0.922, "step": 21330 }, { "epoch": 0.13633517754238914, "grad_norm": 0.9806457161903381, "learning_rate": 9.88588844856375e-05, "loss": 0.9399, "step": 21340 }, { "epoch": 0.1363990646921278, "grad_norm": 0.8839708566665649, "learning_rate": 9.885781836517377e-05, "loss": 0.7167, "step": 21350 }, { "epoch": 0.13646295184186652, "grad_norm": 1.2500883340835571, "learning_rate": 9.885675175266953e-05, "loss": 1.177, "step": 21360 }, { "epoch": 0.13652683899160523, "grad_norm": 1.0007693767547607, "learning_rate": 9.885568464813554e-05, "loss": 0.9975, "step": 21370 }, { "epoch": 0.13659072614134393, "grad_norm": 0.8086827397346497, "learning_rate": 9.885461705158254e-05, "loss": 0.8139, "step": 21380 }, { "epoch": 0.13665461329108264, "grad_norm": 0.947471022605896, "learning_rate": 9.885354896302128e-05, "loss": 1.1116, "step": 21390 }, { "epoch": 0.13671850044082134, "grad_norm": 0.8959566950798035, "learning_rate": 9.885248038246251e-05, "loss": 1.1009, "step": 21400 }, { "epoch": 0.13678238759056002, "grad_norm": 1.1560317277908325, "learning_rate": 9.8851411309917e-05, "loss": 0.6756, "step": 21410 }, { "epoch": 0.13684627474029873, "grad_norm": 1.5213913917541504, "learning_rate": 9.885034174539552e-05, "loss": 0.7979, "step": 21420 }, { "epoch": 0.13691016189003744, "grad_norm": 0.5531548261642456, "learning_rate": 9.884927168890884e-05, "loss": 0.9408, "step": 21430 }, { "epoch": 0.13697404903977614, "grad_norm": 0.7810382843017578, "learning_rate": 9.884820114046774e-05, "loss": 0.8515, "step": 21440 }, { "epoch": 0.13703793618951485, "grad_norm": 1.0958387851715088, "learning_rate": 9.884713010008298e-05, "loss": 0.869, "step": 21450 }, { "epoch": 0.13710182333925355, "grad_norm": 0.4343324899673462, "learning_rate": 9.884605856776537e-05, "loss": 0.8596, "step": 21460 }, { "epoch": 0.13716571048899223, "grad_norm": 0.9415945410728455, "learning_rate": 9.884498654352567e-05, "loss": 0.7679, "step": 21470 }, { "epoch": 0.13722959763873094, "grad_norm": 0.6814182996749878, "learning_rate": 9.884391402737473e-05, "loss": 0.9849, "step": 21480 }, { "epoch": 0.13729348478846964, "grad_norm": 0.8244829177856445, "learning_rate": 9.88428410193233e-05, "loss": 0.9204, "step": 21490 }, { "epoch": 0.13735737193820835, "grad_norm": 0.5591076612472534, "learning_rate": 9.884176751938222e-05, "loss": 1.0907, "step": 21500 }, { "epoch": 0.13742125908794706, "grad_norm": 0.6328865885734558, "learning_rate": 9.884069352756228e-05, "loss": 0.7108, "step": 21510 }, { "epoch": 0.13748514623768576, "grad_norm": 0.6090789437294006, "learning_rate": 9.883961904387431e-05, "loss": 0.8593, "step": 21520 }, { "epoch": 0.13754903338742444, "grad_norm": 1.0573042631149292, "learning_rate": 9.88385440683291e-05, "loss": 1.2391, "step": 21530 }, { "epoch": 0.13761292053716315, "grad_norm": 1.0376691818237305, "learning_rate": 9.883746860093752e-05, "loss": 0.8013, "step": 21540 }, { "epoch": 0.13767680768690185, "grad_norm": 0.9400094747543335, "learning_rate": 9.883639264171038e-05, "loss": 0.8789, "step": 21550 }, { "epoch": 0.13774069483664056, "grad_norm": 2.4618563652038574, "learning_rate": 9.88353161906585e-05, "loss": 1.1602, "step": 21560 }, { "epoch": 0.13780458198637927, "grad_norm": 1.2091678380966187, "learning_rate": 9.883423924779277e-05, "loss": 0.7947, "step": 21570 }, { "epoch": 0.13786846913611797, "grad_norm": 0.7721507549285889, "learning_rate": 9.883316181312398e-05, "loss": 1.0147, "step": 21580 }, { "epoch": 0.13793235628585665, "grad_norm": 0.9228678941726685, "learning_rate": 9.8832083886663e-05, "loss": 0.8414, "step": 21590 }, { "epoch": 0.13799624343559536, "grad_norm": 0.6696807742118835, "learning_rate": 9.883100546842071e-05, "loss": 1.0162, "step": 21600 }, { "epoch": 0.13806013058533406, "grad_norm": 0.8186768889427185, "learning_rate": 9.882992655840793e-05, "loss": 0.8442, "step": 21610 }, { "epoch": 0.13812401773507277, "grad_norm": 1.444062352180481, "learning_rate": 9.882884715663557e-05, "loss": 1.2117, "step": 21620 }, { "epoch": 0.13818790488481147, "grad_norm": 0.7770470380783081, "learning_rate": 9.882776726311445e-05, "loss": 0.7657, "step": 21630 }, { "epoch": 0.13825179203455018, "grad_norm": 1.0606368780136108, "learning_rate": 9.882668687785548e-05, "loss": 0.8434, "step": 21640 }, { "epoch": 0.13831567918428886, "grad_norm": 1.0077322721481323, "learning_rate": 9.882560600086954e-05, "loss": 0.8445, "step": 21650 }, { "epoch": 0.13837956633402757, "grad_norm": 0.9700446128845215, "learning_rate": 9.882452463216749e-05, "loss": 0.7823, "step": 21660 }, { "epoch": 0.13844345348376627, "grad_norm": 0.7618522644042969, "learning_rate": 9.882344277176025e-05, "loss": 1.0858, "step": 21670 }, { "epoch": 0.13850734063350498, "grad_norm": 0.5642924904823303, "learning_rate": 9.882236041965871e-05, "loss": 0.9753, "step": 21680 }, { "epoch": 0.13857122778324368, "grad_norm": 0.6261829733848572, "learning_rate": 9.882127757587377e-05, "loss": 0.773, "step": 21690 }, { "epoch": 0.1386351149329824, "grad_norm": 0.48715344071388245, "learning_rate": 9.882019424041629e-05, "loss": 0.7998, "step": 21700 }, { "epoch": 0.13869900208272107, "grad_norm": 0.850307285785675, "learning_rate": 9.881911041329726e-05, "loss": 0.9011, "step": 21710 }, { "epoch": 0.13876288923245977, "grad_norm": 0.7470149993896484, "learning_rate": 9.881802609452753e-05, "loss": 0.9515, "step": 21720 }, { "epoch": 0.13882677638219848, "grad_norm": 0.6368236541748047, "learning_rate": 9.881694128411804e-05, "loss": 1.0206, "step": 21730 }, { "epoch": 0.1388906635319372, "grad_norm": 1.1505577564239502, "learning_rate": 9.881585598207973e-05, "loss": 1.0826, "step": 21740 }, { "epoch": 0.1389545506816759, "grad_norm": 2.4669744968414307, "learning_rate": 9.881477018842352e-05, "loss": 1.018, "step": 21750 }, { "epoch": 0.1390184378314146, "grad_norm": 1.7851297855377197, "learning_rate": 9.881368390316033e-05, "loss": 0.7395, "step": 21760 }, { "epoch": 0.1390823249811533, "grad_norm": 0.6467908620834351, "learning_rate": 9.881259712630113e-05, "loss": 0.9388, "step": 21770 }, { "epoch": 0.13914621213089198, "grad_norm": 1.0224095582962036, "learning_rate": 9.881150985785683e-05, "loss": 0.9804, "step": 21780 }, { "epoch": 0.1392100992806307, "grad_norm": 0.8423238396644592, "learning_rate": 9.881042209783842e-05, "loss": 0.8013, "step": 21790 }, { "epoch": 0.1392739864303694, "grad_norm": 0.8437933325767517, "learning_rate": 9.880933384625681e-05, "loss": 1.0403, "step": 21800 }, { "epoch": 0.1393378735801081, "grad_norm": 0.8127179145812988, "learning_rate": 9.880824510312301e-05, "loss": 1.0857, "step": 21810 }, { "epoch": 0.1394017607298468, "grad_norm": 0.7408185005187988, "learning_rate": 9.880715586844793e-05, "loss": 0.8628, "step": 21820 }, { "epoch": 0.1394656478795855, "grad_norm": 0.8337761759757996, "learning_rate": 9.880606614224256e-05, "loss": 0.9279, "step": 21830 }, { "epoch": 0.1395295350293242, "grad_norm": 0.7604190707206726, "learning_rate": 9.880497592451791e-05, "loss": 0.6789, "step": 21840 }, { "epoch": 0.1395934221790629, "grad_norm": 0.8677889704704285, "learning_rate": 9.880388521528491e-05, "loss": 1.2008, "step": 21850 }, { "epoch": 0.1396573093288016, "grad_norm": 0.6006574630737305, "learning_rate": 9.880279401455459e-05, "loss": 0.9166, "step": 21860 }, { "epoch": 0.1397211964785403, "grad_norm": 1.9674246311187744, "learning_rate": 9.880170232233789e-05, "loss": 0.7204, "step": 21870 }, { "epoch": 0.13978508362827902, "grad_norm": 1.377977967262268, "learning_rate": 9.880061013864583e-05, "loss": 0.9785, "step": 21880 }, { "epoch": 0.13984897077801772, "grad_norm": 0.8513831496238708, "learning_rate": 9.879951746348942e-05, "loss": 1.1253, "step": 21890 }, { "epoch": 0.1399128579277564, "grad_norm": 0.7017676830291748, "learning_rate": 9.879842429687964e-05, "loss": 0.6319, "step": 21900 }, { "epoch": 0.1399767450774951, "grad_norm": 0.8190149068832397, "learning_rate": 9.87973306388275e-05, "loss": 1.0802, "step": 21910 }, { "epoch": 0.1400406322272338, "grad_norm": 0.7550898790359497, "learning_rate": 9.879623648934404e-05, "loss": 0.8998, "step": 21920 }, { "epoch": 0.14010451937697252, "grad_norm": 0.8115261793136597, "learning_rate": 9.879514184844027e-05, "loss": 0.6637, "step": 21930 }, { "epoch": 0.14016840652671123, "grad_norm": 0.6252816319465637, "learning_rate": 9.87940467161272e-05, "loss": 0.9156, "step": 21940 }, { "epoch": 0.14023229367644993, "grad_norm": 2.5343711376190186, "learning_rate": 9.879295109241587e-05, "loss": 1.0213, "step": 21950 }, { "epoch": 0.1402961808261886, "grad_norm": 0.9597296714782715, "learning_rate": 9.87918549773173e-05, "loss": 0.7637, "step": 21960 }, { "epoch": 0.14036006797592732, "grad_norm": 0.613199770450592, "learning_rate": 9.879075837084255e-05, "loss": 0.9528, "step": 21970 }, { "epoch": 0.14042395512566602, "grad_norm": 1.902039885520935, "learning_rate": 9.878966127300264e-05, "loss": 0.7262, "step": 21980 }, { "epoch": 0.14048784227540473, "grad_norm": 0.6681598424911499, "learning_rate": 9.878856368380864e-05, "loss": 0.9852, "step": 21990 }, { "epoch": 0.14055172942514343, "grad_norm": 0.593425989151001, "learning_rate": 9.87874656032716e-05, "loss": 0.8866, "step": 22000 }, { "epoch": 0.14061561657488214, "grad_norm": 0.8456883430480957, "learning_rate": 9.878636703140257e-05, "loss": 0.7837, "step": 22010 }, { "epoch": 0.14067950372462082, "grad_norm": 0.719262421131134, "learning_rate": 9.878526796821261e-05, "loss": 1.0117, "step": 22020 }, { "epoch": 0.14074339087435953, "grad_norm": 0.740960419178009, "learning_rate": 9.878416841371282e-05, "loss": 1.0046, "step": 22030 }, { "epoch": 0.14080727802409823, "grad_norm": 1.0368300676345825, "learning_rate": 9.878306836791423e-05, "loss": 0.8077, "step": 22040 }, { "epoch": 0.14087116517383694, "grad_norm": 1.3289177417755127, "learning_rate": 9.878196783082793e-05, "loss": 0.777, "step": 22050 }, { "epoch": 0.14093505232357564, "grad_norm": 0.9713805913925171, "learning_rate": 9.878086680246504e-05, "loss": 0.8098, "step": 22060 }, { "epoch": 0.14099893947331435, "grad_norm": 0.5410668253898621, "learning_rate": 9.877976528283661e-05, "loss": 0.9304, "step": 22070 }, { "epoch": 0.14106282662305303, "grad_norm": 0.8843280673027039, "learning_rate": 9.877866327195373e-05, "loss": 0.8307, "step": 22080 }, { "epoch": 0.14112671377279173, "grad_norm": 1.040749430656433, "learning_rate": 9.877756076982751e-05, "loss": 0.8895, "step": 22090 }, { "epoch": 0.14119060092253044, "grad_norm": 0.8764167428016663, "learning_rate": 9.877645777646907e-05, "loss": 0.9634, "step": 22100 }, { "epoch": 0.14125448807226915, "grad_norm": 0.5217092633247375, "learning_rate": 9.87753542918895e-05, "loss": 1.0052, "step": 22110 }, { "epoch": 0.14131837522200785, "grad_norm": 0.6405453681945801, "learning_rate": 9.87742503160999e-05, "loss": 0.9599, "step": 22120 }, { "epoch": 0.14138226237174656, "grad_norm": 0.7412799000740051, "learning_rate": 9.877314584911143e-05, "loss": 0.7852, "step": 22130 }, { "epoch": 0.14144614952148524, "grad_norm": 1.6060749292373657, "learning_rate": 9.877204089093516e-05, "loss": 1.2637, "step": 22140 }, { "epoch": 0.14151003667122394, "grad_norm": 1.0910207033157349, "learning_rate": 9.877093544158227e-05, "loss": 0.8333, "step": 22150 }, { "epoch": 0.14157392382096265, "grad_norm": 1.1602824926376343, "learning_rate": 9.876982950106384e-05, "loss": 0.9858, "step": 22160 }, { "epoch": 0.14163781097070136, "grad_norm": 0.8228923082351685, "learning_rate": 9.876872306939105e-05, "loss": 1.0867, "step": 22170 }, { "epoch": 0.14170169812044006, "grad_norm": 0.825602650642395, "learning_rate": 9.876761614657504e-05, "loss": 0.8261, "step": 22180 }, { "epoch": 0.14176558527017877, "grad_norm": 0.7997944355010986, "learning_rate": 9.876650873262692e-05, "loss": 0.8914, "step": 22190 }, { "epoch": 0.14182947241991745, "grad_norm": 0.6990909576416016, "learning_rate": 9.876540082755788e-05, "loss": 1.1852, "step": 22200 }, { "epoch": 0.14189335956965615, "grad_norm": 0.7908971309661865, "learning_rate": 9.876429243137906e-05, "loss": 0.7917, "step": 22210 }, { "epoch": 0.14195724671939486, "grad_norm": 1.2012591361999512, "learning_rate": 9.876318354410163e-05, "loss": 0.9249, "step": 22220 }, { "epoch": 0.14202113386913356, "grad_norm": 0.7461243867874146, "learning_rate": 9.876207416573677e-05, "loss": 0.9312, "step": 22230 }, { "epoch": 0.14208502101887227, "grad_norm": 0.8374635577201843, "learning_rate": 9.876096429629563e-05, "loss": 0.8613, "step": 22240 }, { "epoch": 0.14214890816861098, "grad_norm": 0.6775134801864624, "learning_rate": 9.875985393578938e-05, "loss": 0.9284, "step": 22250 }, { "epoch": 0.14221279531834966, "grad_norm": 1.069081425666809, "learning_rate": 9.875874308422923e-05, "loss": 0.809, "step": 22260 }, { "epoch": 0.14227668246808836, "grad_norm": 0.8016782402992249, "learning_rate": 9.875763174162635e-05, "loss": 0.8151, "step": 22270 }, { "epoch": 0.14234056961782707, "grad_norm": 0.7888844609260559, "learning_rate": 9.875651990799196e-05, "loss": 0.8556, "step": 22280 }, { "epoch": 0.14240445676756577, "grad_norm": 0.8360929489135742, "learning_rate": 9.875540758333721e-05, "loss": 0.7994, "step": 22290 }, { "epoch": 0.14246834391730448, "grad_norm": 0.520611584186554, "learning_rate": 9.875429476767333e-05, "loss": 0.8767, "step": 22300 }, { "epoch": 0.14253223106704319, "grad_norm": 0.47477564215660095, "learning_rate": 9.875318146101151e-05, "loss": 0.7093, "step": 22310 }, { "epoch": 0.14259611821678186, "grad_norm": 0.7633807063102722, "learning_rate": 9.8752067663363e-05, "loss": 1.1947, "step": 22320 }, { "epoch": 0.14266000536652057, "grad_norm": 0.7206790447235107, "learning_rate": 9.875095337473899e-05, "loss": 0.8928, "step": 22330 }, { "epoch": 0.14272389251625928, "grad_norm": 0.7361767888069153, "learning_rate": 9.874983859515069e-05, "loss": 0.8716, "step": 22340 }, { "epoch": 0.14278777966599798, "grad_norm": 0.8034409880638123, "learning_rate": 9.874872332460934e-05, "loss": 0.9446, "step": 22350 }, { "epoch": 0.1428516668157367, "grad_norm": 0.8999035954475403, "learning_rate": 9.874760756312617e-05, "loss": 1.0096, "step": 22360 }, { "epoch": 0.1429155539654754, "grad_norm": 0.8220607042312622, "learning_rate": 9.874649131071244e-05, "loss": 0.9535, "step": 22370 }, { "epoch": 0.14297944111521407, "grad_norm": 1.6880701780319214, "learning_rate": 9.874537456737936e-05, "loss": 0.9347, "step": 22380 }, { "epoch": 0.14304332826495278, "grad_norm": 2.4227957725524902, "learning_rate": 9.874425733313819e-05, "loss": 0.9415, "step": 22390 }, { "epoch": 0.14310721541469149, "grad_norm": 0.665111243724823, "learning_rate": 9.874313960800017e-05, "loss": 0.8991, "step": 22400 }, { "epoch": 0.1431711025644302, "grad_norm": 1.0277364253997803, "learning_rate": 9.874202139197657e-05, "loss": 1.0399, "step": 22410 }, { "epoch": 0.1432349897141689, "grad_norm": 0.8064048290252686, "learning_rate": 9.874090268507866e-05, "loss": 0.9161, "step": 22420 }, { "epoch": 0.1432988768639076, "grad_norm": 0.5607860684394836, "learning_rate": 9.873978348731767e-05, "loss": 0.8696, "step": 22430 }, { "epoch": 0.14336276401364628, "grad_norm": 0.6954864263534546, "learning_rate": 9.873866379870492e-05, "loss": 0.6301, "step": 22440 }, { "epoch": 0.143426651163385, "grad_norm": 0.675815999507904, "learning_rate": 9.873754361925162e-05, "loss": 0.9119, "step": 22450 }, { "epoch": 0.1434905383131237, "grad_norm": 1.266095757484436, "learning_rate": 9.873642294896913e-05, "loss": 0.9423, "step": 22460 }, { "epoch": 0.1435544254628624, "grad_norm": 0.8914671540260315, "learning_rate": 9.873530178786868e-05, "loss": 1.034, "step": 22470 }, { "epoch": 0.1436183126126011, "grad_norm": 0.953437864780426, "learning_rate": 9.873418013596159e-05, "loss": 0.9487, "step": 22480 }, { "epoch": 0.1436821997623398, "grad_norm": 0.6912809014320374, "learning_rate": 9.873305799325914e-05, "loss": 1.1522, "step": 22490 }, { "epoch": 0.1437460869120785, "grad_norm": 0.6595206260681152, "learning_rate": 9.873193535977263e-05, "loss": 0.977, "step": 22500 }, { "epoch": 0.1438099740618172, "grad_norm": 0.9730925559997559, "learning_rate": 9.873081223551338e-05, "loss": 0.7952, "step": 22510 }, { "epoch": 0.1438738612115559, "grad_norm": 4.339688777923584, "learning_rate": 9.872968862049268e-05, "loss": 0.9139, "step": 22520 }, { "epoch": 0.1439377483612946, "grad_norm": 0.939578652381897, "learning_rate": 9.872856451472188e-05, "loss": 1.0464, "step": 22530 }, { "epoch": 0.14400163551103332, "grad_norm": 0.8998389840126038, "learning_rate": 9.872743991821227e-05, "loss": 0.9492, "step": 22540 }, { "epoch": 0.14406552266077202, "grad_norm": 0.7495961785316467, "learning_rate": 9.872631483097518e-05, "loss": 0.8357, "step": 22550 }, { "epoch": 0.1441294098105107, "grad_norm": 0.7158836126327515, "learning_rate": 9.872518925302195e-05, "loss": 0.6346, "step": 22560 }, { "epoch": 0.1441932969602494, "grad_norm": 1.3562219142913818, "learning_rate": 9.872406318436391e-05, "loss": 0.7683, "step": 22570 }, { "epoch": 0.1442571841099881, "grad_norm": 2.4515798091888428, "learning_rate": 9.872293662501239e-05, "loss": 0.93, "step": 22580 }, { "epoch": 0.14432107125972682, "grad_norm": 0.6932923197746277, "learning_rate": 9.872180957497876e-05, "loss": 0.8557, "step": 22590 }, { "epoch": 0.14438495840946552, "grad_norm": 0.8083714842796326, "learning_rate": 9.872068203427434e-05, "loss": 0.9603, "step": 22600 }, { "epoch": 0.14444884555920423, "grad_norm": 0.6430138945579529, "learning_rate": 9.871955400291052e-05, "loss": 1.0151, "step": 22610 }, { "epoch": 0.14451273270894294, "grad_norm": 0.5157865881919861, "learning_rate": 9.871842548089864e-05, "loss": 1.0402, "step": 22620 }, { "epoch": 0.14457661985868162, "grad_norm": 0.7073084115982056, "learning_rate": 9.871729646825008e-05, "loss": 1.1601, "step": 22630 }, { "epoch": 0.14464050700842032, "grad_norm": 0.8356124758720398, "learning_rate": 9.871616696497618e-05, "loss": 0.7882, "step": 22640 }, { "epoch": 0.14470439415815903, "grad_norm": 0.7543877959251404, "learning_rate": 9.871503697108833e-05, "loss": 1.1977, "step": 22650 }, { "epoch": 0.14476828130789773, "grad_norm": 0.5048431158065796, "learning_rate": 9.871390648659793e-05, "loss": 0.6942, "step": 22660 }, { "epoch": 0.14483216845763644, "grad_norm": 0.8877227306365967, "learning_rate": 9.871277551151635e-05, "loss": 1.0161, "step": 22670 }, { "epoch": 0.14489605560737515, "grad_norm": 1.6515774726867676, "learning_rate": 9.871164404585496e-05, "loss": 0.7984, "step": 22680 }, { "epoch": 0.14495994275711382, "grad_norm": 0.7503309845924377, "learning_rate": 9.871051208962518e-05, "loss": 1.4356, "step": 22690 }, { "epoch": 0.14502382990685253, "grad_norm": 0.5918260216712952, "learning_rate": 9.87093796428384e-05, "loss": 0.8047, "step": 22700 }, { "epoch": 0.14508771705659124, "grad_norm": 0.7670891880989075, "learning_rate": 9.870824670550603e-05, "loss": 1.0355, "step": 22710 }, { "epoch": 0.14515160420632994, "grad_norm": 0.7030889987945557, "learning_rate": 9.870711327763947e-05, "loss": 0.9419, "step": 22720 }, { "epoch": 0.14521549135606865, "grad_norm": 1.9804078340530396, "learning_rate": 9.870597935925016e-05, "loss": 1.0519, "step": 22730 }, { "epoch": 0.14527937850580736, "grad_norm": 0.5866715312004089, "learning_rate": 9.870484495034948e-05, "loss": 0.8467, "step": 22740 }, { "epoch": 0.14534326565554603, "grad_norm": 1.0047521591186523, "learning_rate": 9.87037100509489e-05, "loss": 0.9234, "step": 22750 }, { "epoch": 0.14540715280528474, "grad_norm": 0.8460586667060852, "learning_rate": 9.87025746610598e-05, "loss": 0.98, "step": 22760 }, { "epoch": 0.14547103995502345, "grad_norm": 0.6952506303787231, "learning_rate": 9.870143878069364e-05, "loss": 0.8913, "step": 22770 }, { "epoch": 0.14553492710476215, "grad_norm": 0.8370442986488342, "learning_rate": 9.870030240986188e-05, "loss": 0.7564, "step": 22780 }, { "epoch": 0.14559881425450086, "grad_norm": 2.1772940158843994, "learning_rate": 9.869916554857593e-05, "loss": 1.0058, "step": 22790 }, { "epoch": 0.14566270140423956, "grad_norm": 1.9751546382904053, "learning_rate": 9.869802819684726e-05, "loss": 0.8494, "step": 22800 }, { "epoch": 0.14572658855397824, "grad_norm": 1.1138042211532593, "learning_rate": 9.86968903546873e-05, "loss": 0.745, "step": 22810 }, { "epoch": 0.14579047570371695, "grad_norm": 0.9470332264900208, "learning_rate": 9.869575202210754e-05, "loss": 0.9222, "step": 22820 }, { "epoch": 0.14585436285345565, "grad_norm": 0.6957728862762451, "learning_rate": 9.869461319911944e-05, "loss": 1.0055, "step": 22830 }, { "epoch": 0.14591825000319436, "grad_norm": 0.7304112911224365, "learning_rate": 9.869347388573443e-05, "loss": 0.8063, "step": 22840 }, { "epoch": 0.14598213715293307, "grad_norm": 0.4859442710876465, "learning_rate": 9.869233408196403e-05, "loss": 0.7749, "step": 22850 }, { "epoch": 0.14604602430267177, "grad_norm": 0.6382431387901306, "learning_rate": 9.86911937878197e-05, "loss": 0.9488, "step": 22860 }, { "epoch": 0.14610991145241045, "grad_norm": 0.6626219153404236, "learning_rate": 9.869005300331291e-05, "loss": 0.6605, "step": 22870 }, { "epoch": 0.14617379860214916, "grad_norm": 0.9865225553512573, "learning_rate": 9.868891172845519e-05, "loss": 1.0758, "step": 22880 }, { "epoch": 0.14623768575188786, "grad_norm": 0.7838436365127563, "learning_rate": 9.868776996325799e-05, "loss": 1.0838, "step": 22890 }, { "epoch": 0.14630157290162657, "grad_norm": 0.7881513833999634, "learning_rate": 9.868662770773282e-05, "loss": 0.8395, "step": 22900 }, { "epoch": 0.14636546005136528, "grad_norm": 0.6249982118606567, "learning_rate": 9.86854849618912e-05, "loss": 1.0855, "step": 22910 }, { "epoch": 0.14642934720110398, "grad_norm": 0.7879114151000977, "learning_rate": 9.868434172574462e-05, "loss": 1.0791, "step": 22920 }, { "epoch": 0.14649323435084266, "grad_norm": 0.872688353061676, "learning_rate": 9.86831979993046e-05, "loss": 1.189, "step": 22930 }, { "epoch": 0.14655712150058137, "grad_norm": 0.6431063413619995, "learning_rate": 9.868205378258266e-05, "loss": 1.0102, "step": 22940 }, { "epoch": 0.14662100865032007, "grad_norm": 0.9336161017417908, "learning_rate": 9.868090907559033e-05, "loss": 1.1622, "step": 22950 }, { "epoch": 0.14668489580005878, "grad_norm": 1.0055698156356812, "learning_rate": 9.867976387833913e-05, "loss": 0.8623, "step": 22960 }, { "epoch": 0.14674878294979748, "grad_norm": 1.0225908756256104, "learning_rate": 9.867861819084059e-05, "loss": 0.7738, "step": 22970 }, { "epoch": 0.1468126700995362, "grad_norm": 0.9196385741233826, "learning_rate": 9.867747201310626e-05, "loss": 0.8153, "step": 22980 }, { "epoch": 0.14687655724927487, "grad_norm": 1.0798165798187256, "learning_rate": 9.867632534514766e-05, "loss": 0.9407, "step": 22990 }, { "epoch": 0.14694044439901358, "grad_norm": 0.8176427483558655, "learning_rate": 9.867517818697636e-05, "loss": 0.9316, "step": 23000 }, { "epoch": 0.14700433154875228, "grad_norm": 1.2678016424179077, "learning_rate": 9.867403053860391e-05, "loss": 0.7385, "step": 23010 }, { "epoch": 0.147068218698491, "grad_norm": 1.1173145771026611, "learning_rate": 9.867288240004185e-05, "loss": 0.9177, "step": 23020 }, { "epoch": 0.1471321058482297, "grad_norm": 0.6615016460418701, "learning_rate": 9.867173377130177e-05, "loss": 0.9355, "step": 23030 }, { "epoch": 0.1471959929979684, "grad_norm": 0.5626130104064941, "learning_rate": 9.867058465239522e-05, "loss": 0.73, "step": 23040 }, { "epoch": 0.14725988014770708, "grad_norm": 0.9644745588302612, "learning_rate": 9.866943504333377e-05, "loss": 0.8876, "step": 23050 }, { "epoch": 0.14732376729744578, "grad_norm": 1.4023088216781616, "learning_rate": 9.866828494412901e-05, "loss": 0.8923, "step": 23060 }, { "epoch": 0.1473876544471845, "grad_norm": 0.6760227680206299, "learning_rate": 9.866713435479252e-05, "loss": 0.8072, "step": 23070 }, { "epoch": 0.1474515415969232, "grad_norm": 0.9531158804893494, "learning_rate": 9.866598327533589e-05, "loss": 0.8004, "step": 23080 }, { "epoch": 0.1475154287466619, "grad_norm": 0.6163201928138733, "learning_rate": 9.866483170577069e-05, "loss": 0.9639, "step": 23090 }, { "epoch": 0.1475793158964006, "grad_norm": 0.6841567158699036, "learning_rate": 9.866367964610854e-05, "loss": 1.0902, "step": 23100 }, { "epoch": 0.1476432030461393, "grad_norm": 0.8613043427467346, "learning_rate": 9.866252709636104e-05, "loss": 0.8745, "step": 23110 }, { "epoch": 0.147707090195878, "grad_norm": 0.9095843434333801, "learning_rate": 9.86613740565398e-05, "loss": 0.8784, "step": 23120 }, { "epoch": 0.1477709773456167, "grad_norm": 0.6751396059989929, "learning_rate": 9.86602205266564e-05, "loss": 1.0972, "step": 23130 }, { "epoch": 0.1478348644953554, "grad_norm": 0.7569636106491089, "learning_rate": 9.86590665067225e-05, "loss": 1.0461, "step": 23140 }, { "epoch": 0.1478987516450941, "grad_norm": 1.0290982723236084, "learning_rate": 9.86579119967497e-05, "loss": 1.112, "step": 23150 }, { "epoch": 0.14796263879483282, "grad_norm": 0.6011145114898682, "learning_rate": 9.865675699674964e-05, "loss": 1.0506, "step": 23160 }, { "epoch": 0.1480265259445715, "grad_norm": 0.8810587525367737, "learning_rate": 9.865560150673392e-05, "loss": 0.9679, "step": 23170 }, { "epoch": 0.1480904130943102, "grad_norm": 0.7942286133766174, "learning_rate": 9.865444552671422e-05, "loss": 0.8441, "step": 23180 }, { "epoch": 0.1481543002440489, "grad_norm": 1.2883180379867554, "learning_rate": 9.865328905670215e-05, "loss": 0.9123, "step": 23190 }, { "epoch": 0.14821818739378761, "grad_norm": 0.9160734415054321, "learning_rate": 9.865213209670939e-05, "loss": 0.8103, "step": 23200 }, { "epoch": 0.14828207454352632, "grad_norm": 0.5292953848838806, "learning_rate": 9.865097464674754e-05, "loss": 0.7631, "step": 23210 }, { "epoch": 0.14834596169326503, "grad_norm": 1.5886908769607544, "learning_rate": 9.86498167068283e-05, "loss": 0.9782, "step": 23220 }, { "epoch": 0.1484098488430037, "grad_norm": 1.2354532480239868, "learning_rate": 9.864865827696333e-05, "loss": 1.0666, "step": 23230 }, { "epoch": 0.1484737359927424, "grad_norm": 0.902732789516449, "learning_rate": 9.864749935716427e-05, "loss": 0.8587, "step": 23240 }, { "epoch": 0.14853762314248112, "grad_norm": 0.9489061236381531, "learning_rate": 9.86463399474428e-05, "loss": 0.9015, "step": 23250 }, { "epoch": 0.14860151029221982, "grad_norm": 1.0594868659973145, "learning_rate": 9.86451800478106e-05, "loss": 0.9639, "step": 23260 }, { "epoch": 0.14866539744195853, "grad_norm": 0.9709058403968811, "learning_rate": 9.864401965827936e-05, "loss": 0.9575, "step": 23270 }, { "epoch": 0.14872928459169724, "grad_norm": 0.7420225143432617, "learning_rate": 9.864285877886076e-05, "loss": 0.8139, "step": 23280 }, { "epoch": 0.14879317174143591, "grad_norm": 1.2411167621612549, "learning_rate": 9.86416974095665e-05, "loss": 0.9154, "step": 23290 }, { "epoch": 0.14885705889117462, "grad_norm": 0.9969791769981384, "learning_rate": 9.864053555040826e-05, "loss": 0.7712, "step": 23300 }, { "epoch": 0.14892094604091333, "grad_norm": 0.7000773549079895, "learning_rate": 9.863937320139774e-05, "loss": 0.9034, "step": 23310 }, { "epoch": 0.14898483319065203, "grad_norm": 0.8266654014587402, "learning_rate": 9.863821036254666e-05, "loss": 0.9289, "step": 23320 }, { "epoch": 0.14904872034039074, "grad_norm": 0.5291149616241455, "learning_rate": 9.863704703386671e-05, "loss": 0.8965, "step": 23330 }, { "epoch": 0.14911260749012945, "grad_norm": 1.1645135879516602, "learning_rate": 9.863588321536964e-05, "loss": 1.0616, "step": 23340 }, { "epoch": 0.14917649463986812, "grad_norm": 0.7084513902664185, "learning_rate": 9.863471890706714e-05, "loss": 1.0098, "step": 23350 }, { "epoch": 0.14924038178960683, "grad_norm": 0.6941312551498413, "learning_rate": 9.863355410897095e-05, "loss": 0.9369, "step": 23360 }, { "epoch": 0.14930426893934554, "grad_norm": 1.0156537294387817, "learning_rate": 9.863238882109278e-05, "loss": 1.1076, "step": 23370 }, { "epoch": 0.14936815608908424, "grad_norm": 0.8023911714553833, "learning_rate": 9.863122304344439e-05, "loss": 0.8709, "step": 23380 }, { "epoch": 0.14943204323882295, "grad_norm": 0.8865915536880493, "learning_rate": 9.863005677603752e-05, "loss": 0.8393, "step": 23390 }, { "epoch": 0.14949593038856165, "grad_norm": 1.4520982503890991, "learning_rate": 9.86288900188839e-05, "loss": 1.138, "step": 23400 }, { "epoch": 0.14955981753830033, "grad_norm": 1.1401234865188599, "learning_rate": 9.862772277199529e-05, "loss": 1.1788, "step": 23410 }, { "epoch": 0.14962370468803904, "grad_norm": 0.632628858089447, "learning_rate": 9.862655503538344e-05, "loss": 0.8879, "step": 23420 }, { "epoch": 0.14968759183777774, "grad_norm": 0.6416946649551392, "learning_rate": 9.862538680906012e-05, "loss": 0.8936, "step": 23430 }, { "epoch": 0.14975147898751645, "grad_norm": 0.6808968186378479, "learning_rate": 9.862421809303708e-05, "loss": 0.8778, "step": 23440 }, { "epoch": 0.14981536613725516, "grad_norm": 0.9920696020126343, "learning_rate": 9.86230488873261e-05, "loss": 0.8278, "step": 23450 }, { "epoch": 0.14987925328699386, "grad_norm": 0.8314083218574524, "learning_rate": 9.862187919193895e-05, "loss": 0.9445, "step": 23460 }, { "epoch": 0.14994314043673257, "grad_norm": 0.7839555740356445, "learning_rate": 9.862070900688742e-05, "loss": 0.9105, "step": 23470 }, { "epoch": 0.15000702758647125, "grad_norm": 0.7194756865501404, "learning_rate": 9.861953833218329e-05, "loss": 0.8104, "step": 23480 }, { "epoch": 0.15007091473620995, "grad_norm": 0.8320297002792358, "learning_rate": 9.861836716783834e-05, "loss": 0.9076, "step": 23490 }, { "epoch": 0.15013480188594866, "grad_norm": 0.744303822517395, "learning_rate": 9.861719551386437e-05, "loss": 0.7775, "step": 23500 }, { "epoch": 0.15019868903568737, "grad_norm": 1.1499621868133545, "learning_rate": 9.861602337027318e-05, "loss": 1.0126, "step": 23510 }, { "epoch": 0.15026257618542607, "grad_norm": 0.893481969833374, "learning_rate": 9.861485073707658e-05, "loss": 0.9876, "step": 23520 }, { "epoch": 0.15032646333516478, "grad_norm": 1.0423784255981445, "learning_rate": 9.861367761428638e-05, "loss": 0.831, "step": 23530 }, { "epoch": 0.15039035048490346, "grad_norm": 0.7774150371551514, "learning_rate": 9.861250400191438e-05, "loss": 0.8752, "step": 23540 }, { "epoch": 0.15045423763464216, "grad_norm": 0.9276893138885498, "learning_rate": 9.861132989997242e-05, "loss": 0.815, "step": 23550 }, { "epoch": 0.15051812478438087, "grad_norm": 1.5479460954666138, "learning_rate": 9.86101553084723e-05, "loss": 1.1705, "step": 23560 }, { "epoch": 0.15058201193411958, "grad_norm": 1.3702467679977417, "learning_rate": 9.860898022742587e-05, "loss": 1.1229, "step": 23570 }, { "epoch": 0.15064589908385828, "grad_norm": 0.8833318948745728, "learning_rate": 9.860780465684497e-05, "loss": 0.8501, "step": 23580 }, { "epoch": 0.150709786233597, "grad_norm": 0.8857479691505432, "learning_rate": 9.860662859674139e-05, "loss": 0.9028, "step": 23590 }, { "epoch": 0.15077367338333567, "grad_norm": 0.9464370608329773, "learning_rate": 9.860545204712703e-05, "loss": 0.8605, "step": 23600 }, { "epoch": 0.15083756053307437, "grad_norm": 0.9219076037406921, "learning_rate": 9.860427500801372e-05, "loss": 0.8217, "step": 23610 }, { "epoch": 0.15090144768281308, "grad_norm": 2.4392945766448975, "learning_rate": 9.860309747941333e-05, "loss": 0.8927, "step": 23620 }, { "epoch": 0.15096533483255178, "grad_norm": 1.1871190071105957, "learning_rate": 9.860191946133766e-05, "loss": 1.1577, "step": 23630 }, { "epoch": 0.1510292219822905, "grad_norm": 1.2772961854934692, "learning_rate": 9.860074095379863e-05, "loss": 0.9204, "step": 23640 }, { "epoch": 0.1510931091320292, "grad_norm": 0.6214377284049988, "learning_rate": 9.859956195680811e-05, "loss": 0.8562, "step": 23650 }, { "epoch": 0.15115699628176787, "grad_norm": 0.7957346439361572, "learning_rate": 9.859838247037794e-05, "loss": 0.7878, "step": 23660 }, { "epoch": 0.15122088343150658, "grad_norm": 0.7047122716903687, "learning_rate": 9.859720249452003e-05, "loss": 0.9215, "step": 23670 }, { "epoch": 0.1512847705812453, "grad_norm": 0.8219524025917053, "learning_rate": 9.859602202924623e-05, "loss": 0.884, "step": 23680 }, { "epoch": 0.151348657730984, "grad_norm": 0.844274640083313, "learning_rate": 9.859484107456846e-05, "loss": 0.8565, "step": 23690 }, { "epoch": 0.1514125448807227, "grad_norm": 0.8894696831703186, "learning_rate": 9.859365963049858e-05, "loss": 0.8738, "step": 23700 }, { "epoch": 0.1514764320304614, "grad_norm": 1.032109260559082, "learning_rate": 9.859247769704854e-05, "loss": 0.7034, "step": 23710 }, { "epoch": 0.15154031918020008, "grad_norm": 0.8953695297241211, "learning_rate": 9.859129527423019e-05, "loss": 0.9061, "step": 23720 }, { "epoch": 0.1516042063299388, "grad_norm": 0.7908507585525513, "learning_rate": 9.859011236205547e-05, "loss": 0.9427, "step": 23730 }, { "epoch": 0.1516680934796775, "grad_norm": 0.7494611144065857, "learning_rate": 9.858892896053626e-05, "loss": 0.7095, "step": 23740 }, { "epoch": 0.1517319806294162, "grad_norm": 0.7644729614257812, "learning_rate": 9.858774506968451e-05, "loss": 0.9053, "step": 23750 }, { "epoch": 0.1517958677791549, "grad_norm": 1.1524786949157715, "learning_rate": 9.858656068951215e-05, "loss": 0.7965, "step": 23760 }, { "epoch": 0.15185975492889361, "grad_norm": 0.8188411593437195, "learning_rate": 9.858537582003107e-05, "loss": 0.986, "step": 23770 }, { "epoch": 0.1519236420786323, "grad_norm": 0.9521570801734924, "learning_rate": 9.858419046125322e-05, "loss": 0.791, "step": 23780 }, { "epoch": 0.151987529228371, "grad_norm": 1.1801695823669434, "learning_rate": 9.858300461319057e-05, "loss": 0.8084, "step": 23790 }, { "epoch": 0.1520514163781097, "grad_norm": 0.66313236951828, "learning_rate": 9.8581818275855e-05, "loss": 1.0134, "step": 23800 }, { "epoch": 0.1521153035278484, "grad_norm": 0.7492579817771912, "learning_rate": 9.85806314492585e-05, "loss": 0.892, "step": 23810 }, { "epoch": 0.15217919067758712, "grad_norm": 0.7110322713851929, "learning_rate": 9.857944413341304e-05, "loss": 1.1158, "step": 23820 }, { "epoch": 0.15224307782732582, "grad_norm": 1.010519027709961, "learning_rate": 9.857825632833053e-05, "loss": 0.9537, "step": 23830 }, { "epoch": 0.1523069649770645, "grad_norm": 0.8604142069816589, "learning_rate": 9.857706803402294e-05, "loss": 0.936, "step": 23840 }, { "epoch": 0.1523708521268032, "grad_norm": 0.5838251113891602, "learning_rate": 9.857587925050226e-05, "loss": 0.9363, "step": 23850 }, { "epoch": 0.15243473927654191, "grad_norm": 0.7778534889221191, "learning_rate": 9.857468997778046e-05, "loss": 1.0045, "step": 23860 }, { "epoch": 0.15249862642628062, "grad_norm": 1.257494568824768, "learning_rate": 9.85735002158695e-05, "loss": 0.847, "step": 23870 }, { "epoch": 0.15256251357601933, "grad_norm": 0.7079510688781738, "learning_rate": 9.857230996478137e-05, "loss": 1.0672, "step": 23880 }, { "epoch": 0.15262640072575803, "grad_norm": 2.4514129161834717, "learning_rate": 9.857111922452807e-05, "loss": 0.7693, "step": 23890 }, { "epoch": 0.1526902878754967, "grad_norm": 0.5904504060745239, "learning_rate": 9.856992799512157e-05, "loss": 0.9016, "step": 23900 }, { "epoch": 0.15275417502523542, "grad_norm": 0.7344809770584106, "learning_rate": 9.856873627657387e-05, "loss": 0.7255, "step": 23910 }, { "epoch": 0.15281806217497412, "grad_norm": 1.4561502933502197, "learning_rate": 9.856754406889698e-05, "loss": 0.9038, "step": 23920 }, { "epoch": 0.15288194932471283, "grad_norm": 0.8599551916122437, "learning_rate": 9.85663513721029e-05, "loss": 0.7992, "step": 23930 }, { "epoch": 0.15294583647445154, "grad_norm": 2.2323386669158936, "learning_rate": 9.856515818620367e-05, "loss": 0.9681, "step": 23940 }, { "epoch": 0.15300972362419024, "grad_norm": 1.3280889987945557, "learning_rate": 9.856396451121125e-05, "loss": 0.6727, "step": 23950 }, { "epoch": 0.15307361077392892, "grad_norm": 1.3691116571426392, "learning_rate": 9.856277034713772e-05, "loss": 0.8038, "step": 23960 }, { "epoch": 0.15313749792366763, "grad_norm": 1.1116257905960083, "learning_rate": 9.856157569399507e-05, "loss": 0.749, "step": 23970 }, { "epoch": 0.15320138507340633, "grad_norm": 1.1849030256271362, "learning_rate": 9.856038055179535e-05, "loss": 0.9773, "step": 23980 }, { "epoch": 0.15326527222314504, "grad_norm": 0.88172447681427, "learning_rate": 9.855918492055057e-05, "loss": 1.1426, "step": 23990 }, { "epoch": 0.15332915937288374, "grad_norm": 1.102968454360962, "learning_rate": 9.855798880027279e-05, "loss": 0.9212, "step": 24000 }, { "epoch": 0.15339304652262245, "grad_norm": 1.179286003112793, "learning_rate": 9.855679219097407e-05, "loss": 0.9407, "step": 24010 }, { "epoch": 0.15345693367236113, "grad_norm": 0.7198648452758789, "learning_rate": 9.855559509266644e-05, "loss": 0.9663, "step": 24020 }, { "epoch": 0.15352082082209983, "grad_norm": 0.9259359240531921, "learning_rate": 9.855439750536195e-05, "loss": 1.0747, "step": 24030 }, { "epoch": 0.15358470797183854, "grad_norm": 0.9067502021789551, "learning_rate": 9.855319942907268e-05, "loss": 0.7373, "step": 24040 }, { "epoch": 0.15364859512157725, "grad_norm": 0.6593869924545288, "learning_rate": 9.855200086381068e-05, "loss": 0.9685, "step": 24050 }, { "epoch": 0.15371248227131595, "grad_norm": 0.810939610004425, "learning_rate": 9.855080180958803e-05, "loss": 0.7862, "step": 24060 }, { "epoch": 0.15377636942105466, "grad_norm": 0.8420569896697998, "learning_rate": 9.854960226641681e-05, "loss": 0.8562, "step": 24070 }, { "epoch": 0.15384025657079334, "grad_norm": 0.7327421307563782, "learning_rate": 9.854840223430909e-05, "loss": 0.852, "step": 24080 }, { "epoch": 0.15390414372053204, "grad_norm": 0.8360452055931091, "learning_rate": 9.854720171327696e-05, "loss": 0.9425, "step": 24090 }, { "epoch": 0.15396803087027075, "grad_norm": 0.6557414531707764, "learning_rate": 9.854600070333251e-05, "loss": 0.754, "step": 24100 }, { "epoch": 0.15403191802000946, "grad_norm": 0.9082469940185547, "learning_rate": 9.854479920448782e-05, "loss": 0.9427, "step": 24110 }, { "epoch": 0.15409580516974816, "grad_norm": 0.7796029448509216, "learning_rate": 9.854359721675503e-05, "loss": 0.8438, "step": 24120 }, { "epoch": 0.15415969231948687, "grad_norm": 0.6190805435180664, "learning_rate": 9.85423947401462e-05, "loss": 0.8237, "step": 24130 }, { "epoch": 0.15422357946922555, "grad_norm": 0.813653290271759, "learning_rate": 9.854119177467347e-05, "loss": 0.8553, "step": 24140 }, { "epoch": 0.15428746661896425, "grad_norm": 0.8362258672714233, "learning_rate": 9.853998832034894e-05, "loss": 0.9488, "step": 24150 }, { "epoch": 0.15435135376870296, "grad_norm": 1.0680490732192993, "learning_rate": 9.853878437718473e-05, "loss": 0.9838, "step": 24160 }, { "epoch": 0.15441524091844167, "grad_norm": 1.0183037519454956, "learning_rate": 9.853757994519299e-05, "loss": 0.6685, "step": 24170 }, { "epoch": 0.15447912806818037, "grad_norm": 0.7617247700691223, "learning_rate": 9.853637502438582e-05, "loss": 0.8784, "step": 24180 }, { "epoch": 0.15454301521791908, "grad_norm": 0.633660614490509, "learning_rate": 9.853516961477535e-05, "loss": 0.8068, "step": 24190 }, { "epoch": 0.15460690236765776, "grad_norm": 0.8987011313438416, "learning_rate": 9.853396371637374e-05, "loss": 0.6322, "step": 24200 }, { "epoch": 0.15467078951739646, "grad_norm": 0.8973355889320374, "learning_rate": 9.853275732919314e-05, "loss": 1.2822, "step": 24210 }, { "epoch": 0.15473467666713517, "grad_norm": 1.284421682357788, "learning_rate": 9.853155045324567e-05, "loss": 0.9294, "step": 24220 }, { "epoch": 0.15479856381687387, "grad_norm": 1.0189619064331055, "learning_rate": 9.85303430885435e-05, "loss": 0.7897, "step": 24230 }, { "epoch": 0.15486245096661258, "grad_norm": 0.8572905659675598, "learning_rate": 9.85291352350988e-05, "loss": 0.9204, "step": 24240 }, { "epoch": 0.1549263381163513, "grad_norm": 1.0044801235198975, "learning_rate": 9.852792689292373e-05, "loss": 1.0265, "step": 24250 }, { "epoch": 0.15499022526608996, "grad_norm": 0.8651962280273438, "learning_rate": 9.852671806203045e-05, "loss": 0.6892, "step": 24260 }, { "epoch": 0.15505411241582867, "grad_norm": 1.309009075164795, "learning_rate": 9.852550874243111e-05, "loss": 1.0858, "step": 24270 }, { "epoch": 0.15511799956556738, "grad_norm": 0.9584972262382507, "learning_rate": 9.852429893413795e-05, "loss": 0.9216, "step": 24280 }, { "epoch": 0.15518188671530608, "grad_norm": 0.6010156869888306, "learning_rate": 9.852308863716311e-05, "loss": 0.8739, "step": 24290 }, { "epoch": 0.1552457738650448, "grad_norm": 0.8952304124832153, "learning_rate": 9.852187785151879e-05, "loss": 0.9147, "step": 24300 }, { "epoch": 0.1553096610147835, "grad_norm": 0.6536133885383606, "learning_rate": 9.85206665772172e-05, "loss": 0.8771, "step": 24310 }, { "epoch": 0.1553735481645222, "grad_norm": 0.7753522992134094, "learning_rate": 9.851945481427048e-05, "loss": 1.0301, "step": 24320 }, { "epoch": 0.15543743531426088, "grad_norm": 1.4516469240188599, "learning_rate": 9.851824256269092e-05, "loss": 1.0265, "step": 24330 }, { "epoch": 0.1555013224639996, "grad_norm": 0.934195339679718, "learning_rate": 9.851702982249065e-05, "loss": 0.995, "step": 24340 }, { "epoch": 0.1555652096137383, "grad_norm": 0.7957481741905212, "learning_rate": 9.851581659368192e-05, "loss": 0.8226, "step": 24350 }, { "epoch": 0.155629096763477, "grad_norm": 0.7475680708885193, "learning_rate": 9.851460287627695e-05, "loss": 1.0825, "step": 24360 }, { "epoch": 0.1556929839132157, "grad_norm": 0.65959233045578, "learning_rate": 9.851338867028797e-05, "loss": 1.1795, "step": 24370 }, { "epoch": 0.1557568710629544, "grad_norm": 0.6770491600036621, "learning_rate": 9.851217397572718e-05, "loss": 0.9308, "step": 24380 }, { "epoch": 0.1558207582126931, "grad_norm": 0.6056157946586609, "learning_rate": 9.851095879260684e-05, "loss": 1.0731, "step": 24390 }, { "epoch": 0.1558846453624318, "grad_norm": 0.8914613127708435, "learning_rate": 9.850974312093918e-05, "loss": 0.7644, "step": 24400 }, { "epoch": 0.1559485325121705, "grad_norm": 0.8289129137992859, "learning_rate": 9.850852696073643e-05, "loss": 1.1423, "step": 24410 }, { "epoch": 0.1560124196619092, "grad_norm": 1.1932592391967773, "learning_rate": 9.850731031201084e-05, "loss": 0.7908, "step": 24420 }, { "epoch": 0.1560763068116479, "grad_norm": 0.8615885376930237, "learning_rate": 9.850609317477468e-05, "loss": 0.7105, "step": 24430 }, { "epoch": 0.15614019396138662, "grad_norm": 0.647098958492279, "learning_rate": 9.85048755490402e-05, "loss": 1.0009, "step": 24440 }, { "epoch": 0.1562040811111253, "grad_norm": 0.6660744547843933, "learning_rate": 9.850365743481965e-05, "loss": 0.8714, "step": 24450 }, { "epoch": 0.156267968260864, "grad_norm": 0.84688800573349, "learning_rate": 9.850243883212531e-05, "loss": 0.942, "step": 24460 }, { "epoch": 0.1563318554106027, "grad_norm": 0.48218655586242676, "learning_rate": 9.850121974096946e-05, "loss": 1.0805, "step": 24470 }, { "epoch": 0.15639574256034142, "grad_norm": 0.9218449592590332, "learning_rate": 9.850000016136437e-05, "loss": 1.0481, "step": 24480 }, { "epoch": 0.15645962971008012, "grad_norm": 0.584633469581604, "learning_rate": 9.849878009332231e-05, "loss": 0.8474, "step": 24490 }, { "epoch": 0.15652351685981883, "grad_norm": 0.8491461873054504, "learning_rate": 9.849755953685557e-05, "loss": 0.9905, "step": 24500 }, { "epoch": 0.1565874040095575, "grad_norm": 0.961509644985199, "learning_rate": 9.849633849197649e-05, "loss": 1.1605, "step": 24510 }, { "epoch": 0.1566512911592962, "grad_norm": 0.8623896241188049, "learning_rate": 9.849511695869728e-05, "loss": 0.7161, "step": 24520 }, { "epoch": 0.15671517830903492, "grad_norm": 0.6448975205421448, "learning_rate": 9.84938949370303e-05, "loss": 0.9754, "step": 24530 }, { "epoch": 0.15677906545877363, "grad_norm": 0.5791314244270325, "learning_rate": 9.849267242698785e-05, "loss": 0.7836, "step": 24540 }, { "epoch": 0.15684295260851233, "grad_norm": 0.5874826312065125, "learning_rate": 9.849144942858224e-05, "loss": 0.8067, "step": 24550 }, { "epoch": 0.15690683975825104, "grad_norm": 0.7695150375366211, "learning_rate": 9.849022594182577e-05, "loss": 1.153, "step": 24560 }, { "epoch": 0.15697072690798972, "grad_norm": 0.7399982213973999, "learning_rate": 9.848900196673079e-05, "loss": 1.2349, "step": 24570 }, { "epoch": 0.15703461405772842, "grad_norm": 0.8517500758171082, "learning_rate": 9.848777750330961e-05, "loss": 1.01, "step": 24580 }, { "epoch": 0.15709850120746713, "grad_norm": 0.6582129001617432, "learning_rate": 9.848655255157456e-05, "loss": 0.71, "step": 24590 }, { "epoch": 0.15716238835720583, "grad_norm": 0.5711886286735535, "learning_rate": 9.848532711153797e-05, "loss": 0.9785, "step": 24600 }, { "epoch": 0.15722627550694454, "grad_norm": 0.7866716980934143, "learning_rate": 9.848410118321221e-05, "loss": 0.8093, "step": 24610 }, { "epoch": 0.15729016265668325, "grad_norm": 0.6282891631126404, "learning_rate": 9.848287476660958e-05, "loss": 0.8937, "step": 24620 }, { "epoch": 0.15735404980642193, "grad_norm": 1.6044594049453735, "learning_rate": 9.848164786174248e-05, "loss": 1.0449, "step": 24630 }, { "epoch": 0.15741793695616063, "grad_norm": 1.279166579246521, "learning_rate": 9.848042046862322e-05, "loss": 1.2909, "step": 24640 }, { "epoch": 0.15748182410589934, "grad_norm": 1.3262732028961182, "learning_rate": 9.847919258726421e-05, "loss": 0.9336, "step": 24650 }, { "epoch": 0.15754571125563804, "grad_norm": 0.7303173542022705, "learning_rate": 9.847796421767777e-05, "loss": 0.8935, "step": 24660 }, { "epoch": 0.15760959840537675, "grad_norm": 0.8746846914291382, "learning_rate": 9.84767353598763e-05, "loss": 0.8438, "step": 24670 }, { "epoch": 0.15767348555511546, "grad_norm": 1.244907259941101, "learning_rate": 9.847550601387217e-05, "loss": 0.672, "step": 24680 }, { "epoch": 0.15773737270485413, "grad_norm": 0.7882753610610962, "learning_rate": 9.847427617967775e-05, "loss": 0.8104, "step": 24690 }, { "epoch": 0.15780125985459284, "grad_norm": 0.5869142413139343, "learning_rate": 9.847304585730544e-05, "loss": 0.9445, "step": 24700 }, { "epoch": 0.15786514700433155, "grad_norm": 0.8743402361869812, "learning_rate": 9.847181504676761e-05, "loss": 1.0129, "step": 24710 }, { "epoch": 0.15792903415407025, "grad_norm": 0.8246279358863831, "learning_rate": 9.847058374807669e-05, "loss": 0.8171, "step": 24720 }, { "epoch": 0.15799292130380896, "grad_norm": 0.7410875558853149, "learning_rate": 9.846935196124504e-05, "loss": 0.9308, "step": 24730 }, { "epoch": 0.15805680845354766, "grad_norm": 0.9520349502563477, "learning_rate": 9.846811968628509e-05, "loss": 1.0484, "step": 24740 }, { "epoch": 0.15812069560328634, "grad_norm": 1.908379316329956, "learning_rate": 9.846688692320925e-05, "loss": 0.9074, "step": 24750 }, { "epoch": 0.15818458275302505, "grad_norm": 1.148059368133545, "learning_rate": 9.846565367202992e-05, "loss": 1.0573, "step": 24760 }, { "epoch": 0.15824846990276376, "grad_norm": 0.6221771836280823, "learning_rate": 9.846441993275952e-05, "loss": 0.9355, "step": 24770 }, { "epoch": 0.15831235705250246, "grad_norm": 0.7107810974121094, "learning_rate": 9.84631857054105e-05, "loss": 0.8245, "step": 24780 }, { "epoch": 0.15837624420224117, "grad_norm": 2.3203704357147217, "learning_rate": 9.846195098999527e-05, "loss": 0.7197, "step": 24790 }, { "epoch": 0.15844013135197987, "grad_norm": 0.8047979474067688, "learning_rate": 9.846071578652627e-05, "loss": 1.0095, "step": 24800 }, { "epoch": 0.15850401850171855, "grad_norm": 0.848024845123291, "learning_rate": 9.845948009501593e-05, "loss": 0.9665, "step": 24810 }, { "epoch": 0.15856790565145726, "grad_norm": 0.5435264706611633, "learning_rate": 9.845824391547671e-05, "loss": 0.7763, "step": 24820 }, { "epoch": 0.15863179280119596, "grad_norm": 0.6636167764663696, "learning_rate": 9.845700724792104e-05, "loss": 0.966, "step": 24830 }, { "epoch": 0.15869567995093467, "grad_norm": 0.9921244382858276, "learning_rate": 9.84557700923614e-05, "loss": 0.8407, "step": 24840 }, { "epoch": 0.15875956710067338, "grad_norm": 0.6068295836448669, "learning_rate": 9.845453244881022e-05, "loss": 0.7625, "step": 24850 }, { "epoch": 0.15882345425041208, "grad_norm": 0.5496127605438232, "learning_rate": 9.845329431728e-05, "loss": 0.8734, "step": 24860 }, { "epoch": 0.15888734140015076, "grad_norm": 1.1657304763793945, "learning_rate": 9.845205569778316e-05, "loss": 0.8215, "step": 24870 }, { "epoch": 0.15895122854988947, "grad_norm": 0.6050916910171509, "learning_rate": 9.845081659033221e-05, "loss": 0.7701, "step": 24880 }, { "epoch": 0.15901511569962817, "grad_norm": 0.7160899043083191, "learning_rate": 9.844957699493964e-05, "loss": 1.0013, "step": 24890 }, { "epoch": 0.15907900284936688, "grad_norm": 0.8572732210159302, "learning_rate": 9.84483369116179e-05, "loss": 0.8973, "step": 24900 }, { "epoch": 0.15914288999910559, "grad_norm": 0.8619921803474426, "learning_rate": 9.84470963403795e-05, "loss": 0.9491, "step": 24910 }, { "epoch": 0.1592067771488443, "grad_norm": 0.6899974942207336, "learning_rate": 9.844585528123692e-05, "loss": 0.8375, "step": 24920 }, { "epoch": 0.15927066429858297, "grad_norm": 0.7540447115898132, "learning_rate": 9.844461373420267e-05, "loss": 0.8525, "step": 24930 }, { "epoch": 0.15933455144832168, "grad_norm": 0.8030637502670288, "learning_rate": 9.844337169928926e-05, "loss": 0.8833, "step": 24940 }, { "epoch": 0.15939843859806038, "grad_norm": 0.8504492044448853, "learning_rate": 9.844212917650917e-05, "loss": 0.9273, "step": 24950 }, { "epoch": 0.1594623257477991, "grad_norm": 1.3353928327560425, "learning_rate": 9.844088616587493e-05, "loss": 0.8097, "step": 24960 }, { "epoch": 0.1595262128975378, "grad_norm": 1.6527575254440308, "learning_rate": 9.843964266739907e-05, "loss": 0.7699, "step": 24970 }, { "epoch": 0.1595901000472765, "grad_norm": 0.6608484387397766, "learning_rate": 9.84383986810941e-05, "loss": 0.9171, "step": 24980 }, { "epoch": 0.15965398719701518, "grad_norm": 0.8177617788314819, "learning_rate": 9.843715420697254e-05, "loss": 0.9391, "step": 24990 }, { "epoch": 0.15971787434675389, "grad_norm": 0.8526275753974915, "learning_rate": 9.843590924504696e-05, "loss": 0.9272, "step": 25000 }, { "epoch": 0.1597817614964926, "grad_norm": 0.753639817237854, "learning_rate": 9.843466379532985e-05, "loss": 0.6739, "step": 25010 }, { "epoch": 0.1598456486462313, "grad_norm": 0.8092784881591797, "learning_rate": 9.843341785783377e-05, "loss": 0.7158, "step": 25020 }, { "epoch": 0.15990953579597, "grad_norm": 1.0467857122421265, "learning_rate": 9.843217143257126e-05, "loss": 0.7562, "step": 25030 }, { "epoch": 0.1599734229457087, "grad_norm": 0.5774504542350769, "learning_rate": 9.843092451955491e-05, "loss": 0.7832, "step": 25040 }, { "epoch": 0.1600373100954474, "grad_norm": 1.2460720539093018, "learning_rate": 9.842967711879725e-05, "loss": 0.7436, "step": 25050 }, { "epoch": 0.1601011972451861, "grad_norm": 1.1241377592086792, "learning_rate": 9.842842923031084e-05, "loss": 0.7252, "step": 25060 }, { "epoch": 0.1601650843949248, "grad_norm": 0.565549910068512, "learning_rate": 9.842718085410823e-05, "loss": 0.8209, "step": 25070 }, { "epoch": 0.1602289715446635, "grad_norm": 0.5020076036453247, "learning_rate": 9.842593199020203e-05, "loss": 0.661, "step": 25080 }, { "epoch": 0.1602928586944022, "grad_norm": 0.7566838264465332, "learning_rate": 9.842480759571027e-05, "loss": 0.9231, "step": 25090 }, { "epoch": 0.16035674584414092, "grad_norm": 0.9458664655685425, "learning_rate": 9.842355780520187e-05, "loss": 0.7723, "step": 25100 }, { "epoch": 0.1604206329938796, "grad_norm": 1.0208630561828613, "learning_rate": 9.842230752702635e-05, "loss": 0.7883, "step": 25110 }, { "epoch": 0.1604845201436183, "grad_norm": 0.7197948098182678, "learning_rate": 9.84210567611963e-05, "loss": 0.8673, "step": 25120 }, { "epoch": 0.160548407293357, "grad_norm": 0.9319686889648438, "learning_rate": 9.841980550772433e-05, "loss": 0.7893, "step": 25130 }, { "epoch": 0.16061229444309572, "grad_norm": 0.8447830677032471, "learning_rate": 9.841855376662302e-05, "loss": 1.0086, "step": 25140 }, { "epoch": 0.16067618159283442, "grad_norm": 1.1380891799926758, "learning_rate": 9.841730153790499e-05, "loss": 0.7411, "step": 25150 }, { "epoch": 0.16074006874257313, "grad_norm": 0.988677442073822, "learning_rate": 9.841604882158285e-05, "loss": 0.8238, "step": 25160 }, { "epoch": 0.16080395589231183, "grad_norm": 0.6261546611785889, "learning_rate": 9.84147956176692e-05, "loss": 0.9153, "step": 25170 }, { "epoch": 0.1608678430420505, "grad_norm": 1.1242022514343262, "learning_rate": 9.841354192617667e-05, "loss": 0.8479, "step": 25180 }, { "epoch": 0.16093173019178922, "grad_norm": 0.8760757446289062, "learning_rate": 9.84122877471179e-05, "loss": 1.0954, "step": 25190 }, { "epoch": 0.16099561734152792, "grad_norm": 0.8859489560127258, "learning_rate": 9.841103308050552e-05, "loss": 0.6732, "step": 25200 }, { "epoch": 0.16105950449126663, "grad_norm": 1.3529788255691528, "learning_rate": 9.840977792635215e-05, "loss": 1.0534, "step": 25210 }, { "epoch": 0.16112339164100534, "grad_norm": 0.721413254737854, "learning_rate": 9.840852228467041e-05, "loss": 0.9705, "step": 25220 }, { "epoch": 0.16118727879074404, "grad_norm": 0.9626721739768982, "learning_rate": 9.8407266155473e-05, "loss": 0.8799, "step": 25230 }, { "epoch": 0.16125116594048272, "grad_norm": 0.5856235027313232, "learning_rate": 9.840600953877253e-05, "loss": 1.2152, "step": 25240 }, { "epoch": 0.16131505309022143, "grad_norm": 1.074049711227417, "learning_rate": 9.840475243458167e-05, "loss": 0.8506, "step": 25250 }, { "epoch": 0.16137894023996013, "grad_norm": 0.7193922400474548, "learning_rate": 9.840349484291308e-05, "loss": 0.7899, "step": 25260 }, { "epoch": 0.16144282738969884, "grad_norm": 1.0390762090682983, "learning_rate": 9.840223676377942e-05, "loss": 0.9389, "step": 25270 }, { "epoch": 0.16150671453943755, "grad_norm": 1.7726080417633057, "learning_rate": 9.840097819719336e-05, "loss": 0.9474, "step": 25280 }, { "epoch": 0.16157060168917625, "grad_norm": 0.6403753757476807, "learning_rate": 9.839971914316757e-05, "loss": 0.8837, "step": 25290 }, { "epoch": 0.16163448883891493, "grad_norm": 0.8878451585769653, "learning_rate": 9.839845960171475e-05, "loss": 0.9911, "step": 25300 }, { "epoch": 0.16169837598865364, "grad_norm": 0.9376581907272339, "learning_rate": 9.839719957284756e-05, "loss": 1.0247, "step": 25310 }, { "epoch": 0.16176226313839234, "grad_norm": 0.6702033877372742, "learning_rate": 9.839593905657871e-05, "loss": 0.9453, "step": 25320 }, { "epoch": 0.16182615028813105, "grad_norm": 0.4987049400806427, "learning_rate": 9.839467805292089e-05, "loss": 0.7227, "step": 25330 }, { "epoch": 0.16189003743786975, "grad_norm": 0.6735382080078125, "learning_rate": 9.839341656188677e-05, "loss": 0.8046, "step": 25340 }, { "epoch": 0.16195392458760846, "grad_norm": 0.8256925344467163, "learning_rate": 9.839215458348909e-05, "loss": 1.342, "step": 25350 }, { "epoch": 0.16201781173734714, "grad_norm": 1.0099321603775024, "learning_rate": 9.839089211774056e-05, "loss": 0.9079, "step": 25360 }, { "epoch": 0.16208169888708585, "grad_norm": 0.9464432597160339, "learning_rate": 9.838962916465388e-05, "loss": 1.0935, "step": 25370 }, { "epoch": 0.16214558603682455, "grad_norm": 1.0927412509918213, "learning_rate": 9.838836572424176e-05, "loss": 0.6795, "step": 25380 }, { "epoch": 0.16220947318656326, "grad_norm": 0.6880885362625122, "learning_rate": 9.838710179651694e-05, "loss": 0.9407, "step": 25390 }, { "epoch": 0.16227336033630196, "grad_norm": 0.9150338768959045, "learning_rate": 9.838583738149215e-05, "loss": 0.9107, "step": 25400 }, { "epoch": 0.16233724748604067, "grad_norm": 1.087501049041748, "learning_rate": 9.838457247918012e-05, "loss": 0.7319, "step": 25410 }, { "epoch": 0.16240113463577935, "grad_norm": 0.7410935163497925, "learning_rate": 9.838330708959358e-05, "loss": 0.755, "step": 25420 }, { "epoch": 0.16246502178551805, "grad_norm": 0.7320923209190369, "learning_rate": 9.838204121274527e-05, "loss": 0.9022, "step": 25430 }, { "epoch": 0.16252890893525676, "grad_norm": 0.7874162793159485, "learning_rate": 9.838077484864796e-05, "loss": 0.8658, "step": 25440 }, { "epoch": 0.16259279608499547, "grad_norm": 0.6988115310668945, "learning_rate": 9.83795079973144e-05, "loss": 0.8592, "step": 25450 }, { "epoch": 0.16265668323473417, "grad_norm": 1.1536266803741455, "learning_rate": 9.837824065875733e-05, "loss": 0.9147, "step": 25460 }, { "epoch": 0.16272057038447288, "grad_norm": 0.8450478911399841, "learning_rate": 9.837697283298952e-05, "loss": 0.8379, "step": 25470 }, { "epoch": 0.16278445753421156, "grad_norm": 0.545207679271698, "learning_rate": 9.837570452002375e-05, "loss": 0.8029, "step": 25480 }, { "epoch": 0.16284834468395026, "grad_norm": 0.8712167739868164, "learning_rate": 9.837443571987277e-05, "loss": 0.9546, "step": 25490 }, { "epoch": 0.16291223183368897, "grad_norm": 1.3103466033935547, "learning_rate": 9.837316643254938e-05, "loss": 0.8578, "step": 25500 }, { "epoch": 0.16297611898342768, "grad_norm": 0.6110614538192749, "learning_rate": 9.837189665806637e-05, "loss": 0.9893, "step": 25510 }, { "epoch": 0.16304000613316638, "grad_norm": 0.8567008972167969, "learning_rate": 9.83706263964365e-05, "loss": 0.7717, "step": 25520 }, { "epoch": 0.1631038932829051, "grad_norm": 0.844247579574585, "learning_rate": 9.836935564767257e-05, "loss": 1.0621, "step": 25530 }, { "epoch": 0.16316778043264377, "grad_norm": 0.8914816379547119, "learning_rate": 9.836808441178739e-05, "loss": 0.8287, "step": 25540 }, { "epoch": 0.16323166758238247, "grad_norm": 0.6251090168952942, "learning_rate": 9.836681268879377e-05, "loss": 1.051, "step": 25550 }, { "epoch": 0.16329555473212118, "grad_norm": 0.6964147090911865, "learning_rate": 9.836554047870447e-05, "loss": 0.9595, "step": 25560 }, { "epoch": 0.16335944188185988, "grad_norm": 0.5562779307365417, "learning_rate": 9.836426778153236e-05, "loss": 0.8304, "step": 25570 }, { "epoch": 0.1634233290315986, "grad_norm": 0.6539714336395264, "learning_rate": 9.836299459729023e-05, "loss": 0.9026, "step": 25580 }, { "epoch": 0.1634872161813373, "grad_norm": 0.789167582988739, "learning_rate": 9.836172092599089e-05, "loss": 0.806, "step": 25590 }, { "epoch": 0.16355110333107598, "grad_norm": 0.7832333445549011, "learning_rate": 9.83604467676472e-05, "loss": 0.8309, "step": 25600 }, { "epoch": 0.16361499048081468, "grad_norm": 0.9938201308250427, "learning_rate": 9.835917212227197e-05, "loss": 0.9, "step": 25610 }, { "epoch": 0.1636788776305534, "grad_norm": 0.7347666621208191, "learning_rate": 9.835789698987802e-05, "loss": 0.7665, "step": 25620 }, { "epoch": 0.1637427647802921, "grad_norm": 0.7416117191314697, "learning_rate": 9.835662137047824e-05, "loss": 0.8239, "step": 25630 }, { "epoch": 0.1638066519300308, "grad_norm": 0.6439573764801025, "learning_rate": 9.835534526408543e-05, "loss": 0.9106, "step": 25640 }, { "epoch": 0.1638705390797695, "grad_norm": 1.0646562576293945, "learning_rate": 9.835406867071247e-05, "loss": 0.7518, "step": 25650 }, { "epoch": 0.16393442622950818, "grad_norm": 1.135383129119873, "learning_rate": 9.83527915903722e-05, "loss": 0.723, "step": 25660 }, { "epoch": 0.1639983133792469, "grad_norm": 0.9141467213630676, "learning_rate": 9.83515140230775e-05, "loss": 1.104, "step": 25670 }, { "epoch": 0.1640622005289856, "grad_norm": 0.7846889495849609, "learning_rate": 9.83502359688412e-05, "loss": 0.8969, "step": 25680 }, { "epoch": 0.1641260876787243, "grad_norm": 0.8037777543067932, "learning_rate": 9.834895742767622e-05, "loss": 0.9751, "step": 25690 }, { "epoch": 0.164189974828463, "grad_norm": 1.0449095964431763, "learning_rate": 9.83476783995954e-05, "loss": 1.0799, "step": 25700 }, { "epoch": 0.16425386197820172, "grad_norm": 0.6123198866844177, "learning_rate": 9.834639888461162e-05, "loss": 0.8884, "step": 25710 }, { "epoch": 0.1643177491279404, "grad_norm": 0.7933758497238159, "learning_rate": 9.834511888273778e-05, "loss": 0.9816, "step": 25720 }, { "epoch": 0.1643816362776791, "grad_norm": 1.233192801475525, "learning_rate": 9.83438383939868e-05, "loss": 0.9493, "step": 25730 }, { "epoch": 0.1644455234274178, "grad_norm": 0.9002760052680969, "learning_rate": 9.834255741837151e-05, "loss": 1.0682, "step": 25740 }, { "epoch": 0.1645094105771565, "grad_norm": 0.6131082773208618, "learning_rate": 9.834127595590485e-05, "loss": 0.964, "step": 25750 }, { "epoch": 0.16457329772689522, "grad_norm": 1.53384530544281, "learning_rate": 9.833999400659972e-05, "loss": 0.9393, "step": 25760 }, { "epoch": 0.16463718487663392, "grad_norm": 0.8691433072090149, "learning_rate": 9.833871157046904e-05, "loss": 0.708, "step": 25770 }, { "epoch": 0.1647010720263726, "grad_norm": 0.6749919652938843, "learning_rate": 9.833742864752571e-05, "loss": 1.1174, "step": 25780 }, { "epoch": 0.1647649591761113, "grad_norm": 0.6683396100997925, "learning_rate": 9.833614523778266e-05, "loss": 0.8302, "step": 25790 }, { "epoch": 0.16482884632585001, "grad_norm": 0.8051975965499878, "learning_rate": 9.833486134125281e-05, "loss": 1.2393, "step": 25800 }, { "epoch": 0.16489273347558872, "grad_norm": 0.6575607657432556, "learning_rate": 9.833357695794909e-05, "loss": 1.0257, "step": 25810 }, { "epoch": 0.16495662062532743, "grad_norm": 0.9496917128562927, "learning_rate": 9.833229208788443e-05, "loss": 0.8261, "step": 25820 }, { "epoch": 0.16502050777506613, "grad_norm": 0.7231150269508362, "learning_rate": 9.833100673107179e-05, "loss": 0.6341, "step": 25830 }, { "epoch": 0.1650843949248048, "grad_norm": 1.2418237924575806, "learning_rate": 9.832972088752407e-05, "loss": 0.803, "step": 25840 }, { "epoch": 0.16514828207454352, "grad_norm": 0.6519736051559448, "learning_rate": 9.832843455725427e-05, "loss": 0.918, "step": 25850 }, { "epoch": 0.16521216922428222, "grad_norm": 0.6396727561950684, "learning_rate": 9.832714774027534e-05, "loss": 1.0144, "step": 25860 }, { "epoch": 0.16527605637402093, "grad_norm": 1.0266163349151611, "learning_rate": 9.832586043660019e-05, "loss": 0.7874, "step": 25870 }, { "epoch": 0.16533994352375964, "grad_norm": 0.9573850035667419, "learning_rate": 9.832457264624184e-05, "loss": 0.8346, "step": 25880 }, { "epoch": 0.16540383067349834, "grad_norm": 0.7382820844650269, "learning_rate": 9.832328436921324e-05, "loss": 0.7884, "step": 25890 }, { "epoch": 0.16546771782323702, "grad_norm": 0.8257744908332825, "learning_rate": 9.832199560552734e-05, "loss": 0.9137, "step": 25900 }, { "epoch": 0.16553160497297573, "grad_norm": 0.4377366006374359, "learning_rate": 9.832070635519715e-05, "loss": 0.7715, "step": 25910 }, { "epoch": 0.16559549212271443, "grad_norm": 0.6788588166236877, "learning_rate": 9.831941661823564e-05, "loss": 0.8829, "step": 25920 }, { "epoch": 0.16565937927245314, "grad_norm": 0.7223168611526489, "learning_rate": 9.831812639465581e-05, "loss": 0.9969, "step": 25930 }, { "epoch": 0.16572326642219185, "grad_norm": 0.5885007977485657, "learning_rate": 9.831683568447064e-05, "loss": 0.8589, "step": 25940 }, { "epoch": 0.16578715357193055, "grad_norm": 0.4553689956665039, "learning_rate": 9.831554448769314e-05, "loss": 1.0332, "step": 25950 }, { "epoch": 0.16585104072166926, "grad_norm": 0.8313513398170471, "learning_rate": 9.831425280433631e-05, "loss": 0.8301, "step": 25960 }, { "epoch": 0.16591492787140794, "grad_norm": 0.8566281795501709, "learning_rate": 9.831296063441315e-05, "loss": 0.8196, "step": 25970 }, { "epoch": 0.16597881502114664, "grad_norm": 0.9477049708366394, "learning_rate": 9.831166797793668e-05, "loss": 1.0331, "step": 25980 }, { "epoch": 0.16604270217088535, "grad_norm": 0.9263975620269775, "learning_rate": 9.831037483491991e-05, "loss": 1.0746, "step": 25990 }, { "epoch": 0.16610658932062405, "grad_norm": 0.5900850296020508, "learning_rate": 9.83090812053759e-05, "loss": 0.9527, "step": 26000 }, { "epoch": 0.16617047647036276, "grad_norm": 1.1755515336990356, "learning_rate": 9.830778708931762e-05, "loss": 0.8315, "step": 26010 }, { "epoch": 0.16623436362010147, "grad_norm": 0.9088423848152161, "learning_rate": 9.830649248675814e-05, "loss": 0.9244, "step": 26020 }, { "epoch": 0.16629825076984014, "grad_norm": 0.7324301600456238, "learning_rate": 9.83051973977105e-05, "loss": 0.904, "step": 26030 }, { "epoch": 0.16636213791957885, "grad_norm": 0.8652370572090149, "learning_rate": 9.830390182218771e-05, "loss": 0.9646, "step": 26040 }, { "epoch": 0.16642602506931756, "grad_norm": 1.0668914318084717, "learning_rate": 9.830260576020286e-05, "loss": 1.0844, "step": 26050 }, { "epoch": 0.16648991221905626, "grad_norm": 1.8865516185760498, "learning_rate": 9.830130921176898e-05, "loss": 0.9959, "step": 26060 }, { "epoch": 0.16655379936879497, "grad_norm": 0.7931020855903625, "learning_rate": 9.830001217689913e-05, "loss": 0.8263, "step": 26070 }, { "epoch": 0.16661768651853368, "grad_norm": 1.674660325050354, "learning_rate": 9.829871465560637e-05, "loss": 0.8527, "step": 26080 }, { "epoch": 0.16668157366827235, "grad_norm": 1.0010359287261963, "learning_rate": 9.829741664790376e-05, "loss": 0.7847, "step": 26090 }, { "epoch": 0.16674546081801106, "grad_norm": 1.0310410261154175, "learning_rate": 9.829611815380439e-05, "loss": 0.8471, "step": 26100 }, { "epoch": 0.16680934796774977, "grad_norm": 0.8554787039756775, "learning_rate": 9.829481917332132e-05, "loss": 0.8849, "step": 26110 }, { "epoch": 0.16687323511748847, "grad_norm": 1.3090757131576538, "learning_rate": 9.829351970646764e-05, "loss": 0.921, "step": 26120 }, { "epoch": 0.16693712226722718, "grad_norm": 1.068739414215088, "learning_rate": 9.829221975325644e-05, "loss": 0.9898, "step": 26130 }, { "epoch": 0.16700100941696588, "grad_norm": 0.8239749073982239, "learning_rate": 9.829091931370082e-05, "loss": 1.1161, "step": 26140 }, { "epoch": 0.16706489656670456, "grad_norm": 1.0234822034835815, "learning_rate": 9.828961838781385e-05, "loss": 1.0181, "step": 26150 }, { "epoch": 0.16712878371644327, "grad_norm": 1.0589160919189453, "learning_rate": 9.828831697560865e-05, "loss": 1.0243, "step": 26160 }, { "epoch": 0.16719267086618197, "grad_norm": 0.8593624234199524, "learning_rate": 9.828701507709832e-05, "loss": 1.3933, "step": 26170 }, { "epoch": 0.16725655801592068, "grad_norm": 1.0242136716842651, "learning_rate": 9.828571269229598e-05, "loss": 0.9601, "step": 26180 }, { "epoch": 0.1673204451656594, "grad_norm": 0.47015684843063354, "learning_rate": 9.828440982121473e-05, "loss": 1.2651, "step": 26190 }, { "epoch": 0.1673843323153981, "grad_norm": 0.9608283042907715, "learning_rate": 9.828310646386772e-05, "loss": 0.7508, "step": 26200 }, { "epoch": 0.16744821946513677, "grad_norm": 0.8850206136703491, "learning_rate": 9.828180262026805e-05, "loss": 0.9822, "step": 26210 }, { "epoch": 0.16751210661487548, "grad_norm": 1.7484781742095947, "learning_rate": 9.828049829042884e-05, "loss": 0.9558, "step": 26220 }, { "epoch": 0.16757599376461418, "grad_norm": 0.625224769115448, "learning_rate": 9.827919347436328e-05, "loss": 0.8881, "step": 26230 }, { "epoch": 0.1676398809143529, "grad_norm": 0.576524555683136, "learning_rate": 9.827788817208444e-05, "loss": 0.9399, "step": 26240 }, { "epoch": 0.1677037680640916, "grad_norm": 1.0603713989257812, "learning_rate": 9.827658238360553e-05, "loss": 0.8588, "step": 26250 }, { "epoch": 0.1677676552138303, "grad_norm": 0.7877979278564453, "learning_rate": 9.827527610893964e-05, "loss": 0.8973, "step": 26260 }, { "epoch": 0.16783154236356898, "grad_norm": 1.2610846757888794, "learning_rate": 9.827396934809997e-05, "loss": 0.7684, "step": 26270 }, { "epoch": 0.1678954295133077, "grad_norm": 0.49026232957839966, "learning_rate": 9.827266210109967e-05, "loss": 1.061, "step": 26280 }, { "epoch": 0.1679593166630464, "grad_norm": 1.0687637329101562, "learning_rate": 9.827135436795189e-05, "loss": 0.8798, "step": 26290 }, { "epoch": 0.1680232038127851, "grad_norm": 0.9565626978874207, "learning_rate": 9.827004614866981e-05, "loss": 0.8781, "step": 26300 }, { "epoch": 0.1680870909625238, "grad_norm": 1.148451566696167, "learning_rate": 9.826873744326661e-05, "loss": 0.7915, "step": 26310 }, { "epoch": 0.1681509781122625, "grad_norm": 0.6154188513755798, "learning_rate": 9.826742825175547e-05, "loss": 0.8317, "step": 26320 }, { "epoch": 0.1682148652620012, "grad_norm": 0.9438403844833374, "learning_rate": 9.826611857414957e-05, "loss": 0.8347, "step": 26330 }, { "epoch": 0.1682787524117399, "grad_norm": 0.6729276776313782, "learning_rate": 9.82648084104621e-05, "loss": 0.969, "step": 26340 }, { "epoch": 0.1683426395614786, "grad_norm": 0.6888546943664551, "learning_rate": 9.826349776070625e-05, "loss": 1.0223, "step": 26350 }, { "epoch": 0.1684065267112173, "grad_norm": 0.8470525741577148, "learning_rate": 9.826218662489521e-05, "loss": 0.8919, "step": 26360 }, { "epoch": 0.16847041386095601, "grad_norm": 0.653862714767456, "learning_rate": 9.826087500304222e-05, "loss": 1.0743, "step": 26370 }, { "epoch": 0.16853430101069472, "grad_norm": 0.7015219926834106, "learning_rate": 9.825956289516046e-05, "loss": 1.1053, "step": 26380 }, { "epoch": 0.1685981881604334, "grad_norm": 1.116733193397522, "learning_rate": 9.825825030126315e-05, "loss": 1.199, "step": 26390 }, { "epoch": 0.1686620753101721, "grad_norm": 0.8197908401489258, "learning_rate": 9.825693722136351e-05, "loss": 0.9155, "step": 26400 }, { "epoch": 0.1687259624599108, "grad_norm": 0.9840227365493774, "learning_rate": 9.825562365547477e-05, "loss": 0.9655, "step": 26410 }, { "epoch": 0.16878984960964952, "grad_norm": 0.6856445074081421, "learning_rate": 9.825430960361015e-05, "loss": 0.7135, "step": 26420 }, { "epoch": 0.16885373675938822, "grad_norm": 1.0108433961868286, "learning_rate": 9.825299506578288e-05, "loss": 1.1918, "step": 26430 }, { "epoch": 0.16891762390912693, "grad_norm": 0.7306868433952332, "learning_rate": 9.82516800420062e-05, "loss": 0.9422, "step": 26440 }, { "epoch": 0.1689815110588656, "grad_norm": 0.8072736859321594, "learning_rate": 9.825036453229336e-05, "loss": 0.8563, "step": 26450 }, { "epoch": 0.1690453982086043, "grad_norm": 0.9829355478286743, "learning_rate": 9.824904853665764e-05, "loss": 1.2103, "step": 26460 }, { "epoch": 0.16910928535834302, "grad_norm": 0.7728550434112549, "learning_rate": 9.824773205511222e-05, "loss": 0.986, "step": 26470 }, { "epoch": 0.16917317250808173, "grad_norm": 0.7675033211708069, "learning_rate": 9.824641508767042e-05, "loss": 1.1175, "step": 26480 }, { "epoch": 0.16923705965782043, "grad_norm": 1.7961393594741821, "learning_rate": 9.824509763434548e-05, "loss": 0.8564, "step": 26490 }, { "epoch": 0.16930094680755914, "grad_norm": 0.782536506652832, "learning_rate": 9.824377969515065e-05, "loss": 1.0492, "step": 26500 }, { "epoch": 0.16936483395729782, "grad_norm": 0.6397303342819214, "learning_rate": 9.824246127009924e-05, "loss": 0.922, "step": 26510 }, { "epoch": 0.16942872110703652, "grad_norm": 0.7447948455810547, "learning_rate": 9.82411423592045e-05, "loss": 0.9926, "step": 26520 }, { "epoch": 0.16949260825677523, "grad_norm": 0.7400467991828918, "learning_rate": 9.823982296247972e-05, "loss": 1.0191, "step": 26530 }, { "epoch": 0.16955649540651394, "grad_norm": 0.6189865469932556, "learning_rate": 9.82385030799382e-05, "loss": 1.0518, "step": 26540 }, { "epoch": 0.16962038255625264, "grad_norm": 0.8793081641197205, "learning_rate": 9.823718271159321e-05, "loss": 0.8839, "step": 26550 }, { "epoch": 0.16968426970599135, "grad_norm": 0.6479794979095459, "learning_rate": 9.823586185745808e-05, "loss": 1.1906, "step": 26560 }, { "epoch": 0.16974815685573003, "grad_norm": 0.9083991646766663, "learning_rate": 9.823454051754605e-05, "loss": 0.8276, "step": 26570 }, { "epoch": 0.16981204400546873, "grad_norm": 0.7456206679344177, "learning_rate": 9.823321869187051e-05, "loss": 1.6253, "step": 26580 }, { "epoch": 0.16987593115520744, "grad_norm": 0.7797310948371887, "learning_rate": 9.823189638044473e-05, "loss": 0.9139, "step": 26590 }, { "epoch": 0.16993981830494614, "grad_norm": 0.6889947056770325, "learning_rate": 9.8230573583282e-05, "loss": 0.8341, "step": 26600 }, { "epoch": 0.17000370545468485, "grad_norm": 0.9696791172027588, "learning_rate": 9.822925030039567e-05, "loss": 0.8444, "step": 26610 }, { "epoch": 0.17006759260442356, "grad_norm": 0.5877872705459595, "learning_rate": 9.822792653179908e-05, "loss": 1.012, "step": 26620 }, { "epoch": 0.17013147975416223, "grad_norm": 0.7431389093399048, "learning_rate": 9.822660227750554e-05, "loss": 0.7642, "step": 26630 }, { "epoch": 0.17019536690390094, "grad_norm": 0.7920153737068176, "learning_rate": 9.822527753752839e-05, "loss": 0.8715, "step": 26640 }, { "epoch": 0.17025925405363965, "grad_norm": 0.8526118397712708, "learning_rate": 9.822395231188099e-05, "loss": 0.816, "step": 26650 }, { "epoch": 0.17032314120337835, "grad_norm": 0.8121978640556335, "learning_rate": 9.822262660057666e-05, "loss": 0.9923, "step": 26660 }, { "epoch": 0.17038702835311706, "grad_norm": 1.0887260437011719, "learning_rate": 9.822130040362875e-05, "loss": 0.9544, "step": 26670 }, { "epoch": 0.17045091550285577, "grad_norm": 0.5011045336723328, "learning_rate": 9.821997372105065e-05, "loss": 0.7011, "step": 26680 }, { "epoch": 0.17051480265259444, "grad_norm": 1.1520075798034668, "learning_rate": 9.821864655285569e-05, "loss": 0.963, "step": 26690 }, { "epoch": 0.17057868980233315, "grad_norm": 0.7860487699508667, "learning_rate": 9.821731889905722e-05, "loss": 0.8835, "step": 26700 }, { "epoch": 0.17064257695207186, "grad_norm": 0.7170895934104919, "learning_rate": 9.821599075966868e-05, "loss": 0.8771, "step": 26710 }, { "epoch": 0.17070646410181056, "grad_norm": 1.2707265615463257, "learning_rate": 9.821466213470337e-05, "loss": 1.134, "step": 26720 }, { "epoch": 0.17077035125154927, "grad_norm": 0.8907286524772644, "learning_rate": 9.82133330241747e-05, "loss": 0.8502, "step": 26730 }, { "epoch": 0.17083423840128797, "grad_norm": 0.6497828960418701, "learning_rate": 9.821200342809606e-05, "loss": 1.2541, "step": 26740 }, { "epoch": 0.17089812555102665, "grad_norm": 0.7036256194114685, "learning_rate": 9.821067334648084e-05, "loss": 0.9958, "step": 26750 }, { "epoch": 0.17096201270076536, "grad_norm": 0.8180755376815796, "learning_rate": 9.820934277934243e-05, "loss": 1.0885, "step": 26760 }, { "epoch": 0.17102589985050407, "grad_norm": 0.9146037697792053, "learning_rate": 9.820801172669425e-05, "loss": 0.8732, "step": 26770 }, { "epoch": 0.17108978700024277, "grad_norm": 0.9962326884269714, "learning_rate": 9.820668018854966e-05, "loss": 0.711, "step": 26780 }, { "epoch": 0.17115367414998148, "grad_norm": 0.9202134609222412, "learning_rate": 9.82053481649221e-05, "loss": 1.0103, "step": 26790 }, { "epoch": 0.17121756129972018, "grad_norm": 1.4406436681747437, "learning_rate": 9.820401565582498e-05, "loss": 1.1804, "step": 26800 }, { "epoch": 0.1712814484494589, "grad_norm": 0.8345924019813538, "learning_rate": 9.820268266127173e-05, "loss": 0.7762, "step": 26810 }, { "epoch": 0.17134533559919757, "grad_norm": 1.7090119123458862, "learning_rate": 9.820134918127576e-05, "loss": 0.867, "step": 26820 }, { "epoch": 0.17140922274893627, "grad_norm": 0.7631736397743225, "learning_rate": 9.82000152158505e-05, "loss": 1.1823, "step": 26830 }, { "epoch": 0.17147310989867498, "grad_norm": 0.5878487825393677, "learning_rate": 9.81986807650094e-05, "loss": 0.8455, "step": 26840 }, { "epoch": 0.1715369970484137, "grad_norm": 1.0740894079208374, "learning_rate": 9.819734582876587e-05, "loss": 0.8497, "step": 26850 }, { "epoch": 0.1716008841981524, "grad_norm": 0.8244208097457886, "learning_rate": 9.819601040713337e-05, "loss": 0.7606, "step": 26860 }, { "epoch": 0.1716647713478911, "grad_norm": 0.9550793170928955, "learning_rate": 9.819467450012536e-05, "loss": 0.8171, "step": 26870 }, { "epoch": 0.17172865849762978, "grad_norm": 0.7170982360839844, "learning_rate": 9.819333810775528e-05, "loss": 0.823, "step": 26880 }, { "epoch": 0.17179254564736848, "grad_norm": 0.8252397775650024, "learning_rate": 9.81920012300366e-05, "loss": 0.7653, "step": 26890 }, { "epoch": 0.1718564327971072, "grad_norm": 1.1286877393722534, "learning_rate": 9.819066386698277e-05, "loss": 0.991, "step": 26900 }, { "epoch": 0.1719203199468459, "grad_norm": 0.7603797912597656, "learning_rate": 9.818932601860727e-05, "loss": 0.9141, "step": 26910 }, { "epoch": 0.1719842070965846, "grad_norm": 0.7588580250740051, "learning_rate": 9.818798768492354e-05, "loss": 0.8255, "step": 26920 }, { "epoch": 0.1720480942463233, "grad_norm": 0.9968806505203247, "learning_rate": 9.81866488659451e-05, "loss": 0.6958, "step": 26930 }, { "epoch": 0.17211198139606199, "grad_norm": 0.7764785885810852, "learning_rate": 9.818530956168543e-05, "loss": 1.1488, "step": 26940 }, { "epoch": 0.1721758685458007, "grad_norm": 0.6332468390464783, "learning_rate": 9.818396977215801e-05, "loss": 0.6837, "step": 26950 }, { "epoch": 0.1722397556955394, "grad_norm": 0.8513321876525879, "learning_rate": 9.818262949737632e-05, "loss": 0.7871, "step": 26960 }, { "epoch": 0.1723036428452781, "grad_norm": 0.6733559370040894, "learning_rate": 9.818128873735386e-05, "loss": 0.8591, "step": 26970 }, { "epoch": 0.1723675299950168, "grad_norm": 1.0465015172958374, "learning_rate": 9.817994749210415e-05, "loss": 0.8665, "step": 26980 }, { "epoch": 0.17243141714475552, "grad_norm": 0.6963700652122498, "learning_rate": 9.817860576164069e-05, "loss": 0.8684, "step": 26990 }, { "epoch": 0.1724953042944942, "grad_norm": 1.0664374828338623, "learning_rate": 9.817726354597699e-05, "loss": 0.6893, "step": 27000 }, { "epoch": 0.1725591914442329, "grad_norm": 0.7583040595054626, "learning_rate": 9.817592084512655e-05, "loss": 0.9267, "step": 27010 }, { "epoch": 0.1726230785939716, "grad_norm": 0.8282020092010498, "learning_rate": 9.817457765910292e-05, "loss": 0.7665, "step": 27020 }, { "epoch": 0.1726869657437103, "grad_norm": 0.8650298118591309, "learning_rate": 9.817323398791961e-05, "loss": 1.0732, "step": 27030 }, { "epoch": 0.17275085289344902, "grad_norm": 0.5665771961212158, "learning_rate": 9.817188983159016e-05, "loss": 0.92, "step": 27040 }, { "epoch": 0.17281474004318773, "grad_norm": 1.4481645822525024, "learning_rate": 9.817054519012811e-05, "loss": 0.8976, "step": 27050 }, { "epoch": 0.1728786271929264, "grad_norm": 0.7741625308990479, "learning_rate": 9.8169200063547e-05, "loss": 1.0959, "step": 27060 }, { "epoch": 0.1729425143426651, "grad_norm": 0.7932523488998413, "learning_rate": 9.816785445186036e-05, "loss": 0.9241, "step": 27070 }, { "epoch": 0.17300640149240382, "grad_norm": 0.6542154550552368, "learning_rate": 9.816650835508177e-05, "loss": 0.9807, "step": 27080 }, { "epoch": 0.17307028864214252, "grad_norm": 0.7726758718490601, "learning_rate": 9.816516177322477e-05, "loss": 0.8918, "step": 27090 }, { "epoch": 0.17313417579188123, "grad_norm": 0.8398792743682861, "learning_rate": 9.81638147063029e-05, "loss": 1.1571, "step": 27100 }, { "epoch": 0.17319806294161993, "grad_norm": 3.0609123706817627, "learning_rate": 9.816246715432977e-05, "loss": 1.0103, "step": 27110 }, { "epoch": 0.1732619500913586, "grad_norm": 0.8899956941604614, "learning_rate": 9.816111911731892e-05, "loss": 0.878, "step": 27120 }, { "epoch": 0.17332583724109732, "grad_norm": 1.076644778251648, "learning_rate": 9.815977059528393e-05, "loss": 1.0136, "step": 27130 }, { "epoch": 0.17338972439083603, "grad_norm": 2.1969175338745117, "learning_rate": 9.81584215882384e-05, "loss": 0.7375, "step": 27140 }, { "epoch": 0.17345361154057473, "grad_norm": 0.9302259087562561, "learning_rate": 9.815707209619589e-05, "loss": 0.802, "step": 27150 }, { "epoch": 0.17351749869031344, "grad_norm": 0.6798985004425049, "learning_rate": 9.815572211917001e-05, "loss": 0.7363, "step": 27160 }, { "epoch": 0.17358138584005214, "grad_norm": 0.7445381879806519, "learning_rate": 9.815437165717435e-05, "loss": 1.024, "step": 27170 }, { "epoch": 0.17364527298979082, "grad_norm": 0.7571766972541809, "learning_rate": 9.81530207102225e-05, "loss": 0.8216, "step": 27180 }, { "epoch": 0.17370916013952953, "grad_norm": 1.2653508186340332, "learning_rate": 9.815166927832809e-05, "loss": 0.8769, "step": 27190 }, { "epoch": 0.17377304728926823, "grad_norm": 1.0241389274597168, "learning_rate": 9.815031736150468e-05, "loss": 0.8065, "step": 27200 }, { "epoch": 0.17383693443900694, "grad_norm": 0.6065948605537415, "learning_rate": 9.814896495976595e-05, "loss": 0.8726, "step": 27210 }, { "epoch": 0.17390082158874565, "grad_norm": 0.7081197500228882, "learning_rate": 9.814761207312547e-05, "loss": 0.9101, "step": 27220 }, { "epoch": 0.17396470873848435, "grad_norm": 1.0318403244018555, "learning_rate": 9.814625870159688e-05, "loss": 0.9142, "step": 27230 }, { "epoch": 0.17402859588822303, "grad_norm": 1.1322126388549805, "learning_rate": 9.814490484519384e-05, "loss": 0.8966, "step": 27240 }, { "epoch": 0.17409248303796174, "grad_norm": 1.0569275617599487, "learning_rate": 9.814355050392993e-05, "loss": 0.8479, "step": 27250 }, { "epoch": 0.17415637018770044, "grad_norm": 0.6752243041992188, "learning_rate": 9.814219567781882e-05, "loss": 0.8054, "step": 27260 }, { "epoch": 0.17422025733743915, "grad_norm": 2.970486640930176, "learning_rate": 9.814084036687417e-05, "loss": 0.7318, "step": 27270 }, { "epoch": 0.17428414448717786, "grad_norm": 1.1387560367584229, "learning_rate": 9.813948457110957e-05, "loss": 0.7659, "step": 27280 }, { "epoch": 0.17434803163691656, "grad_norm": 0.7417890429496765, "learning_rate": 9.813812829053874e-05, "loss": 0.7819, "step": 27290 }, { "epoch": 0.17441191878665524, "grad_norm": 1.0214507579803467, "learning_rate": 9.813677152517533e-05, "loss": 0.8779, "step": 27300 }, { "epoch": 0.17447580593639395, "grad_norm": 0.9005577564239502, "learning_rate": 9.813541427503296e-05, "loss": 0.826, "step": 27310 }, { "epoch": 0.17453969308613265, "grad_norm": 0.5254817008972168, "learning_rate": 9.813405654012533e-05, "loss": 0.7745, "step": 27320 }, { "epoch": 0.17460358023587136, "grad_norm": 0.8588125109672546, "learning_rate": 9.813269832046612e-05, "loss": 0.8896, "step": 27330 }, { "epoch": 0.17466746738561006, "grad_norm": 0.9681766033172607, "learning_rate": 9.813133961606899e-05, "loss": 0.9978, "step": 27340 }, { "epoch": 0.17473135453534877, "grad_norm": 0.6579704880714417, "learning_rate": 9.812998042694762e-05, "loss": 0.9591, "step": 27350 }, { "epoch": 0.17479524168508745, "grad_norm": 1.3134688138961792, "learning_rate": 9.812862075311572e-05, "loss": 0.9493, "step": 27360 }, { "epoch": 0.17485912883482616, "grad_norm": 1.0650473833084106, "learning_rate": 9.812726059458697e-05, "loss": 0.6251, "step": 27370 }, { "epoch": 0.17492301598456486, "grad_norm": 0.9300364851951599, "learning_rate": 9.812589995137507e-05, "loss": 0.8485, "step": 27380 }, { "epoch": 0.17498690313430357, "grad_norm": 0.8550617098808289, "learning_rate": 9.812453882349373e-05, "loss": 0.9799, "step": 27390 }, { "epoch": 0.17505079028404227, "grad_norm": 1.0517045259475708, "learning_rate": 9.812317721095662e-05, "loss": 1.092, "step": 27400 }, { "epoch": 0.17511467743378098, "grad_norm": 0.8268793821334839, "learning_rate": 9.812181511377752e-05, "loss": 0.8651, "step": 27410 }, { "epoch": 0.17517856458351966, "grad_norm": 1.0271008014678955, "learning_rate": 9.81204525319701e-05, "loss": 1.0251, "step": 27420 }, { "epoch": 0.17524245173325836, "grad_norm": 1.1085052490234375, "learning_rate": 9.811908946554809e-05, "loss": 0.897, "step": 27430 }, { "epoch": 0.17530633888299707, "grad_norm": 0.9341952204704285, "learning_rate": 9.811772591452521e-05, "loss": 0.9069, "step": 27440 }, { "epoch": 0.17537022603273578, "grad_norm": 1.8567582368850708, "learning_rate": 9.811636187891521e-05, "loss": 0.8957, "step": 27450 }, { "epoch": 0.17543411318247448, "grad_norm": 0.8161446452140808, "learning_rate": 9.811499735873182e-05, "loss": 0.9018, "step": 27460 }, { "epoch": 0.1754980003322132, "grad_norm": 0.8577879667282104, "learning_rate": 9.811363235398878e-05, "loss": 0.9191, "step": 27470 }, { "epoch": 0.17556188748195187, "grad_norm": 1.067243218421936, "learning_rate": 9.811226686469985e-05, "loss": 0.6471, "step": 27480 }, { "epoch": 0.17562577463169057, "grad_norm": 1.763016939163208, "learning_rate": 9.811090089087875e-05, "loss": 0.7081, "step": 27490 }, { "epoch": 0.17568966178142928, "grad_norm": 1.0972936153411865, "learning_rate": 9.810953443253927e-05, "loss": 0.821, "step": 27500 }, { "epoch": 0.17575354893116799, "grad_norm": 0.691754937171936, "learning_rate": 9.810816748969516e-05, "loss": 0.9142, "step": 27510 }, { "epoch": 0.1758174360809067, "grad_norm": 0.7978219389915466, "learning_rate": 9.810680006236017e-05, "loss": 1.0896, "step": 27520 }, { "epoch": 0.1758813232306454, "grad_norm": 0.5945133566856384, "learning_rate": 9.81054321505481e-05, "loss": 1.1876, "step": 27530 }, { "epoch": 0.17594521038038408, "grad_norm": 0.7158066034317017, "learning_rate": 9.81040637542727e-05, "loss": 0.8112, "step": 27540 }, { "epoch": 0.17600909753012278, "grad_norm": 0.7002230882644653, "learning_rate": 9.810269487354777e-05, "loss": 0.7471, "step": 27550 }, { "epoch": 0.1760729846798615, "grad_norm": 1.0922120809555054, "learning_rate": 9.810132550838709e-05, "loss": 0.6824, "step": 27560 }, { "epoch": 0.1761368718296002, "grad_norm": 0.7432847023010254, "learning_rate": 9.809995565880443e-05, "loss": 0.9265, "step": 27570 }, { "epoch": 0.1762007589793389, "grad_norm": 0.5448877215385437, "learning_rate": 9.809858532481362e-05, "loss": 0.8096, "step": 27580 }, { "epoch": 0.1762646461290776, "grad_norm": 0.7894873023033142, "learning_rate": 9.809721450642844e-05, "loss": 0.9688, "step": 27590 }, { "epoch": 0.17632853327881629, "grad_norm": 1.4557750225067139, "learning_rate": 9.80958432036627e-05, "loss": 0.8877, "step": 27600 }, { "epoch": 0.176392420428555, "grad_norm": 0.8581323623657227, "learning_rate": 9.809447141653022e-05, "loss": 0.8595, "step": 27610 }, { "epoch": 0.1764563075782937, "grad_norm": 1.0392162799835205, "learning_rate": 9.809309914504479e-05, "loss": 0.9148, "step": 27620 }, { "epoch": 0.1765201947280324, "grad_norm": 0.5153777003288269, "learning_rate": 9.809172638922024e-05, "loss": 0.9317, "step": 27630 }, { "epoch": 0.1765840818777711, "grad_norm": 0.6191779971122742, "learning_rate": 9.809035314907043e-05, "loss": 0.7501, "step": 27640 }, { "epoch": 0.17664796902750982, "grad_norm": 1.2180255651474, "learning_rate": 9.808897942460912e-05, "loss": 0.9112, "step": 27650 }, { "epoch": 0.17671185617724852, "grad_norm": 0.8534625768661499, "learning_rate": 9.808760521585021e-05, "loss": 1.1213, "step": 27660 }, { "epoch": 0.1767757433269872, "grad_norm": 0.7606062889099121, "learning_rate": 9.808623052280752e-05, "loss": 0.9272, "step": 27670 }, { "epoch": 0.1768396304767259, "grad_norm": 0.8535296320915222, "learning_rate": 9.808485534549488e-05, "loss": 0.9289, "step": 27680 }, { "epoch": 0.1769035176264646, "grad_norm": 0.9565229415893555, "learning_rate": 9.808347968392613e-05, "loss": 1.1181, "step": 27690 }, { "epoch": 0.17696740477620332, "grad_norm": 0.8111469149589539, "learning_rate": 9.808210353811516e-05, "loss": 0.9397, "step": 27700 }, { "epoch": 0.17703129192594202, "grad_norm": 0.8592471480369568, "learning_rate": 9.808072690807582e-05, "loss": 0.9435, "step": 27710 }, { "epoch": 0.17709517907568073, "grad_norm": 0.4907069206237793, "learning_rate": 9.807934979382194e-05, "loss": 1.0336, "step": 27720 }, { "epoch": 0.1771590662254194, "grad_norm": 1.013027310371399, "learning_rate": 9.807797219536743e-05, "loss": 0.8746, "step": 27730 }, { "epoch": 0.17722295337515812, "grad_norm": 0.910508394241333, "learning_rate": 9.807659411272614e-05, "loss": 0.7623, "step": 27740 }, { "epoch": 0.17728684052489682, "grad_norm": 1.0840027332305908, "learning_rate": 9.807521554591194e-05, "loss": 1.2327, "step": 27750 }, { "epoch": 0.17735072767463553, "grad_norm": 0.9532760977745056, "learning_rate": 9.807383649493875e-05, "loss": 0.8192, "step": 27760 }, { "epoch": 0.17741461482437423, "grad_norm": 1.1489735841751099, "learning_rate": 9.807245695982044e-05, "loss": 0.7777, "step": 27770 }, { "epoch": 0.17747850197411294, "grad_norm": 0.6683622598648071, "learning_rate": 9.807107694057089e-05, "loss": 0.6466, "step": 27780 }, { "epoch": 0.17754238912385162, "grad_norm": 1.4319005012512207, "learning_rate": 9.806969643720401e-05, "loss": 1.1009, "step": 27790 }, { "epoch": 0.17760627627359032, "grad_norm": 1.017777919769287, "learning_rate": 9.80683154497337e-05, "loss": 0.9284, "step": 27800 }, { "epoch": 0.17767016342332903, "grad_norm": 0.8920938968658447, "learning_rate": 9.806693397817386e-05, "loss": 0.8675, "step": 27810 }, { "epoch": 0.17773405057306774, "grad_norm": 1.0226699113845825, "learning_rate": 9.806555202253842e-05, "loss": 1.0085, "step": 27820 }, { "epoch": 0.17779793772280644, "grad_norm": 0.841672956943512, "learning_rate": 9.806416958284127e-05, "loss": 0.9486, "step": 27830 }, { "epoch": 0.17786182487254515, "grad_norm": 0.7303531765937805, "learning_rate": 9.806278665909638e-05, "loss": 0.9338, "step": 27840 }, { "epoch": 0.17792571202228383, "grad_norm": 0.723166823387146, "learning_rate": 9.806140325131763e-05, "loss": 0.9934, "step": 27850 }, { "epoch": 0.17798959917202253, "grad_norm": 1.413759469985962, "learning_rate": 9.806001935951899e-05, "loss": 1.061, "step": 27860 }, { "epoch": 0.17805348632176124, "grad_norm": 0.8165162205696106, "learning_rate": 9.805863498371435e-05, "loss": 0.9142, "step": 27870 }, { "epoch": 0.17811737347149995, "grad_norm": 0.6334624886512756, "learning_rate": 9.805725012391768e-05, "loss": 0.9758, "step": 27880 }, { "epoch": 0.17818126062123865, "grad_norm": 0.7921863794326782, "learning_rate": 9.805586478014294e-05, "loss": 1.4444, "step": 27890 }, { "epoch": 0.17824514777097736, "grad_norm": 0.94256192445755, "learning_rate": 9.805447895240407e-05, "loss": 0.7907, "step": 27900 }, { "epoch": 0.17830903492071604, "grad_norm": 0.948287844657898, "learning_rate": 9.805309264071502e-05, "loss": 0.9496, "step": 27910 }, { "epoch": 0.17837292207045474, "grad_norm": 0.5825172066688538, "learning_rate": 9.805170584508976e-05, "loss": 1.1519, "step": 27920 }, { "epoch": 0.17843680922019345, "grad_norm": 1.1197121143341064, "learning_rate": 9.80504573152731e-05, "loss": 0.866, "step": 27930 }, { "epoch": 0.17850069636993215, "grad_norm": 0.8723785877227783, "learning_rate": 9.804906960020751e-05, "loss": 0.7456, "step": 27940 }, { "epoch": 0.17856458351967086, "grad_norm": 0.6122041940689087, "learning_rate": 9.804768140124621e-05, "loss": 0.7238, "step": 27950 }, { "epoch": 0.17862847066940957, "grad_norm": 0.7413936853408813, "learning_rate": 9.80462927184032e-05, "loss": 0.863, "step": 27960 }, { "epoch": 0.17869235781914825, "grad_norm": 0.7080979943275452, "learning_rate": 9.804490355169246e-05, "loss": 0.8773, "step": 27970 }, { "epoch": 0.17875624496888695, "grad_norm": 0.9712502956390381, "learning_rate": 9.804351390112799e-05, "loss": 0.9399, "step": 27980 }, { "epoch": 0.17882013211862566, "grad_norm": 1.720031499862671, "learning_rate": 9.804212376672375e-05, "loss": 1.4551, "step": 27990 }, { "epoch": 0.17888401926836436, "grad_norm": 3.504847526550293, "learning_rate": 9.804073314849375e-05, "loss": 1.1386, "step": 28000 }, { "epoch": 0.17894790641810307, "grad_norm": 0.8636149168014526, "learning_rate": 9.803934204645202e-05, "loss": 0.8948, "step": 28010 }, { "epoch": 0.17901179356784178, "grad_norm": 1.0400105714797974, "learning_rate": 9.803795046061257e-05, "loss": 0.8915, "step": 28020 }, { "epoch": 0.17907568071758045, "grad_norm": 0.6742110848426819, "learning_rate": 9.803655839098938e-05, "loss": 1.0636, "step": 28030 }, { "epoch": 0.17913956786731916, "grad_norm": 1.9153518676757812, "learning_rate": 9.80351658375965e-05, "loss": 0.8614, "step": 28040 }, { "epoch": 0.17920345501705787, "grad_norm": 0.7775312662124634, "learning_rate": 9.803377280044794e-05, "loss": 0.869, "step": 28050 }, { "epoch": 0.17926734216679657, "grad_norm": 0.558363676071167, "learning_rate": 9.803237927955772e-05, "loss": 0.7641, "step": 28060 }, { "epoch": 0.17933122931653528, "grad_norm": 0.7154206037521362, "learning_rate": 9.80309852749399e-05, "loss": 0.7582, "step": 28070 }, { "epoch": 0.17939511646627399, "grad_norm": 0.7916398048400879, "learning_rate": 9.802959078660851e-05, "loss": 1.0197, "step": 28080 }, { "epoch": 0.17945900361601266, "grad_norm": 1.3828551769256592, "learning_rate": 9.802819581457758e-05, "loss": 0.9683, "step": 28090 }, { "epoch": 0.17952289076575137, "grad_norm": 1.6986253261566162, "learning_rate": 9.802680035886118e-05, "loss": 1.0508, "step": 28100 }, { "epoch": 0.17958677791549008, "grad_norm": 0.576038658618927, "learning_rate": 9.802540441947334e-05, "loss": 0.6362, "step": 28110 }, { "epoch": 0.17965066506522878, "grad_norm": 0.8584470748901367, "learning_rate": 9.802400799642814e-05, "loss": 0.8484, "step": 28120 }, { "epoch": 0.1797145522149675, "grad_norm": 0.6002673506736755, "learning_rate": 9.802261108973962e-05, "loss": 0.6569, "step": 28130 }, { "epoch": 0.1797784393647062, "grad_norm": 1.76115083694458, "learning_rate": 9.802121369942188e-05, "loss": 1.0472, "step": 28140 }, { "epoch": 0.17984232651444487, "grad_norm": 0.6964778304100037, "learning_rate": 9.801981582548896e-05, "loss": 1.0831, "step": 28150 }, { "epoch": 0.17990621366418358, "grad_norm": 0.6689683198928833, "learning_rate": 9.801841746795495e-05, "loss": 1.013, "step": 28160 }, { "epoch": 0.17997010081392228, "grad_norm": 1.5421873331069946, "learning_rate": 9.801701862683393e-05, "loss": 0.7561, "step": 28170 }, { "epoch": 0.180033987963661, "grad_norm": 0.8853926062583923, "learning_rate": 9.801561930214001e-05, "loss": 0.7668, "step": 28180 }, { "epoch": 0.1800978751133997, "grad_norm": 0.7320166826248169, "learning_rate": 9.801421949388723e-05, "loss": 0.8514, "step": 28190 }, { "epoch": 0.1801617622631384, "grad_norm": 1.5770325660705566, "learning_rate": 9.801281920208976e-05, "loss": 1.2304, "step": 28200 }, { "epoch": 0.18022564941287708, "grad_norm": 0.8628795146942139, "learning_rate": 9.801141842676164e-05, "loss": 0.999, "step": 28210 }, { "epoch": 0.1802895365626158, "grad_norm": 1.4478768110275269, "learning_rate": 9.801001716791701e-05, "loss": 0.7788, "step": 28220 }, { "epoch": 0.1803534237123545, "grad_norm": 1.1721216440200806, "learning_rate": 9.800861542556998e-05, "loss": 0.6793, "step": 28230 }, { "epoch": 0.1804173108620932, "grad_norm": 1.0601638555526733, "learning_rate": 9.800721319973465e-05, "loss": 0.914, "step": 28240 }, { "epoch": 0.1804811980118319, "grad_norm": 1.330712914466858, "learning_rate": 9.800581049042515e-05, "loss": 0.9251, "step": 28250 }, { "epoch": 0.1805450851615706, "grad_norm": 1.723365306854248, "learning_rate": 9.80044072976556e-05, "loss": 0.8571, "step": 28260 }, { "epoch": 0.1806089723113093, "grad_norm": 1.0684921741485596, "learning_rate": 9.800300362144015e-05, "loss": 0.8753, "step": 28270 }, { "epoch": 0.180672859461048, "grad_norm": 0.870155394077301, "learning_rate": 9.800159946179292e-05, "loss": 0.8745, "step": 28280 }, { "epoch": 0.1807367466107867, "grad_norm": 0.8147633075714111, "learning_rate": 9.800019481872807e-05, "loss": 0.8873, "step": 28290 }, { "epoch": 0.1808006337605254, "grad_norm": 0.8370197415351868, "learning_rate": 9.799878969225971e-05, "loss": 0.7692, "step": 28300 }, { "epoch": 0.18086452091026411, "grad_norm": 0.695644199848175, "learning_rate": 9.799738408240202e-05, "loss": 1.0125, "step": 28310 }, { "epoch": 0.18092840806000282, "grad_norm": 0.8963587284088135, "learning_rate": 9.799597798916915e-05, "loss": 0.9593, "step": 28320 }, { "epoch": 0.1809922952097415, "grad_norm": 0.9512690305709839, "learning_rate": 9.799457141257527e-05, "loss": 0.9553, "step": 28330 }, { "epoch": 0.1810561823594802, "grad_norm": 0.8540796637535095, "learning_rate": 9.799316435263452e-05, "loss": 0.8412, "step": 28340 }, { "epoch": 0.1811200695092189, "grad_norm": 0.7773367762565613, "learning_rate": 9.799175680936109e-05, "loss": 0.8601, "step": 28350 }, { "epoch": 0.18118395665895762, "grad_norm": 2.9732205867767334, "learning_rate": 9.799034878276916e-05, "loss": 0.8188, "step": 28360 }, { "epoch": 0.18124784380869632, "grad_norm": 1.0311912298202515, "learning_rate": 9.798894027287289e-05, "loss": 0.6879, "step": 28370 }, { "epoch": 0.18131173095843503, "grad_norm": 1.366125464439392, "learning_rate": 9.798753127968647e-05, "loss": 0.7352, "step": 28380 }, { "epoch": 0.1813756181081737, "grad_norm": 0.7077022790908813, "learning_rate": 9.79861218032241e-05, "loss": 0.9083, "step": 28390 }, { "epoch": 0.18143950525791241, "grad_norm": 0.9163293242454529, "learning_rate": 9.798471184349997e-05, "loss": 1.0788, "step": 28400 }, { "epoch": 0.18150339240765112, "grad_norm": 0.7429232001304626, "learning_rate": 9.798330140052829e-05, "loss": 1.2201, "step": 28410 }, { "epoch": 0.18156727955738983, "grad_norm": 0.7430415749549866, "learning_rate": 9.798189047432323e-05, "loss": 0.7114, "step": 28420 }, { "epoch": 0.18163116670712853, "grad_norm": 0.9560526013374329, "learning_rate": 9.798047906489905e-05, "loss": 0.9409, "step": 28430 }, { "epoch": 0.18169505385686724, "grad_norm": 1.2373318672180176, "learning_rate": 9.797906717226992e-05, "loss": 0.9829, "step": 28440 }, { "epoch": 0.18175894100660592, "grad_norm": 1.158624291419983, "learning_rate": 9.797765479645007e-05, "loss": 0.8655, "step": 28450 }, { "epoch": 0.18182282815634462, "grad_norm": 0.6600698232650757, "learning_rate": 9.797624193745374e-05, "loss": 0.9877, "step": 28460 }, { "epoch": 0.18188671530608333, "grad_norm": 0.8037683963775635, "learning_rate": 9.797482859529514e-05, "loss": 0.7506, "step": 28470 }, { "epoch": 0.18195060245582204, "grad_norm": 0.7499133348464966, "learning_rate": 9.797341476998853e-05, "loss": 0.7967, "step": 28480 }, { "epoch": 0.18201448960556074, "grad_norm": 0.6633144617080688, "learning_rate": 9.797200046154811e-05, "loss": 0.8313, "step": 28490 }, { "epoch": 0.18207837675529945, "grad_norm": 1.5353120565414429, "learning_rate": 9.797058566998816e-05, "loss": 0.7916, "step": 28500 }, { "epoch": 0.18214226390503815, "grad_norm": 1.1196563243865967, "learning_rate": 9.79691703953229e-05, "loss": 0.8152, "step": 28510 }, { "epoch": 0.18220615105477683, "grad_norm": 0.7169744968414307, "learning_rate": 9.79677546375666e-05, "loss": 0.9211, "step": 28520 }, { "epoch": 0.18227003820451554, "grad_norm": 0.8993495106697083, "learning_rate": 9.796633839673352e-05, "loss": 1.0358, "step": 28530 }, { "epoch": 0.18233392535425424, "grad_norm": 0.600199282169342, "learning_rate": 9.79649216728379e-05, "loss": 0.9579, "step": 28540 }, { "epoch": 0.18239781250399295, "grad_norm": 0.901833176612854, "learning_rate": 9.796350446589404e-05, "loss": 0.8611, "step": 28550 }, { "epoch": 0.18246169965373166, "grad_norm": 0.5698120594024658, "learning_rate": 9.796208677591619e-05, "loss": 0.6931, "step": 28560 }, { "epoch": 0.18252558680347036, "grad_norm": 0.9085325598716736, "learning_rate": 9.796066860291861e-05, "loss": 0.7067, "step": 28570 }, { "epoch": 0.18258947395320904, "grad_norm": 0.8795328140258789, "learning_rate": 9.795924994691564e-05, "loss": 0.938, "step": 28580 }, { "epoch": 0.18265336110294775, "grad_norm": 0.7105121612548828, "learning_rate": 9.795783080792151e-05, "loss": 1.0905, "step": 28590 }, { "epoch": 0.18271724825268645, "grad_norm": 1.094942569732666, "learning_rate": 9.795641118595053e-05, "loss": 0.9418, "step": 28600 }, { "epoch": 0.18278113540242516, "grad_norm": 1.1036394834518433, "learning_rate": 9.795499108101702e-05, "loss": 0.7659, "step": 28610 }, { "epoch": 0.18284502255216387, "grad_norm": 0.9667114019393921, "learning_rate": 9.795357049313526e-05, "loss": 0.7326, "step": 28620 }, { "epoch": 0.18290890970190257, "grad_norm": 0.8882653713226318, "learning_rate": 9.795214942231956e-05, "loss": 0.9086, "step": 28630 }, { "epoch": 0.18297279685164125, "grad_norm": 0.941718339920044, "learning_rate": 9.795072786858421e-05, "loss": 0.8087, "step": 28640 }, { "epoch": 0.18303668400137996, "grad_norm": 0.749993622303009, "learning_rate": 9.794930583194357e-05, "loss": 0.8691, "step": 28650 }, { "epoch": 0.18310057115111866, "grad_norm": 0.9505361318588257, "learning_rate": 9.794788331241193e-05, "loss": 0.9151, "step": 28660 }, { "epoch": 0.18316445830085737, "grad_norm": 0.7003071308135986, "learning_rate": 9.794646031000363e-05, "loss": 0.9178, "step": 28670 }, { "epoch": 0.18322834545059608, "grad_norm": 0.7516195178031921, "learning_rate": 9.7945036824733e-05, "loss": 0.9932, "step": 28680 }, { "epoch": 0.18329223260033478, "grad_norm": 0.737834095954895, "learning_rate": 9.794361285661435e-05, "loss": 1.0445, "step": 28690 }, { "epoch": 0.18335611975007346, "grad_norm": 1.5142183303833008, "learning_rate": 9.794218840566205e-05, "loss": 0.9432, "step": 28700 }, { "epoch": 0.18342000689981217, "grad_norm": 0.9545480012893677, "learning_rate": 9.794076347189045e-05, "loss": 1.0564, "step": 28710 }, { "epoch": 0.18348389404955087, "grad_norm": 1.4519827365875244, "learning_rate": 9.793933805531387e-05, "loss": 1.0927, "step": 28720 }, { "epoch": 0.18354778119928958, "grad_norm": 1.179065465927124, "learning_rate": 9.793791215594669e-05, "loss": 0.8412, "step": 28730 }, { "epoch": 0.18361166834902828, "grad_norm": 0.5378461480140686, "learning_rate": 9.793648577380325e-05, "loss": 0.9532, "step": 28740 }, { "epoch": 0.183675555498767, "grad_norm": 0.9860353469848633, "learning_rate": 9.793505890889795e-05, "loss": 0.7965, "step": 28750 }, { "epoch": 0.18373944264850567, "grad_norm": 0.7210092544555664, "learning_rate": 9.793363156124513e-05, "loss": 0.9562, "step": 28760 }, { "epoch": 0.18380332979824437, "grad_norm": 0.9851694703102112, "learning_rate": 9.793220373085917e-05, "loss": 0.9258, "step": 28770 }, { "epoch": 0.18386721694798308, "grad_norm": 1.2864528894424438, "learning_rate": 9.793077541775444e-05, "loss": 0.8495, "step": 28780 }, { "epoch": 0.1839311040977218, "grad_norm": 0.5326701402664185, "learning_rate": 9.792934662194534e-05, "loss": 0.7886, "step": 28790 }, { "epoch": 0.1839949912474605, "grad_norm": 0.9040879011154175, "learning_rate": 9.792791734344627e-05, "loss": 0.7028, "step": 28800 }, { "epoch": 0.1840588783971992, "grad_norm": 0.7170331478118896, "learning_rate": 9.792648758227159e-05, "loss": 0.957, "step": 28810 }, { "epoch": 0.18412276554693788, "grad_norm": 1.0186604261398315, "learning_rate": 9.792505733843573e-05, "loss": 0.8086, "step": 28820 }, { "epoch": 0.18418665269667658, "grad_norm": 1.5476514101028442, "learning_rate": 9.792362661195307e-05, "loss": 0.9259, "step": 28830 }, { "epoch": 0.1842505398464153, "grad_norm": 0.7610865235328674, "learning_rate": 9.792219540283804e-05, "loss": 0.867, "step": 28840 }, { "epoch": 0.184314426996154, "grad_norm": 0.6964796185493469, "learning_rate": 9.792076371110503e-05, "loss": 0.8641, "step": 28850 }, { "epoch": 0.1843783141458927, "grad_norm": 1.106491208076477, "learning_rate": 9.791933153676849e-05, "loss": 0.9952, "step": 28860 }, { "epoch": 0.1844422012956314, "grad_norm": 1.025023102760315, "learning_rate": 9.791789887984282e-05, "loss": 0.9773, "step": 28870 }, { "epoch": 0.1845060884453701, "grad_norm": 0.7797799706459045, "learning_rate": 9.791646574034245e-05, "loss": 0.7025, "step": 28880 }, { "epoch": 0.1845699755951088, "grad_norm": 0.6405588984489441, "learning_rate": 9.791503211828182e-05, "loss": 0.7509, "step": 28890 }, { "epoch": 0.1846338627448475, "grad_norm": 1.01836097240448, "learning_rate": 9.791359801367536e-05, "loss": 0.7725, "step": 28900 }, { "epoch": 0.1846977498945862, "grad_norm": 1.2316473722457886, "learning_rate": 9.791216342653751e-05, "loss": 0.8199, "step": 28910 }, { "epoch": 0.1847616370443249, "grad_norm": 1.0237054824829102, "learning_rate": 9.791072835688274e-05, "loss": 0.7915, "step": 28920 }, { "epoch": 0.18482552419406362, "grad_norm": 0.6611847877502441, "learning_rate": 9.790929280472547e-05, "loss": 0.8064, "step": 28930 }, { "epoch": 0.1848894113438023, "grad_norm": 0.6756503582000732, "learning_rate": 9.790785677008018e-05, "loss": 0.7544, "step": 28940 }, { "epoch": 0.184953298493541, "grad_norm": 1.1825060844421387, "learning_rate": 9.790642025296134e-05, "loss": 1.0022, "step": 28950 }, { "epoch": 0.1850171856432797, "grad_norm": 1.195821762084961, "learning_rate": 9.790498325338339e-05, "loss": 1.0366, "step": 28960 }, { "epoch": 0.18508107279301841, "grad_norm": 3.405341863632202, "learning_rate": 9.790354577136083e-05, "loss": 0.978, "step": 28970 }, { "epoch": 0.18514495994275712, "grad_norm": 1.0382331609725952, "learning_rate": 9.790210780690811e-05, "loss": 0.9581, "step": 28980 }, { "epoch": 0.18520884709249583, "grad_norm": 0.6907293200492859, "learning_rate": 9.790066936003972e-05, "loss": 0.9692, "step": 28990 }, { "epoch": 0.1852727342422345, "grad_norm": 0.8222552537918091, "learning_rate": 9.789923043077015e-05, "loss": 1.1995, "step": 29000 }, { "epoch": 0.1853366213919732, "grad_norm": 0.5325214862823486, "learning_rate": 9.78977910191139e-05, "loss": 0.9084, "step": 29010 }, { "epoch": 0.18540050854171192, "grad_norm": 0.6033929586410522, "learning_rate": 9.789635112508544e-05, "loss": 0.9668, "step": 29020 }, { "epoch": 0.18546439569145062, "grad_norm": 0.604171872138977, "learning_rate": 9.78949107486993e-05, "loss": 0.8394, "step": 29030 }, { "epoch": 0.18552828284118933, "grad_norm": 0.6410810947418213, "learning_rate": 9.789346988996997e-05, "loss": 1.072, "step": 29040 }, { "epoch": 0.18559216999092804, "grad_norm": 0.8470253348350525, "learning_rate": 9.789202854891198e-05, "loss": 0.9716, "step": 29050 }, { "epoch": 0.1856560571406667, "grad_norm": 0.9727482199668884, "learning_rate": 9.789058672553982e-05, "loss": 0.9176, "step": 29060 }, { "epoch": 0.18571994429040542, "grad_norm": 0.9362789988517761, "learning_rate": 9.7889144419868e-05, "loss": 0.8712, "step": 29070 }, { "epoch": 0.18578383144014413, "grad_norm": 0.6700981259346008, "learning_rate": 9.788770163191108e-05, "loss": 0.6975, "step": 29080 }, { "epoch": 0.18584771858988283, "grad_norm": 0.863276481628418, "learning_rate": 9.788625836168359e-05, "loss": 1.2225, "step": 29090 }, { "epoch": 0.18591160573962154, "grad_norm": 0.9833418130874634, "learning_rate": 9.788481460920003e-05, "loss": 0.92, "step": 29100 }, { "epoch": 0.18597549288936024, "grad_norm": 1.1162675619125366, "learning_rate": 9.788337037447497e-05, "loss": 0.765, "step": 29110 }, { "epoch": 0.18603938003909892, "grad_norm": 0.8579927086830139, "learning_rate": 9.788192565752294e-05, "loss": 0.8593, "step": 29120 }, { "epoch": 0.18610326718883763, "grad_norm": 0.6623185276985168, "learning_rate": 9.788048045835851e-05, "loss": 0.9438, "step": 29130 }, { "epoch": 0.18616715433857633, "grad_norm": 1.0203254222869873, "learning_rate": 9.78790347769962e-05, "loss": 0.9786, "step": 29140 }, { "epoch": 0.18623104148831504, "grad_norm": 1.5538065433502197, "learning_rate": 9.78775886134506e-05, "loss": 0.7127, "step": 29150 }, { "epoch": 0.18629492863805375, "grad_norm": 0.5423676371574402, "learning_rate": 9.787614196773627e-05, "loss": 0.9467, "step": 29160 }, { "epoch": 0.18635881578779245, "grad_norm": 0.8626308441162109, "learning_rate": 9.787469483986775e-05, "loss": 1.054, "step": 29170 }, { "epoch": 0.18642270293753113, "grad_norm": 2.267576217651367, "learning_rate": 9.787324722985966e-05, "loss": 0.8235, "step": 29180 }, { "epoch": 0.18648659008726984, "grad_norm": 1.2194722890853882, "learning_rate": 9.787179913772653e-05, "loss": 1.046, "step": 29190 }, { "epoch": 0.18655047723700854, "grad_norm": 1.2716878652572632, "learning_rate": 9.787035056348298e-05, "loss": 1.0831, "step": 29200 }, { "epoch": 0.18661436438674725, "grad_norm": 0.5902767181396484, "learning_rate": 9.786890150714359e-05, "loss": 0.9125, "step": 29210 }, { "epoch": 0.18667825153648596, "grad_norm": 0.6737661361694336, "learning_rate": 9.786745196872295e-05, "loss": 0.8752, "step": 29220 }, { "epoch": 0.18674213868622466, "grad_norm": 0.7880046367645264, "learning_rate": 9.786600194823565e-05, "loss": 0.7642, "step": 29230 }, { "epoch": 0.18680602583596334, "grad_norm": 1.327628254890442, "learning_rate": 9.78645514456963e-05, "loss": 0.668, "step": 29240 }, { "epoch": 0.18686991298570205, "grad_norm": 1.034236192703247, "learning_rate": 9.786310046111951e-05, "loss": 0.9501, "step": 29250 }, { "epoch": 0.18693380013544075, "grad_norm": 0.7702693939208984, "learning_rate": 9.78616489945199e-05, "loss": 0.9508, "step": 29260 }, { "epoch": 0.18699768728517946, "grad_norm": 0.8737154603004456, "learning_rate": 9.786019704591206e-05, "loss": 0.8081, "step": 29270 }, { "epoch": 0.18706157443491817, "grad_norm": 0.7933652400970459, "learning_rate": 9.785874461531064e-05, "loss": 0.8241, "step": 29280 }, { "epoch": 0.18712546158465687, "grad_norm": 1.6798765659332275, "learning_rate": 9.785729170273026e-05, "loss": 0.8096, "step": 29290 }, { "epoch": 0.18718934873439558, "grad_norm": 0.7516373991966248, "learning_rate": 9.785583830818554e-05, "loss": 0.8489, "step": 29300 }, { "epoch": 0.18725323588413426, "grad_norm": 0.5362650752067566, "learning_rate": 9.785438443169115e-05, "loss": 0.8583, "step": 29310 }, { "epoch": 0.18731712303387296, "grad_norm": 0.5288386940956116, "learning_rate": 9.785293007326169e-05, "loss": 0.8078, "step": 29320 }, { "epoch": 0.18738101018361167, "grad_norm": 0.7445020079612732, "learning_rate": 9.785147523291183e-05, "loss": 0.9432, "step": 29330 }, { "epoch": 0.18744489733335037, "grad_norm": 0.8663593530654907, "learning_rate": 9.78500199106562e-05, "loss": 0.8345, "step": 29340 }, { "epoch": 0.18750878448308908, "grad_norm": 1.6068364381790161, "learning_rate": 9.784856410650951e-05, "loss": 1.0205, "step": 29350 }, { "epoch": 0.1875726716328278, "grad_norm": 0.7024542689323425, "learning_rate": 9.784710782048636e-05, "loss": 0.891, "step": 29360 }, { "epoch": 0.18763655878256646, "grad_norm": 0.6852838397026062, "learning_rate": 9.784565105260145e-05, "loss": 0.7938, "step": 29370 }, { "epoch": 0.18770044593230517, "grad_norm": 0.5752915740013123, "learning_rate": 9.784419380286944e-05, "loss": 0.9839, "step": 29380 }, { "epoch": 0.18776433308204388, "grad_norm": 1.396058201789856, "learning_rate": 9.784273607130501e-05, "loss": 0.8067, "step": 29390 }, { "epoch": 0.18782822023178258, "grad_norm": 0.9546979665756226, "learning_rate": 9.784127785792283e-05, "loss": 0.8, "step": 29400 }, { "epoch": 0.1878921073815213, "grad_norm": 1.17519211769104, "learning_rate": 9.783981916273758e-05, "loss": 1.1313, "step": 29410 }, { "epoch": 0.18795599453126, "grad_norm": 2.2271242141723633, "learning_rate": 9.783835998576398e-05, "loss": 0.8251, "step": 29420 }, { "epoch": 0.18801988168099867, "grad_norm": 1.1907005310058594, "learning_rate": 9.78369003270167e-05, "loss": 0.7798, "step": 29430 }, { "epoch": 0.18808376883073738, "grad_norm": 1.2218221426010132, "learning_rate": 9.783544018651048e-05, "loss": 0.9479, "step": 29440 }, { "epoch": 0.1881476559804761, "grad_norm": 0.7123143076896667, "learning_rate": 9.783397956425997e-05, "loss": 0.8228, "step": 29450 }, { "epoch": 0.1882115431302148, "grad_norm": 1.4676718711853027, "learning_rate": 9.78325184602799e-05, "loss": 0.9251, "step": 29460 }, { "epoch": 0.1882754302799535, "grad_norm": 0.7313151359558105, "learning_rate": 9.783105687458499e-05, "loss": 0.9859, "step": 29470 }, { "epoch": 0.1883393174296922, "grad_norm": 0.7722935080528259, "learning_rate": 9.782959480718997e-05, "loss": 0.7907, "step": 29480 }, { "epoch": 0.18840320457943088, "grad_norm": 1.3157824277877808, "learning_rate": 9.782813225810953e-05, "loss": 1.0648, "step": 29490 }, { "epoch": 0.1884670917291696, "grad_norm": 1.3221862316131592, "learning_rate": 9.782666922735843e-05, "loss": 0.7726, "step": 29500 }, { "epoch": 0.1885309788789083, "grad_norm": 0.5356481671333313, "learning_rate": 9.78252057149514e-05, "loss": 0.8444, "step": 29510 }, { "epoch": 0.188594866028647, "grad_norm": 1.3000450134277344, "learning_rate": 9.782374172090318e-05, "loss": 0.7855, "step": 29520 }, { "epoch": 0.1886587531783857, "grad_norm": 0.7123465538024902, "learning_rate": 9.78222772452285e-05, "loss": 0.8986, "step": 29530 }, { "epoch": 0.1887226403281244, "grad_norm": 0.8324477076530457, "learning_rate": 9.78208122879421e-05, "loss": 0.795, "step": 29540 }, { "epoch": 0.1887865274778631, "grad_norm": 1.0922014713287354, "learning_rate": 9.781934684905879e-05, "loss": 0.8251, "step": 29550 }, { "epoch": 0.1888504146276018, "grad_norm": 0.6796879768371582, "learning_rate": 9.781788092859326e-05, "loss": 0.8954, "step": 29560 }, { "epoch": 0.1889143017773405, "grad_norm": 0.6543946862220764, "learning_rate": 9.78164145265603e-05, "loss": 0.9359, "step": 29570 }, { "epoch": 0.1889781889270792, "grad_norm": 0.7796209454536438, "learning_rate": 9.781494764297468e-05, "loss": 0.7721, "step": 29580 }, { "epoch": 0.18904207607681792, "grad_norm": 1.0429221391677856, "learning_rate": 9.781348027785116e-05, "loss": 1.3679, "step": 29590 }, { "epoch": 0.18910596322655662, "grad_norm": 1.09304940700531, "learning_rate": 9.781201243120455e-05, "loss": 1.1277, "step": 29600 }, { "epoch": 0.1891698503762953, "grad_norm": 0.9372734427452087, "learning_rate": 9.781054410304959e-05, "loss": 0.7567, "step": 29610 }, { "epoch": 0.189233737526034, "grad_norm": 0.9679316282272339, "learning_rate": 9.780907529340111e-05, "loss": 0.8106, "step": 29620 }, { "epoch": 0.1892976246757727, "grad_norm": 1.304903268814087, "learning_rate": 9.780760600227388e-05, "loss": 0.9488, "step": 29630 }, { "epoch": 0.18936151182551142, "grad_norm": 1.0478878021240234, "learning_rate": 9.780613622968269e-05, "loss": 0.8575, "step": 29640 }, { "epoch": 0.18942539897525013, "grad_norm": 1.2268606424331665, "learning_rate": 9.780466597564235e-05, "loss": 1.0457, "step": 29650 }, { "epoch": 0.18948928612498883, "grad_norm": 0.8506630659103394, "learning_rate": 9.780319524016767e-05, "loss": 0.7606, "step": 29660 }, { "epoch": 0.1895531732747275, "grad_norm": 1.1287379264831543, "learning_rate": 9.780172402327346e-05, "loss": 1.0102, "step": 29670 }, { "epoch": 0.18961706042446622, "grad_norm": 0.9983859062194824, "learning_rate": 9.780025232497452e-05, "loss": 0.7572, "step": 29680 }, { "epoch": 0.18968094757420492, "grad_norm": 0.6691607236862183, "learning_rate": 9.77987801452857e-05, "loss": 1.0665, "step": 29690 }, { "epoch": 0.18974483472394363, "grad_norm": 1.1289949417114258, "learning_rate": 9.779730748422181e-05, "loss": 0.9657, "step": 29700 }, { "epoch": 0.18980872187368233, "grad_norm": 0.8309307098388672, "learning_rate": 9.779583434179769e-05, "loss": 1.1482, "step": 29710 }, { "epoch": 0.18987260902342104, "grad_norm": 0.9599489569664001, "learning_rate": 9.779436071802815e-05, "loss": 0.8744, "step": 29720 }, { "epoch": 0.18993649617315972, "grad_norm": 1.1437122821807861, "learning_rate": 9.779288661292807e-05, "loss": 0.9947, "step": 29730 }, { "epoch": 0.19000038332289843, "grad_norm": 0.6847367882728577, "learning_rate": 9.779141202651225e-05, "loss": 0.6783, "step": 29740 }, { "epoch": 0.19006427047263713, "grad_norm": 0.7857696413993835, "learning_rate": 9.778993695879559e-05, "loss": 0.9785, "step": 29750 }, { "epoch": 0.19012815762237584, "grad_norm": 0.6318495273590088, "learning_rate": 9.778846140979292e-05, "loss": 0.8373, "step": 29760 }, { "epoch": 0.19019204477211454, "grad_norm": 1.0563832521438599, "learning_rate": 9.778698537951908e-05, "loss": 0.7032, "step": 29770 }, { "epoch": 0.19025593192185325, "grad_norm": 0.8542010188102722, "learning_rate": 9.778550886798898e-05, "loss": 0.9274, "step": 29780 }, { "epoch": 0.19031981907159193, "grad_norm": 0.6458016633987427, "learning_rate": 9.778403187521746e-05, "loss": 0.8418, "step": 29790 }, { "epoch": 0.19038370622133063, "grad_norm": 1.4049769639968872, "learning_rate": 9.778255440121937e-05, "loss": 0.9105, "step": 29800 }, { "epoch": 0.19044759337106934, "grad_norm": 1.6816697120666504, "learning_rate": 9.778107644600964e-05, "loss": 0.9616, "step": 29810 }, { "epoch": 0.19051148052080805, "grad_norm": 0.8408365249633789, "learning_rate": 9.777959800960314e-05, "loss": 0.9771, "step": 29820 }, { "epoch": 0.19057536767054675, "grad_norm": 0.9713007211685181, "learning_rate": 9.777811909201476e-05, "loss": 0.8812, "step": 29830 }, { "epoch": 0.19063925482028546, "grad_norm": 0.5639253258705139, "learning_rate": 9.777663969325938e-05, "loss": 0.9724, "step": 29840 }, { "epoch": 0.19070314197002414, "grad_norm": 1.0495178699493408, "learning_rate": 9.77751598133519e-05, "loss": 1.005, "step": 29850 }, { "epoch": 0.19076702911976284, "grad_norm": 1.3950402736663818, "learning_rate": 9.777367945230722e-05, "loss": 0.6716, "step": 29860 }, { "epoch": 0.19083091626950155, "grad_norm": 1.0976344347000122, "learning_rate": 9.777219861014028e-05, "loss": 0.7201, "step": 29870 }, { "epoch": 0.19089480341924026, "grad_norm": 0.6188146471977234, "learning_rate": 9.777071728686595e-05, "loss": 0.8153, "step": 29880 }, { "epoch": 0.19095869056897896, "grad_norm": 0.748587965965271, "learning_rate": 9.776923548249919e-05, "loss": 1.0403, "step": 29890 }, { "epoch": 0.19102257771871767, "grad_norm": 0.6070273518562317, "learning_rate": 9.776775319705488e-05, "loss": 0.7215, "step": 29900 }, { "epoch": 0.19108646486845635, "grad_norm": 1.0740474462509155, "learning_rate": 9.776627043054799e-05, "loss": 0.9513, "step": 29910 }, { "epoch": 0.19115035201819505, "grad_norm": 0.5291925072669983, "learning_rate": 9.776478718299343e-05, "loss": 0.6963, "step": 29920 }, { "epoch": 0.19121423916793376, "grad_norm": 1.0318714380264282, "learning_rate": 9.776330345440613e-05, "loss": 0.7995, "step": 29930 }, { "epoch": 0.19127812631767246, "grad_norm": 0.8970870971679688, "learning_rate": 9.776181924480105e-05, "loss": 0.9622, "step": 29940 }, { "epoch": 0.19134201346741117, "grad_norm": 1.631463885307312, "learning_rate": 9.776033455419313e-05, "loss": 0.7972, "step": 29950 }, { "epoch": 0.19140590061714988, "grad_norm": 1.0540581941604614, "learning_rate": 9.775884938259732e-05, "loss": 1.0735, "step": 29960 }, { "epoch": 0.19146978776688856, "grad_norm": 2.7128796577453613, "learning_rate": 9.775736373002858e-05, "loss": 0.7245, "step": 29970 }, { "epoch": 0.19153367491662726, "grad_norm": 3.253152847290039, "learning_rate": 9.775587759650186e-05, "loss": 1.0705, "step": 29980 }, { "epoch": 0.19159756206636597, "grad_norm": 0.7138085961341858, "learning_rate": 9.775439098203216e-05, "loss": 1.0778, "step": 29990 }, { "epoch": 0.19166144921610467, "grad_norm": 1.333784580230713, "learning_rate": 9.775290388663443e-05, "loss": 0.7873, "step": 30000 }, { "epoch": 0.19172533636584338, "grad_norm": 1.4584836959838867, "learning_rate": 9.775141631032362e-05, "loss": 0.827, "step": 30010 }, { "epoch": 0.19178922351558209, "grad_norm": 0.7264024019241333, "learning_rate": 9.774992825311476e-05, "loss": 0.7283, "step": 30020 }, { "epoch": 0.19185311066532076, "grad_norm": 0.9471032619476318, "learning_rate": 9.774843971502282e-05, "loss": 0.8963, "step": 30030 }, { "epoch": 0.19191699781505947, "grad_norm": 0.9348069429397583, "learning_rate": 9.774695069606275e-05, "loss": 0.9005, "step": 30040 }, { "epoch": 0.19198088496479818, "grad_norm": 0.7033948302268982, "learning_rate": 9.774546119624961e-05, "loss": 0.7593, "step": 30050 }, { "epoch": 0.19204477211453688, "grad_norm": 0.7773811221122742, "learning_rate": 9.774397121559836e-05, "loss": 0.9053, "step": 30060 }, { "epoch": 0.1921086592642756, "grad_norm": 1.9900609254837036, "learning_rate": 9.7742480754124e-05, "loss": 0.9314, "step": 30070 }, { "epoch": 0.1921725464140143, "grad_norm": 0.6554052233695984, "learning_rate": 9.774098981184158e-05, "loss": 0.6362, "step": 30080 }, { "epoch": 0.19223643356375297, "grad_norm": 1.0862607955932617, "learning_rate": 9.773949838876608e-05, "loss": 0.7648, "step": 30090 }, { "epoch": 0.19230032071349168, "grad_norm": 0.7586400508880615, "learning_rate": 9.773800648491252e-05, "loss": 0.769, "step": 30100 }, { "epoch": 0.19236420786323039, "grad_norm": 0.8479837775230408, "learning_rate": 9.773651410029594e-05, "loss": 0.802, "step": 30110 }, { "epoch": 0.1924280950129691, "grad_norm": 0.5918093323707581, "learning_rate": 9.773502123493139e-05, "loss": 0.7993, "step": 30120 }, { "epoch": 0.1924919821627078, "grad_norm": 1.4376020431518555, "learning_rate": 9.773352788883385e-05, "loss": 0.9593, "step": 30130 }, { "epoch": 0.1925558693124465, "grad_norm": 0.9727760553359985, "learning_rate": 9.77320340620184e-05, "loss": 0.7962, "step": 30140 }, { "epoch": 0.1926197564621852, "grad_norm": 0.9183517098426819, "learning_rate": 9.773053975450009e-05, "loss": 0.755, "step": 30150 }, { "epoch": 0.1926836436119239, "grad_norm": 1.3329063653945923, "learning_rate": 9.772904496629391e-05, "loss": 0.747, "step": 30160 }, { "epoch": 0.1927475307616626, "grad_norm": 0.7893358469009399, "learning_rate": 9.7727549697415e-05, "loss": 0.9337, "step": 30170 }, { "epoch": 0.1928114179114013, "grad_norm": 1.3940712213516235, "learning_rate": 9.772605394787834e-05, "loss": 0.924, "step": 30180 }, { "epoch": 0.19287530506114, "grad_norm": 1.1371750831604004, "learning_rate": 9.772455771769905e-05, "loss": 0.7126, "step": 30190 }, { "epoch": 0.1929391922108787, "grad_norm": 0.8628626465797424, "learning_rate": 9.772306100689216e-05, "loss": 0.965, "step": 30200 }, { "epoch": 0.19300307936061742, "grad_norm": 0.5869954228401184, "learning_rate": 9.772156381547277e-05, "loss": 0.7079, "step": 30210 }, { "epoch": 0.1930669665103561, "grad_norm": 0.6862210035324097, "learning_rate": 9.772006614345594e-05, "loss": 0.8432, "step": 30220 }, { "epoch": 0.1931308536600948, "grad_norm": 0.9875562191009521, "learning_rate": 9.771856799085678e-05, "loss": 1.3028, "step": 30230 }, { "epoch": 0.1931947408098335, "grad_norm": 1.2262318134307861, "learning_rate": 9.771706935769034e-05, "loss": 0.9413, "step": 30240 }, { "epoch": 0.19325862795957222, "grad_norm": 1.6821092367172241, "learning_rate": 9.771557024397173e-05, "loss": 0.8401, "step": 30250 }, { "epoch": 0.19332251510931092, "grad_norm": 0.5990639925003052, "learning_rate": 9.771407064971605e-05, "loss": 0.918, "step": 30260 }, { "epoch": 0.19338640225904963, "grad_norm": 0.7293832898139954, "learning_rate": 9.771257057493841e-05, "loss": 0.8454, "step": 30270 }, { "epoch": 0.1934502894087883, "grad_norm": 0.7124828100204468, "learning_rate": 9.77110700196539e-05, "loss": 0.7466, "step": 30280 }, { "epoch": 0.193514176558527, "grad_norm": 0.7515029311180115, "learning_rate": 9.770956898387764e-05, "loss": 0.8641, "step": 30290 }, { "epoch": 0.19357806370826572, "grad_norm": 0.7060081958770752, "learning_rate": 9.770806746762473e-05, "loss": 0.8651, "step": 30300 }, { "epoch": 0.19364195085800442, "grad_norm": 0.7407328486442566, "learning_rate": 9.770656547091033e-05, "loss": 1.1405, "step": 30310 }, { "epoch": 0.19370583800774313, "grad_norm": 1.009606122970581, "learning_rate": 9.770506299374953e-05, "loss": 0.9224, "step": 30320 }, { "epoch": 0.19376972515748184, "grad_norm": 1.1087229251861572, "learning_rate": 9.770356003615749e-05, "loss": 0.9545, "step": 30330 }, { "epoch": 0.19383361230722052, "grad_norm": 0.6406879425048828, "learning_rate": 9.770205659814931e-05, "loss": 0.9398, "step": 30340 }, { "epoch": 0.19389749945695922, "grad_norm": 0.5704166889190674, "learning_rate": 9.770055267974017e-05, "loss": 0.6516, "step": 30350 }, { "epoch": 0.19396138660669793, "grad_norm": 0.5956087112426758, "learning_rate": 9.769904828094519e-05, "loss": 1.0608, "step": 30360 }, { "epoch": 0.19402527375643663, "grad_norm": 1.1136138439178467, "learning_rate": 9.769754340177953e-05, "loss": 0.7172, "step": 30370 }, { "epoch": 0.19408916090617534, "grad_norm": 0.4953550696372986, "learning_rate": 9.769603804225833e-05, "loss": 0.9855, "step": 30380 }, { "epoch": 0.19415304805591405, "grad_norm": 1.3780313730239868, "learning_rate": 9.769453220239677e-05, "loss": 0.8654, "step": 30390 }, { "epoch": 0.19421693520565272, "grad_norm": 1.0662996768951416, "learning_rate": 9.769302588221002e-05, "loss": 1.1878, "step": 30400 }, { "epoch": 0.19428082235539143, "grad_norm": 0.896293044090271, "learning_rate": 9.769151908171324e-05, "loss": 0.9222, "step": 30410 }, { "epoch": 0.19434470950513014, "grad_norm": 1.046999454498291, "learning_rate": 9.769001180092159e-05, "loss": 0.9972, "step": 30420 }, { "epoch": 0.19440859665486884, "grad_norm": 0.9748583436012268, "learning_rate": 9.768850403985028e-05, "loss": 0.7333, "step": 30430 }, { "epoch": 0.19447248380460755, "grad_norm": 1.3169922828674316, "learning_rate": 9.768699579851446e-05, "loss": 0.7077, "step": 30440 }, { "epoch": 0.19453637095434625, "grad_norm": 0.8591229319572449, "learning_rate": 9.768548707692935e-05, "loss": 0.7176, "step": 30450 }, { "epoch": 0.19460025810408493, "grad_norm": 1.1447664499282837, "learning_rate": 9.768397787511012e-05, "loss": 0.7956, "step": 30460 }, { "epoch": 0.19466414525382364, "grad_norm": 0.8214355111122131, "learning_rate": 9.768246819307199e-05, "loss": 0.9318, "step": 30470 }, { "epoch": 0.19472803240356235, "grad_norm": 0.6454271078109741, "learning_rate": 9.768095803083015e-05, "loss": 0.9187, "step": 30480 }, { "epoch": 0.19479191955330105, "grad_norm": 0.8612026572227478, "learning_rate": 9.767944738839983e-05, "loss": 0.8895, "step": 30490 }, { "epoch": 0.19485580670303976, "grad_norm": 0.7116665244102478, "learning_rate": 9.76779362657962e-05, "loss": 0.9473, "step": 30500 }, { "epoch": 0.19491969385277846, "grad_norm": 0.4623630940914154, "learning_rate": 9.767642466303452e-05, "loss": 1.0248, "step": 30510 }, { "epoch": 0.19498358100251714, "grad_norm": 0.7994482517242432, "learning_rate": 9.767491258013e-05, "loss": 1.0697, "step": 30520 }, { "epoch": 0.19504746815225585, "grad_norm": 0.9058681130409241, "learning_rate": 9.767340001709785e-05, "loss": 1.0001, "step": 30530 }, { "epoch": 0.19511135530199455, "grad_norm": 0.8972348570823669, "learning_rate": 9.767188697395333e-05, "loss": 0.9495, "step": 30540 }, { "epoch": 0.19517524245173326, "grad_norm": 0.664193868637085, "learning_rate": 9.767037345071166e-05, "loss": 0.9913, "step": 30550 }, { "epoch": 0.19523912960147197, "grad_norm": 1.2621389627456665, "learning_rate": 9.766885944738808e-05, "loss": 0.7485, "step": 30560 }, { "epoch": 0.19530301675121067, "grad_norm": 0.7015652656555176, "learning_rate": 9.766734496399786e-05, "loss": 0.7023, "step": 30570 }, { "epoch": 0.19536690390094935, "grad_norm": 1.066769003868103, "learning_rate": 9.766583000055625e-05, "loss": 1.0337, "step": 30580 }, { "epoch": 0.19543079105068806, "grad_norm": 1.1455520391464233, "learning_rate": 9.766431455707847e-05, "loss": 1.0366, "step": 30590 }, { "epoch": 0.19549467820042676, "grad_norm": 1.4529062509536743, "learning_rate": 9.766279863357982e-05, "loss": 0.9134, "step": 30600 }, { "epoch": 0.19555856535016547, "grad_norm": 0.7042234539985657, "learning_rate": 9.766128223007556e-05, "loss": 1.032, "step": 30610 }, { "epoch": 0.19562245249990418, "grad_norm": 0.7277450561523438, "learning_rate": 9.765991705652953e-05, "loss": 0.9731, "step": 30620 }, { "epoch": 0.19568633964964288, "grad_norm": 0.7915880084037781, "learning_rate": 9.765839974105665e-05, "loss": 1.0449, "step": 30630 }, { "epoch": 0.19575022679938156, "grad_norm": 1.1217659711837769, "learning_rate": 9.765688194562249e-05, "loss": 0.816, "step": 30640 }, { "epoch": 0.19581411394912027, "grad_norm": 0.7037495374679565, "learning_rate": 9.765536367024229e-05, "loss": 0.9901, "step": 30650 }, { "epoch": 0.19587800109885897, "grad_norm": 0.8996081352233887, "learning_rate": 9.765384491493132e-05, "loss": 0.9512, "step": 30660 }, { "epoch": 0.19594188824859768, "grad_norm": 0.543251633644104, "learning_rate": 9.765232567970493e-05, "loss": 0.8288, "step": 30670 }, { "epoch": 0.19600577539833638, "grad_norm": 0.7527588605880737, "learning_rate": 9.76508059645784e-05, "loss": 0.9027, "step": 30680 }, { "epoch": 0.1960696625480751, "grad_norm": 0.8170384764671326, "learning_rate": 9.764928576956703e-05, "loss": 0.8716, "step": 30690 }, { "epoch": 0.19613354969781377, "grad_norm": 0.8016200661659241, "learning_rate": 9.764776509468611e-05, "loss": 0.9099, "step": 30700 }, { "epoch": 0.19619743684755248, "grad_norm": 1.191615343093872, "learning_rate": 9.764624393995098e-05, "loss": 0.9785, "step": 30710 }, { "epoch": 0.19626132399729118, "grad_norm": 1.0004390478134155, "learning_rate": 9.764472230537697e-05, "loss": 1.06, "step": 30720 }, { "epoch": 0.1963252111470299, "grad_norm": 0.5032203197479248, "learning_rate": 9.764320019097938e-05, "loss": 0.7955, "step": 30730 }, { "epoch": 0.1963890982967686, "grad_norm": 1.1866439580917358, "learning_rate": 9.764167759677354e-05, "loss": 0.7862, "step": 30740 }, { "epoch": 0.1964529854465073, "grad_norm": 0.934973955154419, "learning_rate": 9.764015452277479e-05, "loss": 0.9502, "step": 30750 }, { "epoch": 0.19651687259624598, "grad_norm": 1.0195708274841309, "learning_rate": 9.763863096899847e-05, "loss": 1.0983, "step": 30760 }, { "epoch": 0.19658075974598468, "grad_norm": 0.8169684410095215, "learning_rate": 9.763710693545993e-05, "loss": 0.7333, "step": 30770 }, { "epoch": 0.1966446468957234, "grad_norm": 1.0230990648269653, "learning_rate": 9.763558242217452e-05, "loss": 1.1088, "step": 30780 }, { "epoch": 0.1967085340454621, "grad_norm": 1.095651388168335, "learning_rate": 9.763405742915756e-05, "loss": 0.8304, "step": 30790 }, { "epoch": 0.1967724211952008, "grad_norm": 0.717144787311554, "learning_rate": 9.763253195642446e-05, "loss": 0.9346, "step": 30800 }, { "epoch": 0.1968363083449395, "grad_norm": 3.6631853580474854, "learning_rate": 9.763100600399053e-05, "loss": 1.0039, "step": 30810 }, { "epoch": 0.1969001954946782, "grad_norm": 0.7753827571868896, "learning_rate": 9.762947957187117e-05, "loss": 0.986, "step": 30820 }, { "epoch": 0.1969640826444169, "grad_norm": 1.0646581649780273, "learning_rate": 9.762795266008175e-05, "loss": 0.743, "step": 30830 }, { "epoch": 0.1970279697941556, "grad_norm": 0.9290790557861328, "learning_rate": 9.762642526863765e-05, "loss": 0.9802, "step": 30840 }, { "epoch": 0.1970918569438943, "grad_norm": 1.0001217126846313, "learning_rate": 9.762489739755423e-05, "loss": 0.8519, "step": 30850 }, { "epoch": 0.197155744093633, "grad_norm": 0.9493054151535034, "learning_rate": 9.76233690468469e-05, "loss": 0.844, "step": 30860 }, { "epoch": 0.19721963124337172, "grad_norm": 0.775419294834137, "learning_rate": 9.762184021653104e-05, "loss": 0.6618, "step": 30870 }, { "epoch": 0.1972835183931104, "grad_norm": 0.6491733193397522, "learning_rate": 9.762031090662205e-05, "loss": 0.8618, "step": 30880 }, { "epoch": 0.1973474055428491, "grad_norm": 0.7320391535758972, "learning_rate": 9.761878111713534e-05, "loss": 1.1604, "step": 30890 }, { "epoch": 0.1974112926925878, "grad_norm": 0.6711703538894653, "learning_rate": 9.761725084808629e-05, "loss": 0.9965, "step": 30900 }, { "epoch": 0.19747517984232651, "grad_norm": 0.5033368468284607, "learning_rate": 9.761572009949035e-05, "loss": 1.0613, "step": 30910 }, { "epoch": 0.19753906699206522, "grad_norm": 0.8021765947341919, "learning_rate": 9.76141888713629e-05, "loss": 0.8296, "step": 30920 }, { "epoch": 0.19760295414180393, "grad_norm": 0.6978395581245422, "learning_rate": 9.761265716371938e-05, "loss": 0.8845, "step": 30930 }, { "epoch": 0.1976668412915426, "grad_norm": 0.741265594959259, "learning_rate": 9.761112497657522e-05, "loss": 0.8021, "step": 30940 }, { "epoch": 0.1977307284412813, "grad_norm": 0.6882484555244446, "learning_rate": 9.760959230994583e-05, "loss": 0.7808, "step": 30950 }, { "epoch": 0.19779461559102002, "grad_norm": 1.2899192571640015, "learning_rate": 9.760805916384666e-05, "loss": 0.9258, "step": 30960 }, { "epoch": 0.19785850274075872, "grad_norm": 0.7548243999481201, "learning_rate": 9.760652553829314e-05, "loss": 0.749, "step": 30970 }, { "epoch": 0.19792238989049743, "grad_norm": 0.8977358341217041, "learning_rate": 9.760499143330075e-05, "loss": 0.8827, "step": 30980 }, { "epoch": 0.19798627704023614, "grad_norm": 0.8583622574806213, "learning_rate": 9.760345684888489e-05, "loss": 0.7604, "step": 30990 }, { "epoch": 0.19805016418997484, "grad_norm": 0.930568516254425, "learning_rate": 9.760192178506104e-05, "loss": 0.8838, "step": 31000 }, { "epoch": 0.19811405133971352, "grad_norm": 0.7296523451805115, "learning_rate": 9.760038624184466e-05, "loss": 0.997, "step": 31010 }, { "epoch": 0.19817793848945223, "grad_norm": 0.5813782215118408, "learning_rate": 9.75988502192512e-05, "loss": 0.873, "step": 31020 }, { "epoch": 0.19824182563919093, "grad_norm": 1.0174976587295532, "learning_rate": 9.759731371729614e-05, "loss": 0.9311, "step": 31030 }, { "epoch": 0.19830571278892964, "grad_norm": 0.6261200904846191, "learning_rate": 9.759577673599497e-05, "loss": 0.8865, "step": 31040 }, { "epoch": 0.19836959993866835, "grad_norm": 0.5916396975517273, "learning_rate": 9.759423927536316e-05, "loss": 0.7946, "step": 31050 }, { "epoch": 0.19843348708840705, "grad_norm": 1.060449242591858, "learning_rate": 9.759270133541616e-05, "loss": 1.2101, "step": 31060 }, { "epoch": 0.19849737423814573, "grad_norm": 0.5522297620773315, "learning_rate": 9.759116291616948e-05, "loss": 0.86, "step": 31070 }, { "epoch": 0.19856126138788444, "grad_norm": 1.0017218589782715, "learning_rate": 9.758962401763863e-05, "loss": 0.8776, "step": 31080 }, { "epoch": 0.19862514853762314, "grad_norm": 0.8278487920761108, "learning_rate": 9.758808463983911e-05, "loss": 1.0988, "step": 31090 }, { "epoch": 0.19868903568736185, "grad_norm": 0.8800287246704102, "learning_rate": 9.758654478278638e-05, "loss": 0.8976, "step": 31100 }, { "epoch": 0.19875292283710055, "grad_norm": 0.7034065127372742, "learning_rate": 9.758500444649598e-05, "loss": 1.0156, "step": 31110 }, { "epoch": 0.19881680998683926, "grad_norm": 1.02751886844635, "learning_rate": 9.758346363098344e-05, "loss": 0.9064, "step": 31120 }, { "epoch": 0.19888069713657794, "grad_norm": 0.8063342571258545, "learning_rate": 9.758192233626425e-05, "loss": 0.9177, "step": 31130 }, { "epoch": 0.19894458428631664, "grad_norm": 2.721904754638672, "learning_rate": 9.758038056235393e-05, "loss": 0.9505, "step": 31140 }, { "epoch": 0.19900847143605535, "grad_norm": 1.0083937644958496, "learning_rate": 9.757883830926801e-05, "loss": 1.1092, "step": 31150 }, { "epoch": 0.19907235858579406, "grad_norm": 0.841985821723938, "learning_rate": 9.757729557702202e-05, "loss": 0.7708, "step": 31160 }, { "epoch": 0.19913624573553276, "grad_norm": 0.6755800843238831, "learning_rate": 9.757575236563152e-05, "loss": 0.7743, "step": 31170 }, { "epoch": 0.19920013288527147, "grad_norm": 0.7885231971740723, "learning_rate": 9.757420867511202e-05, "loss": 0.9718, "step": 31180 }, { "epoch": 0.19926402003501015, "grad_norm": 1.2030565738677979, "learning_rate": 9.75726645054791e-05, "loss": 0.9473, "step": 31190 }, { "epoch": 0.19932790718474885, "grad_norm": 1.3136283159255981, "learning_rate": 9.757111985674828e-05, "loss": 1.0381, "step": 31200 }, { "epoch": 0.19939179433448756, "grad_norm": 0.6797472834587097, "learning_rate": 9.756957472893513e-05, "loss": 1.0419, "step": 31210 }, { "epoch": 0.19945568148422627, "grad_norm": 0.7219412922859192, "learning_rate": 9.756802912205522e-05, "loss": 1.0792, "step": 31220 }, { "epoch": 0.19951956863396497, "grad_norm": 0.8325220346450806, "learning_rate": 9.756648303612409e-05, "loss": 0.7956, "step": 31230 }, { "epoch": 0.19958345578370368, "grad_norm": 0.9289294481277466, "learning_rate": 9.756493647115734e-05, "loss": 0.7096, "step": 31240 }, { "epoch": 0.19964734293344236, "grad_norm": 0.908420205116272, "learning_rate": 9.756338942717051e-05, "loss": 0.8602, "step": 31250 }, { "epoch": 0.19971123008318106, "grad_norm": 2.6106882095336914, "learning_rate": 9.756184190417921e-05, "loss": 0.9356, "step": 31260 }, { "epoch": 0.19977511723291977, "grad_norm": 0.8880581259727478, "learning_rate": 9.756029390219901e-05, "loss": 0.7862, "step": 31270 }, { "epoch": 0.19983900438265847, "grad_norm": 0.6896887421607971, "learning_rate": 9.755874542124551e-05, "loss": 0.9089, "step": 31280 }, { "epoch": 0.19990289153239718, "grad_norm": 1.0063308477401733, "learning_rate": 9.75571964613343e-05, "loss": 1.0273, "step": 31290 }, { "epoch": 0.1999667786821359, "grad_norm": 1.277763843536377, "learning_rate": 9.755564702248099e-05, "loss": 1.0963, "step": 31300 }, { "epoch": 0.20003066583187457, "grad_norm": 0.6711148023605347, "learning_rate": 9.755409710470116e-05, "loss": 1.016, "step": 31310 }, { "epoch": 0.20009455298161327, "grad_norm": 1.1493245363235474, "learning_rate": 9.755254670801042e-05, "loss": 0.6895, "step": 31320 }, { "epoch": 0.20015844013135198, "grad_norm": 0.5734307765960693, "learning_rate": 9.755099583242442e-05, "loss": 0.9138, "step": 31330 }, { "epoch": 0.20022232728109068, "grad_norm": 0.6204320788383484, "learning_rate": 9.754944447795874e-05, "loss": 0.8158, "step": 31340 }, { "epoch": 0.2002862144308294, "grad_norm": 1.0882511138916016, "learning_rate": 9.754789264462902e-05, "loss": 0.7919, "step": 31350 }, { "epoch": 0.2003501015805681, "grad_norm": 1.2842504978179932, "learning_rate": 9.754634033245089e-05, "loss": 1.1121, "step": 31360 }, { "epoch": 0.20041398873030677, "grad_norm": 0.7042865753173828, "learning_rate": 9.754478754143998e-05, "loss": 0.8598, "step": 31370 }, { "epoch": 0.20047787588004548, "grad_norm": 0.7466055154800415, "learning_rate": 9.754323427161191e-05, "loss": 0.7496, "step": 31380 }, { "epoch": 0.2005417630297842, "grad_norm": 1.2161649465560913, "learning_rate": 9.754168052298237e-05, "loss": 0.9603, "step": 31390 }, { "epoch": 0.2006056501795229, "grad_norm": 0.871167778968811, "learning_rate": 9.754012629556696e-05, "loss": 1.0315, "step": 31400 }, { "epoch": 0.2006695373292616, "grad_norm": 0.853158175945282, "learning_rate": 9.753857158938135e-05, "loss": 0.9701, "step": 31410 }, { "epoch": 0.2007334244790003, "grad_norm": 0.5134825706481934, "learning_rate": 9.753701640444121e-05, "loss": 0.9838, "step": 31420 }, { "epoch": 0.20079731162873898, "grad_norm": 0.7412970662117004, "learning_rate": 9.753546074076217e-05, "loss": 0.802, "step": 31430 }, { "epoch": 0.2008611987784777, "grad_norm": 1.8500874042510986, "learning_rate": 9.753390459835993e-05, "loss": 0.711, "step": 31440 }, { "epoch": 0.2009250859282164, "grad_norm": 0.685453474521637, "learning_rate": 9.753234797725015e-05, "loss": 0.9091, "step": 31450 }, { "epoch": 0.2009889730779551, "grad_norm": 1.9982002973556519, "learning_rate": 9.75307908774485e-05, "loss": 0.8882, "step": 31460 }, { "epoch": 0.2010528602276938, "grad_norm": 1.9767764806747437, "learning_rate": 9.752923329897066e-05, "loss": 0.8807, "step": 31470 }, { "epoch": 0.20111674737743251, "grad_norm": 0.6557339429855347, "learning_rate": 9.752767524183233e-05, "loss": 0.8447, "step": 31480 }, { "epoch": 0.2011806345271712, "grad_norm": 0.6405972838401794, "learning_rate": 9.752611670604919e-05, "loss": 0.8889, "step": 31490 }, { "epoch": 0.2012445216769099, "grad_norm": 0.8593305349349976, "learning_rate": 9.752455769163693e-05, "loss": 1.1378, "step": 31500 }, { "epoch": 0.2013084088266486, "grad_norm": 0.6940191984176636, "learning_rate": 9.752299819861127e-05, "loss": 0.9958, "step": 31510 }, { "epoch": 0.2013722959763873, "grad_norm": 0.8981072306632996, "learning_rate": 9.752143822698789e-05, "loss": 0.8305, "step": 31520 }, { "epoch": 0.20143618312612602, "grad_norm": 1.0248847007751465, "learning_rate": 9.751987777678253e-05, "loss": 0.91, "step": 31530 }, { "epoch": 0.20150007027586472, "grad_norm": 0.8903045654296875, "learning_rate": 9.751831684801089e-05, "loss": 0.8491, "step": 31540 }, { "epoch": 0.2015639574256034, "grad_norm": 1.1542670726776123, "learning_rate": 9.75167554406887e-05, "loss": 0.8601, "step": 31550 }, { "epoch": 0.2016278445753421, "grad_norm": 0.7678368091583252, "learning_rate": 9.751519355483166e-05, "loss": 0.8247, "step": 31560 }, { "epoch": 0.20169173172508081, "grad_norm": 0.9471594095230103, "learning_rate": 9.75136311904555e-05, "loss": 0.9038, "step": 31570 }, { "epoch": 0.20175561887481952, "grad_norm": 0.8465635180473328, "learning_rate": 9.7512068347576e-05, "loss": 0.788, "step": 31580 }, { "epoch": 0.20181950602455823, "grad_norm": 0.9624682664871216, "learning_rate": 9.751050502620885e-05, "loss": 1.0697, "step": 31590 }, { "epoch": 0.20188339317429693, "grad_norm": 0.569759726524353, "learning_rate": 9.750894122636982e-05, "loss": 1.0777, "step": 31600 }, { "epoch": 0.2019472803240356, "grad_norm": 3.1683318614959717, "learning_rate": 9.750737694807464e-05, "loss": 0.9169, "step": 31610 }, { "epoch": 0.20201116747377432, "grad_norm": 0.7441072463989258, "learning_rate": 9.75058121913391e-05, "loss": 0.771, "step": 31620 }, { "epoch": 0.20207505462351302, "grad_norm": 1.1185020208358765, "learning_rate": 9.75042469561789e-05, "loss": 0.8128, "step": 31630 }, { "epoch": 0.20213894177325173, "grad_norm": 0.7714232206344604, "learning_rate": 9.750268124260987e-05, "loss": 0.8612, "step": 31640 }, { "epoch": 0.20220282892299044, "grad_norm": 0.6368833184242249, "learning_rate": 9.75011150506477e-05, "loss": 1.076, "step": 31650 }, { "epoch": 0.20226671607272914, "grad_norm": 1.164900779724121, "learning_rate": 9.749954838030824e-05, "loss": 0.9611, "step": 31660 }, { "epoch": 0.20233060322246782, "grad_norm": 0.66245436668396, "learning_rate": 9.749798123160723e-05, "loss": 0.8932, "step": 31670 }, { "epoch": 0.20239449037220653, "grad_norm": 0.7968323826789856, "learning_rate": 9.749641360456045e-05, "loss": 1.0375, "step": 31680 }, { "epoch": 0.20245837752194523, "grad_norm": 1.1304694414138794, "learning_rate": 9.749484549918371e-05, "loss": 0.843, "step": 31690 }, { "epoch": 0.20252226467168394, "grad_norm": 1.4667329788208008, "learning_rate": 9.749327691549277e-05, "loss": 0.8078, "step": 31700 }, { "epoch": 0.20258615182142264, "grad_norm": 0.8826027512550354, "learning_rate": 9.749170785350344e-05, "loss": 1.0263, "step": 31710 }, { "epoch": 0.20265003897116135, "grad_norm": 0.7443497180938721, "learning_rate": 9.749013831323154e-05, "loss": 0.7889, "step": 31720 }, { "epoch": 0.20271392612090003, "grad_norm": 0.5433924198150635, "learning_rate": 9.748856829469287e-05, "loss": 0.9073, "step": 31730 }, { "epoch": 0.20277781327063873, "grad_norm": 0.5322934985160828, "learning_rate": 9.74869977979032e-05, "loss": 0.7924, "step": 31740 }, { "epoch": 0.20284170042037744, "grad_norm": 1.0181642770767212, "learning_rate": 9.748542682287841e-05, "loss": 0.7738, "step": 31750 }, { "epoch": 0.20290558757011615, "grad_norm": 0.8533402681350708, "learning_rate": 9.74838553696343e-05, "loss": 1.1269, "step": 31760 }, { "epoch": 0.20296947471985485, "grad_norm": 0.6573584079742432, "learning_rate": 9.748228343818666e-05, "loss": 0.9684, "step": 31770 }, { "epoch": 0.20303336186959356, "grad_norm": 1.141799807548523, "learning_rate": 9.748071102855135e-05, "loss": 1.0159, "step": 31780 }, { "epoch": 0.20309724901933224, "grad_norm": 2.4994301795959473, "learning_rate": 9.747913814074421e-05, "loss": 0.7334, "step": 31790 }, { "epoch": 0.20316113616907094, "grad_norm": 1.0525953769683838, "learning_rate": 9.747756477478108e-05, "loss": 0.9094, "step": 31800 }, { "epoch": 0.20322502331880965, "grad_norm": 0.6493773460388184, "learning_rate": 9.747599093067779e-05, "loss": 0.7929, "step": 31810 }, { "epoch": 0.20328891046854836, "grad_norm": 1.622753381729126, "learning_rate": 9.747441660845021e-05, "loss": 1.3227, "step": 31820 }, { "epoch": 0.20335279761828706, "grad_norm": 0.932706356048584, "learning_rate": 9.747284180811417e-05, "loss": 1.2109, "step": 31830 }, { "epoch": 0.20341668476802577, "grad_norm": 0.706366240978241, "learning_rate": 9.747126652968554e-05, "loss": 1.0372, "step": 31840 }, { "epoch": 0.20348057191776447, "grad_norm": 0.5680680871009827, "learning_rate": 9.74696907731802e-05, "loss": 0.8115, "step": 31850 }, { "epoch": 0.20354445906750315, "grad_norm": 0.7886488437652588, "learning_rate": 9.7468114538614e-05, "loss": 0.8516, "step": 31860 }, { "epoch": 0.20360834621724186, "grad_norm": 1.467068076133728, "learning_rate": 9.746653782600284e-05, "loss": 0.9352, "step": 31870 }, { "epoch": 0.20367223336698057, "grad_norm": 0.6054574251174927, "learning_rate": 9.746496063536254e-05, "loss": 0.738, "step": 31880 }, { "epoch": 0.20373612051671927, "grad_norm": 0.8049781322479248, "learning_rate": 9.746338296670906e-05, "loss": 0.9212, "step": 31890 }, { "epoch": 0.20380000766645798, "grad_norm": 2.8067591190338135, "learning_rate": 9.746180482005825e-05, "loss": 1.1401, "step": 31900 }, { "epoch": 0.20386389481619668, "grad_norm": 0.5604707598686218, "learning_rate": 9.746022619542599e-05, "loss": 0.7448, "step": 31910 }, { "epoch": 0.20392778196593536, "grad_norm": 0.6594801545143127, "learning_rate": 9.745864709282819e-05, "loss": 1.0038, "step": 31920 }, { "epoch": 0.20399166911567407, "grad_norm": 0.7814098000526428, "learning_rate": 9.745706751228076e-05, "loss": 1.0487, "step": 31930 }, { "epoch": 0.20405555626541277, "grad_norm": 0.892376184463501, "learning_rate": 9.745548745379961e-05, "loss": 1.006, "step": 31940 }, { "epoch": 0.20411944341515148, "grad_norm": 0.4663401246070862, "learning_rate": 9.745390691740064e-05, "loss": 0.9555, "step": 31950 }, { "epoch": 0.2041833305648902, "grad_norm": 0.9874062538146973, "learning_rate": 9.745232590309978e-05, "loss": 1.0092, "step": 31960 }, { "epoch": 0.2042472177146289, "grad_norm": 0.5330253839492798, "learning_rate": 9.745074441091294e-05, "loss": 1.0081, "step": 31970 }, { "epoch": 0.20431110486436757, "grad_norm": 1.0687589645385742, "learning_rate": 9.744916244085606e-05, "loss": 0.8934, "step": 31980 }, { "epoch": 0.20437499201410628, "grad_norm": 0.6077286601066589, "learning_rate": 9.744757999294506e-05, "loss": 0.8938, "step": 31990 }, { "epoch": 0.20443887916384498, "grad_norm": 0.6717079281806946, "learning_rate": 9.744599706719588e-05, "loss": 0.9467, "step": 32000 }, { "epoch": 0.2045027663135837, "grad_norm": 1.032605767250061, "learning_rate": 9.744441366362447e-05, "loss": 0.9648, "step": 32010 }, { "epoch": 0.2045666534633224, "grad_norm": 0.6703940629959106, "learning_rate": 9.744282978224677e-05, "loss": 0.7152, "step": 32020 }, { "epoch": 0.2046305406130611, "grad_norm": 1.4983042478561401, "learning_rate": 9.744124542307871e-05, "loss": 0.9562, "step": 32030 }, { "epoch": 0.20469442776279978, "grad_norm": 0.7340278029441833, "learning_rate": 9.743966058613629e-05, "loss": 0.9512, "step": 32040 }, { "epoch": 0.20475831491253849, "grad_norm": 2.0036234855651855, "learning_rate": 9.743807527143544e-05, "loss": 1.1441, "step": 32050 }, { "epoch": 0.2048222020622772, "grad_norm": 0.8495148420333862, "learning_rate": 9.743648947899214e-05, "loss": 1.0051, "step": 32060 }, { "epoch": 0.2048860892120159, "grad_norm": 1.4452283382415771, "learning_rate": 9.743490320882234e-05, "loss": 0.8346, "step": 32070 }, { "epoch": 0.2049499763617546, "grad_norm": 0.7870922684669495, "learning_rate": 9.743331646094202e-05, "loss": 0.8006, "step": 32080 }, { "epoch": 0.2050138635114933, "grad_norm": 0.8627803325653076, "learning_rate": 9.743172923536718e-05, "loss": 0.947, "step": 32090 }, { "epoch": 0.205077750661232, "grad_norm": 1.4049910306930542, "learning_rate": 9.74301415321138e-05, "loss": 1.1061, "step": 32100 }, { "epoch": 0.2051416378109707, "grad_norm": 0.6403430700302124, "learning_rate": 9.742855335119785e-05, "loss": 0.8099, "step": 32110 }, { "epoch": 0.2052055249607094, "grad_norm": 1.0958514213562012, "learning_rate": 9.742696469263533e-05, "loss": 0.9353, "step": 32120 }, { "epoch": 0.2052694121104481, "grad_norm": 0.828372597694397, "learning_rate": 9.742537555644225e-05, "loss": 0.7152, "step": 32130 }, { "epoch": 0.2053332992601868, "grad_norm": 0.743424654006958, "learning_rate": 9.742378594263461e-05, "loss": 0.8165, "step": 32140 }, { "epoch": 0.20539718640992552, "grad_norm": 1.0300029516220093, "learning_rate": 9.742219585122843e-05, "loss": 1.0059, "step": 32150 }, { "epoch": 0.2054610735596642, "grad_norm": 0.9428716897964478, "learning_rate": 9.74206052822397e-05, "loss": 0.9221, "step": 32160 }, { "epoch": 0.2055249607094029, "grad_norm": 0.9042668342590332, "learning_rate": 9.741901423568446e-05, "loss": 0.8297, "step": 32170 }, { "epoch": 0.2055888478591416, "grad_norm": 1.2374792098999023, "learning_rate": 9.741742271157872e-05, "loss": 0.8647, "step": 32180 }, { "epoch": 0.20565273500888032, "grad_norm": 0.9123538136482239, "learning_rate": 9.74158307099385e-05, "loss": 0.6822, "step": 32190 }, { "epoch": 0.20571662215861902, "grad_norm": 0.725796103477478, "learning_rate": 9.741423823077986e-05, "loss": 1.1005, "step": 32200 }, { "epoch": 0.20578050930835773, "grad_norm": 0.8962036371231079, "learning_rate": 9.741264527411881e-05, "loss": 1.0891, "step": 32210 }, { "epoch": 0.2058443964580964, "grad_norm": 0.9846658110618591, "learning_rate": 9.741105183997141e-05, "loss": 1.0041, "step": 32220 }, { "epoch": 0.2059082836078351, "grad_norm": 0.4427562654018402, "learning_rate": 9.74094579283537e-05, "loss": 0.8606, "step": 32230 }, { "epoch": 0.20597217075757382, "grad_norm": 0.8591815829277039, "learning_rate": 9.740786353928173e-05, "loss": 1.0499, "step": 32240 }, { "epoch": 0.20603605790731253, "grad_norm": 0.5261662602424622, "learning_rate": 9.740626867277157e-05, "loss": 0.9264, "step": 32250 }, { "epoch": 0.20609994505705123, "grad_norm": 1.2539498805999756, "learning_rate": 9.740467332883926e-05, "loss": 1.0337, "step": 32260 }, { "epoch": 0.20616383220678994, "grad_norm": 0.6254390478134155, "learning_rate": 9.740307750750088e-05, "loss": 1.0999, "step": 32270 }, { "epoch": 0.20622771935652862, "grad_norm": 0.6762027144432068, "learning_rate": 9.740148120877251e-05, "loss": 0.9724, "step": 32280 }, { "epoch": 0.20629160650626732, "grad_norm": 0.9390422105789185, "learning_rate": 9.73998844326702e-05, "loss": 0.8626, "step": 32290 }, { "epoch": 0.20635549365600603, "grad_norm": 0.8526495695114136, "learning_rate": 9.739828717921006e-05, "loss": 0.911, "step": 32300 }, { "epoch": 0.20641938080574473, "grad_norm": 1.050434947013855, "learning_rate": 9.739668944840817e-05, "loss": 1.0802, "step": 32310 }, { "epoch": 0.20648326795548344, "grad_norm": 0.8968641757965088, "learning_rate": 9.739509124028062e-05, "loss": 1.0353, "step": 32320 }, { "epoch": 0.20654715510522215, "grad_norm": 0.9247165322303772, "learning_rate": 9.739349255484346e-05, "loss": 1.1142, "step": 32330 }, { "epoch": 0.20661104225496082, "grad_norm": 0.7122106552124023, "learning_rate": 9.739189339211286e-05, "loss": 1.0356, "step": 32340 }, { "epoch": 0.20667492940469953, "grad_norm": 0.5841015577316284, "learning_rate": 9.739029375210489e-05, "loss": 0.9243, "step": 32350 }, { "epoch": 0.20673881655443824, "grad_norm": 0.7304105758666992, "learning_rate": 9.738869363483565e-05, "loss": 0.8895, "step": 32360 }, { "epoch": 0.20680270370417694, "grad_norm": 0.9879099726676941, "learning_rate": 9.738709304032128e-05, "loss": 1.0733, "step": 32370 }, { "epoch": 0.20686659085391565, "grad_norm": 1.092883586883545, "learning_rate": 9.738549196857789e-05, "loss": 0.9595, "step": 32380 }, { "epoch": 0.20693047800365436, "grad_norm": 0.8127654194831848, "learning_rate": 9.738389041962159e-05, "loss": 0.6739, "step": 32390 }, { "epoch": 0.20699436515339303, "grad_norm": 0.60942542552948, "learning_rate": 9.738228839346853e-05, "loss": 0.829, "step": 32400 }, { "epoch": 0.20705825230313174, "grad_norm": 1.1465409994125366, "learning_rate": 9.738068589013483e-05, "loss": 0.7232, "step": 32410 }, { "epoch": 0.20712213945287045, "grad_norm": 0.6177552342414856, "learning_rate": 9.737908290963663e-05, "loss": 0.8286, "step": 32420 }, { "epoch": 0.20718602660260915, "grad_norm": 0.5419365763664246, "learning_rate": 9.737747945199009e-05, "loss": 0.8722, "step": 32430 }, { "epoch": 0.20724991375234786, "grad_norm": 1.0209770202636719, "learning_rate": 9.737587551721132e-05, "loss": 0.9187, "step": 32440 }, { "epoch": 0.20731380090208656, "grad_norm": 0.7830290198326111, "learning_rate": 9.737427110531652e-05, "loss": 1.1599, "step": 32450 }, { "epoch": 0.20737768805182524, "grad_norm": 1.0259994268417358, "learning_rate": 9.737266621632182e-05, "loss": 1.1211, "step": 32460 }, { "epoch": 0.20744157520156395, "grad_norm": 0.6848270893096924, "learning_rate": 9.73710608502434e-05, "loss": 0.7989, "step": 32470 }, { "epoch": 0.20750546235130266, "grad_norm": 0.779099702835083, "learning_rate": 9.736945500709737e-05, "loss": 0.9887, "step": 32480 }, { "epoch": 0.20756934950104136, "grad_norm": 0.7140209078788757, "learning_rate": 9.736784868689999e-05, "loss": 1.075, "step": 32490 }, { "epoch": 0.20763323665078007, "grad_norm": 0.7910488247871399, "learning_rate": 9.736624188966738e-05, "loss": 1.1467, "step": 32500 }, { "epoch": 0.20769712380051877, "grad_norm": 0.8852772116661072, "learning_rate": 9.736463461541574e-05, "loss": 1.0504, "step": 32510 }, { "epoch": 0.20776101095025745, "grad_norm": 1.6205745935440063, "learning_rate": 9.736302686416126e-05, "loss": 0.8582, "step": 32520 }, { "epoch": 0.20782489809999616, "grad_norm": 0.9984052777290344, "learning_rate": 9.736141863592012e-05, "loss": 1.0526, "step": 32530 }, { "epoch": 0.20788878524973486, "grad_norm": 0.7698317170143127, "learning_rate": 9.735980993070852e-05, "loss": 0.8745, "step": 32540 }, { "epoch": 0.20795267239947357, "grad_norm": 1.8012065887451172, "learning_rate": 9.735820074854265e-05, "loss": 0.9542, "step": 32550 }, { "epoch": 0.20801655954921228, "grad_norm": 0.7188138365745544, "learning_rate": 9.735659108943876e-05, "loss": 0.6682, "step": 32560 }, { "epoch": 0.20808044669895098, "grad_norm": 0.7604565620422363, "learning_rate": 9.7354980953413e-05, "loss": 0.7246, "step": 32570 }, { "epoch": 0.20814433384868966, "grad_norm": 0.6722016334533691, "learning_rate": 9.735337034048162e-05, "loss": 0.8719, "step": 32580 }, { "epoch": 0.20820822099842837, "grad_norm": 0.5613377690315247, "learning_rate": 9.735175925066082e-05, "loss": 0.8531, "step": 32590 }, { "epoch": 0.20827210814816707, "grad_norm": 1.168945550918579, "learning_rate": 9.735014768396686e-05, "loss": 1.1047, "step": 32600 }, { "epoch": 0.20833599529790578, "grad_norm": 0.7283167243003845, "learning_rate": 9.734853564041595e-05, "loss": 0.7414, "step": 32610 }, { "epoch": 0.20839988244764449, "grad_norm": 0.8897091150283813, "learning_rate": 9.734692312002431e-05, "loss": 0.9406, "step": 32620 }, { "epoch": 0.2084637695973832, "grad_norm": 0.6193281412124634, "learning_rate": 9.734531012280821e-05, "loss": 0.8429, "step": 32630 }, { "epoch": 0.20852765674712187, "grad_norm": 1.2287752628326416, "learning_rate": 9.734369664878387e-05, "loss": 0.9993, "step": 32640 }, { "epoch": 0.20859154389686058, "grad_norm": 1.4086371660232544, "learning_rate": 9.734208269796754e-05, "loss": 0.6823, "step": 32650 }, { "epoch": 0.20865543104659928, "grad_norm": 0.9113640785217285, "learning_rate": 9.734046827037548e-05, "loss": 1.1112, "step": 32660 }, { "epoch": 0.208719318196338, "grad_norm": 0.7698211073875427, "learning_rate": 9.733885336602396e-05, "loss": 0.7977, "step": 32670 }, { "epoch": 0.2087832053460767, "grad_norm": 2.5170323848724365, "learning_rate": 9.733723798492921e-05, "loss": 0.8861, "step": 32680 }, { "epoch": 0.2088470924958154, "grad_norm": 0.5907607078552246, "learning_rate": 9.733562212710755e-05, "loss": 0.8325, "step": 32690 }, { "epoch": 0.2089109796455541, "grad_norm": 0.7293870449066162, "learning_rate": 9.733400579257521e-05, "loss": 0.8956, "step": 32700 }, { "epoch": 0.20897486679529279, "grad_norm": 0.9861850738525391, "learning_rate": 9.733238898134848e-05, "loss": 0.8441, "step": 32710 }, { "epoch": 0.2090387539450315, "grad_norm": 0.8502741456031799, "learning_rate": 9.733077169344366e-05, "loss": 0.7623, "step": 32720 }, { "epoch": 0.2091026410947702, "grad_norm": 0.6573517322540283, "learning_rate": 9.7329153928877e-05, "loss": 1.134, "step": 32730 }, { "epoch": 0.2091665282445089, "grad_norm": 1.0283352136611938, "learning_rate": 9.732753568766482e-05, "loss": 1.1342, "step": 32740 }, { "epoch": 0.2092304153942476, "grad_norm": 0.9217149019241333, "learning_rate": 9.732591696982343e-05, "loss": 0.7505, "step": 32750 }, { "epoch": 0.20929430254398632, "grad_norm": 1.5344794988632202, "learning_rate": 9.732429777536909e-05, "loss": 0.8524, "step": 32760 }, { "epoch": 0.209358189693725, "grad_norm": 0.6569311022758484, "learning_rate": 9.732267810431814e-05, "loss": 0.9557, "step": 32770 }, { "epoch": 0.2094220768434637, "grad_norm": 1.269944190979004, "learning_rate": 9.732105795668689e-05, "loss": 0.8407, "step": 32780 }, { "epoch": 0.2094859639932024, "grad_norm": 0.914414644241333, "learning_rate": 9.731943733249164e-05, "loss": 0.7725, "step": 32790 }, { "epoch": 0.2095498511429411, "grad_norm": 0.5438032746315002, "learning_rate": 9.731781623174871e-05, "loss": 0.9418, "step": 32800 }, { "epoch": 0.20961373829267982, "grad_norm": 0.9533820152282715, "learning_rate": 9.731619465447445e-05, "loss": 0.7887, "step": 32810 }, { "epoch": 0.20967762544241852, "grad_norm": 0.9719078540802002, "learning_rate": 9.731457260068517e-05, "loss": 1.0511, "step": 32820 }, { "epoch": 0.2097415125921572, "grad_norm": 0.8131768107414246, "learning_rate": 9.73129500703972e-05, "loss": 0.7443, "step": 32830 }, { "epoch": 0.2098053997418959, "grad_norm": 0.9436559081077576, "learning_rate": 9.731132706362692e-05, "loss": 0.7655, "step": 32840 }, { "epoch": 0.20986928689163462, "grad_norm": 0.6353892683982849, "learning_rate": 9.730970358039062e-05, "loss": 0.9139, "step": 32850 }, { "epoch": 0.20993317404137332, "grad_norm": 1.7300466299057007, "learning_rate": 9.730807962070467e-05, "loss": 0.8533, "step": 32860 }, { "epoch": 0.20999706119111203, "grad_norm": 0.9070175886154175, "learning_rate": 9.730645518458545e-05, "loss": 1.0384, "step": 32870 }, { "epoch": 0.21006094834085073, "grad_norm": 1.630418300628662, "learning_rate": 9.73048302720493e-05, "loss": 1.1833, "step": 32880 }, { "epoch": 0.2101248354905894, "grad_norm": 0.6094731092453003, "learning_rate": 9.730320488311258e-05, "loss": 0.8528, "step": 32890 }, { "epoch": 0.21018872264032812, "grad_norm": 0.9163777828216553, "learning_rate": 9.730157901779165e-05, "loss": 0.986, "step": 32900 }, { "epoch": 0.21025260979006682, "grad_norm": 0.885759174823761, "learning_rate": 9.729995267610293e-05, "loss": 1.0211, "step": 32910 }, { "epoch": 0.21031649693980553, "grad_norm": 0.6660359501838684, "learning_rate": 9.729832585806273e-05, "loss": 0.8855, "step": 32920 }, { "epoch": 0.21038038408954424, "grad_norm": 0.9728102087974548, "learning_rate": 9.729669856368748e-05, "loss": 0.9548, "step": 32930 }, { "epoch": 0.21044427123928294, "grad_norm": 0.8899286985397339, "learning_rate": 9.729507079299359e-05, "loss": 0.98, "step": 32940 }, { "epoch": 0.21050815838902162, "grad_norm": 0.8630788326263428, "learning_rate": 9.729344254599738e-05, "loss": 0.8842, "step": 32950 }, { "epoch": 0.21057204553876033, "grad_norm": 1.159555435180664, "learning_rate": 9.72918138227153e-05, "loss": 1.0565, "step": 32960 }, { "epoch": 0.21063593268849903, "grad_norm": 0.9720593690872192, "learning_rate": 9.729018462316375e-05, "loss": 0.8663, "step": 32970 }, { "epoch": 0.21069981983823774, "grad_norm": 1.0807291269302368, "learning_rate": 9.728855494735914e-05, "loss": 0.7609, "step": 32980 }, { "epoch": 0.21076370698797645, "grad_norm": 0.9693974852561951, "learning_rate": 9.728692479531784e-05, "loss": 0.9466, "step": 32990 }, { "epoch": 0.21082759413771515, "grad_norm": 1.1828261613845825, "learning_rate": 9.728529416705632e-05, "loss": 1.17, "step": 33000 }, { "epoch": 0.21089148128745383, "grad_norm": 0.8070554733276367, "learning_rate": 9.728366306259098e-05, "loss": 0.999, "step": 33010 }, { "epoch": 0.21095536843719254, "grad_norm": 0.6054061651229858, "learning_rate": 9.728203148193824e-05, "loss": 0.7462, "step": 33020 }, { "epoch": 0.21101925558693124, "grad_norm": 0.9334638714790344, "learning_rate": 9.728039942511453e-05, "loss": 0.8478, "step": 33030 }, { "epoch": 0.21108314273666995, "grad_norm": 0.692486584186554, "learning_rate": 9.727876689213631e-05, "loss": 0.9051, "step": 33040 }, { "epoch": 0.21114702988640865, "grad_norm": 0.7370048761367798, "learning_rate": 9.727713388302e-05, "loss": 1.131, "step": 33050 }, { "epoch": 0.21121091703614736, "grad_norm": 0.8169997930526733, "learning_rate": 9.727550039778205e-05, "loss": 0.762, "step": 33060 }, { "epoch": 0.21127480418588604, "grad_norm": 1.1108886003494263, "learning_rate": 9.727386643643891e-05, "loss": 0.8818, "step": 33070 }, { "epoch": 0.21133869133562475, "grad_norm": 2.2037575244903564, "learning_rate": 9.727223199900704e-05, "loss": 0.9574, "step": 33080 }, { "epoch": 0.21140257848536345, "grad_norm": 0.820559024810791, "learning_rate": 9.72705970855029e-05, "loss": 1.0319, "step": 33090 }, { "epoch": 0.21146646563510216, "grad_norm": 0.6320390701293945, "learning_rate": 9.726896169594295e-05, "loss": 0.8773, "step": 33100 }, { "epoch": 0.21153035278484086, "grad_norm": 0.6292109489440918, "learning_rate": 9.726732583034365e-05, "loss": 0.7979, "step": 33110 }, { "epoch": 0.21159423993457957, "grad_norm": 1.0046201944351196, "learning_rate": 9.72656894887215e-05, "loss": 0.7807, "step": 33120 }, { "epoch": 0.21165812708431825, "grad_norm": 0.8816448450088501, "learning_rate": 9.726405267109297e-05, "loss": 0.7321, "step": 33130 }, { "epoch": 0.21172201423405695, "grad_norm": 0.9356503486633301, "learning_rate": 9.726241537747454e-05, "loss": 0.791, "step": 33140 }, { "epoch": 0.21178590138379566, "grad_norm": 0.8952210545539856, "learning_rate": 9.72607776078827e-05, "loss": 0.9033, "step": 33150 }, { "epoch": 0.21184978853353437, "grad_norm": 0.6787972450256348, "learning_rate": 9.725913936233393e-05, "loss": 0.8994, "step": 33160 }, { "epoch": 0.21191367568327307, "grad_norm": 1.112884759902954, "learning_rate": 9.725750064084476e-05, "loss": 0.8439, "step": 33170 }, { "epoch": 0.21197756283301178, "grad_norm": 1.08254873752594, "learning_rate": 9.725586144343166e-05, "loss": 0.8901, "step": 33180 }, { "epoch": 0.21204144998275046, "grad_norm": 0.7427080273628235, "learning_rate": 9.725422177011116e-05, "loss": 0.9528, "step": 33190 }, { "epoch": 0.21210533713248916, "grad_norm": 0.6845873594284058, "learning_rate": 9.725274565723552e-05, "loss": 1.1284, "step": 33200 }, { "epoch": 0.21216922428222787, "grad_norm": 2.6716866493225098, "learning_rate": 9.725110507973644e-05, "loss": 0.9867, "step": 33210 }, { "epoch": 0.21223311143196658, "grad_norm": 1.6081085205078125, "learning_rate": 9.724946402637786e-05, "loss": 0.6687, "step": 33220 }, { "epoch": 0.21229699858170528, "grad_norm": 0.7291703820228577, "learning_rate": 9.724782249717628e-05, "loss": 0.8611, "step": 33230 }, { "epoch": 0.212360885731444, "grad_norm": 0.6999391317367554, "learning_rate": 9.724618049214828e-05, "loss": 0.8015, "step": 33240 }, { "epoch": 0.21242477288118267, "grad_norm": 0.7499661445617676, "learning_rate": 9.724453801131035e-05, "loss": 0.8521, "step": 33250 }, { "epoch": 0.21248866003092137, "grad_norm": 1.027510404586792, "learning_rate": 9.724289505467906e-05, "loss": 1.0125, "step": 33260 }, { "epoch": 0.21255254718066008, "grad_norm": 1.0336750745773315, "learning_rate": 9.724125162227095e-05, "loss": 0.8207, "step": 33270 }, { "epoch": 0.21261643433039878, "grad_norm": 0.8094274401664734, "learning_rate": 9.723960771410256e-05, "loss": 0.7034, "step": 33280 }, { "epoch": 0.2126803214801375, "grad_norm": 0.9066417813301086, "learning_rate": 9.723796333019044e-05, "loss": 0.8273, "step": 33290 }, { "epoch": 0.2127442086298762, "grad_norm": 1.2769392728805542, "learning_rate": 9.723631847055119e-05, "loss": 0.792, "step": 33300 }, { "epoch": 0.21280809577961488, "grad_norm": 0.751732349395752, "learning_rate": 9.723467313520133e-05, "loss": 0.8004, "step": 33310 }, { "epoch": 0.21287198292935358, "grad_norm": 0.7040248513221741, "learning_rate": 9.723302732415745e-05, "loss": 1.0993, "step": 33320 }, { "epoch": 0.2129358700790923, "grad_norm": 0.6100977063179016, "learning_rate": 9.723138103743612e-05, "loss": 0.7998, "step": 33330 }, { "epoch": 0.212999757228831, "grad_norm": 1.0050344467163086, "learning_rate": 9.722973427505391e-05, "loss": 0.8967, "step": 33340 }, { "epoch": 0.2130636443785697, "grad_norm": 0.5379306674003601, "learning_rate": 9.722808703702743e-05, "loss": 0.7652, "step": 33350 }, { "epoch": 0.2131275315283084, "grad_norm": 0.6813077330589294, "learning_rate": 9.722643932337327e-05, "loss": 1.2678, "step": 33360 }, { "epoch": 0.21319141867804708, "grad_norm": 1.1152585744857788, "learning_rate": 9.722479113410799e-05, "loss": 0.9101, "step": 33370 }, { "epoch": 0.2132553058277858, "grad_norm": 0.8351494073867798, "learning_rate": 9.722314246924822e-05, "loss": 0.8285, "step": 33380 }, { "epoch": 0.2133191929775245, "grad_norm": 0.7308449149131775, "learning_rate": 9.722149332881054e-05, "loss": 1.1201, "step": 33390 }, { "epoch": 0.2133830801272632, "grad_norm": 1.078356385231018, "learning_rate": 9.721984371281158e-05, "loss": 0.9609, "step": 33400 }, { "epoch": 0.2134469672770019, "grad_norm": 1.385568380355835, "learning_rate": 9.721819362126793e-05, "loss": 0.9715, "step": 33410 }, { "epoch": 0.21351085442674061, "grad_norm": 0.8912048935890198, "learning_rate": 9.721654305419623e-05, "loss": 0.7701, "step": 33420 }, { "epoch": 0.2135747415764793, "grad_norm": 0.7083896994590759, "learning_rate": 9.721489201161309e-05, "loss": 0.8202, "step": 33430 }, { "epoch": 0.213638628726218, "grad_norm": 0.6518615484237671, "learning_rate": 9.721324049353515e-05, "loss": 0.7974, "step": 33440 }, { "epoch": 0.2137025158759567, "grad_norm": 0.7615000605583191, "learning_rate": 9.721158849997903e-05, "loss": 0.9024, "step": 33450 }, { "epoch": 0.2137664030256954, "grad_norm": 0.6199432611465454, "learning_rate": 9.720993603096136e-05, "loss": 1.0076, "step": 33460 }, { "epoch": 0.21383029017543412, "grad_norm": 0.6537955403327942, "learning_rate": 9.720828308649879e-05, "loss": 0.9644, "step": 33470 }, { "epoch": 0.21389417732517282, "grad_norm": 0.8364148139953613, "learning_rate": 9.720662966660799e-05, "loss": 0.7734, "step": 33480 }, { "epoch": 0.21395806447491153, "grad_norm": 0.8252184391021729, "learning_rate": 9.720497577130557e-05, "loss": 0.9241, "step": 33490 }, { "epoch": 0.2140219516246502, "grad_norm": 1.0425599813461304, "learning_rate": 9.72033214006082e-05, "loss": 0.6872, "step": 33500 }, { "epoch": 0.21408583877438891, "grad_norm": 0.7613168358802795, "learning_rate": 9.720166655453256e-05, "loss": 0.8292, "step": 33510 }, { "epoch": 0.21414972592412762, "grad_norm": 0.7358224391937256, "learning_rate": 9.72000112330953e-05, "loss": 0.7993, "step": 33520 }, { "epoch": 0.21421361307386633, "grad_norm": 1.4351872205734253, "learning_rate": 9.71983554363131e-05, "loss": 0.9427, "step": 33530 }, { "epoch": 0.21427750022360503, "grad_norm": 0.9211145043373108, "learning_rate": 9.719669916420262e-05, "loss": 0.7403, "step": 33540 }, { "epoch": 0.21434138737334374, "grad_norm": 0.5790296792984009, "learning_rate": 9.719504241678054e-05, "loss": 0.77, "step": 33550 }, { "epoch": 0.21440527452308242, "grad_norm": 1.3659369945526123, "learning_rate": 9.719338519406358e-05, "loss": 0.9941, "step": 33560 }, { "epoch": 0.21446916167282112, "grad_norm": 0.6189954876899719, "learning_rate": 9.719172749606838e-05, "loss": 0.8592, "step": 33570 }, { "epoch": 0.21453304882255983, "grad_norm": 0.8214682936668396, "learning_rate": 9.719006932281167e-05, "loss": 0.7411, "step": 33580 }, { "epoch": 0.21459693597229854, "grad_norm": 0.5750226974487305, "learning_rate": 9.718841067431013e-05, "loss": 0.7238, "step": 33590 }, { "epoch": 0.21466082312203724, "grad_norm": 1.5233280658721924, "learning_rate": 9.718675155058046e-05, "loss": 0.7061, "step": 33600 }, { "epoch": 0.21472471027177595, "grad_norm": 0.5941923260688782, "learning_rate": 9.718509195163939e-05, "loss": 1.0065, "step": 33610 }, { "epoch": 0.21478859742151463, "grad_norm": 0.8326600790023804, "learning_rate": 9.718343187750363e-05, "loss": 0.8198, "step": 33620 }, { "epoch": 0.21485248457125333, "grad_norm": 0.6903313994407654, "learning_rate": 9.718177132818988e-05, "loss": 0.8067, "step": 33630 }, { "epoch": 0.21491637172099204, "grad_norm": 1.647194266319275, "learning_rate": 9.71801103037149e-05, "loss": 0.8966, "step": 33640 }, { "epoch": 0.21498025887073074, "grad_norm": 0.6679027080535889, "learning_rate": 9.717844880409537e-05, "loss": 0.7546, "step": 33650 }, { "epoch": 0.21504414602046945, "grad_norm": 0.8270406723022461, "learning_rate": 9.717678682934803e-05, "loss": 1.068, "step": 33660 }, { "epoch": 0.21510803317020816, "grad_norm": 0.6147032976150513, "learning_rate": 9.717512437948966e-05, "loss": 0.747, "step": 33670 }, { "epoch": 0.21517192031994684, "grad_norm": 1.2196052074432373, "learning_rate": 9.717346145453696e-05, "loss": 0.7214, "step": 33680 }, { "epoch": 0.21523580746968554, "grad_norm": 1.0216395854949951, "learning_rate": 9.717179805450671e-05, "loss": 0.8437, "step": 33690 }, { "epoch": 0.21529969461942425, "grad_norm": 0.7304588556289673, "learning_rate": 9.717013417941563e-05, "loss": 0.6288, "step": 33700 }, { "epoch": 0.21536358176916295, "grad_norm": 1.711125135421753, "learning_rate": 9.716846982928049e-05, "loss": 0.8811, "step": 33710 }, { "epoch": 0.21542746891890166, "grad_norm": 0.868000864982605, "learning_rate": 9.716680500411805e-05, "loss": 0.857, "step": 33720 }, { "epoch": 0.21549135606864037, "grad_norm": 0.7319660186767578, "learning_rate": 9.716513970394509e-05, "loss": 0.8252, "step": 33730 }, { "epoch": 0.21555524321837904, "grad_norm": 0.9054515361785889, "learning_rate": 9.716347392877836e-05, "loss": 0.8681, "step": 33740 }, { "epoch": 0.21561913036811775, "grad_norm": 1.218607783317566, "learning_rate": 9.716180767863465e-05, "loss": 0.9609, "step": 33750 }, { "epoch": 0.21568301751785646, "grad_norm": 0.9217560291290283, "learning_rate": 9.716014095353075e-05, "loss": 0.8119, "step": 33760 }, { "epoch": 0.21574690466759516, "grad_norm": 0.7078598141670227, "learning_rate": 9.715847375348342e-05, "loss": 0.9151, "step": 33770 }, { "epoch": 0.21581079181733387, "grad_norm": 0.7617483139038086, "learning_rate": 9.715680607850945e-05, "loss": 0.9346, "step": 33780 }, { "epoch": 0.21587467896707258, "grad_norm": 0.7594091892242432, "learning_rate": 9.715513792862565e-05, "loss": 0.9478, "step": 33790 }, { "epoch": 0.21593856611681125, "grad_norm": 0.9850571155548096, "learning_rate": 9.715346930384882e-05, "loss": 0.7815, "step": 33800 }, { "epoch": 0.21600245326654996, "grad_norm": 0.8838279843330383, "learning_rate": 9.715180020419576e-05, "loss": 1.0338, "step": 33810 }, { "epoch": 0.21606634041628867, "grad_norm": 0.7649998068809509, "learning_rate": 9.715013062968328e-05, "loss": 0.839, "step": 33820 }, { "epoch": 0.21613022756602737, "grad_norm": 0.8073322176933289, "learning_rate": 9.71484605803282e-05, "loss": 1.0359, "step": 33830 }, { "epoch": 0.21619411471576608, "grad_norm": 1.914969563484192, "learning_rate": 9.714679005614733e-05, "loss": 0.972, "step": 33840 }, { "epoch": 0.21625800186550478, "grad_norm": 0.781913161277771, "learning_rate": 9.714511905715749e-05, "loss": 1.2603, "step": 33850 }, { "epoch": 0.21632188901524346, "grad_norm": 0.5499342083930969, "learning_rate": 9.714344758337553e-05, "loss": 1.0211, "step": 33860 }, { "epoch": 0.21638577616498217, "grad_norm": 2.390815496444702, "learning_rate": 9.714177563481824e-05, "loss": 1.1886, "step": 33870 }, { "epoch": 0.21644966331472087, "grad_norm": 2.6002392768859863, "learning_rate": 9.71401032115025e-05, "loss": 1.0595, "step": 33880 }, { "epoch": 0.21651355046445958, "grad_norm": 0.8145592212677002, "learning_rate": 9.713843031344515e-05, "loss": 0.8558, "step": 33890 }, { "epoch": 0.2165774376141983, "grad_norm": 0.7605422139167786, "learning_rate": 9.713675694066302e-05, "loss": 0.79, "step": 33900 }, { "epoch": 0.216641324763937, "grad_norm": 0.9282397031784058, "learning_rate": 9.713508309317296e-05, "loss": 0.8963, "step": 33910 }, { "epoch": 0.21670521191367567, "grad_norm": 0.6586880683898926, "learning_rate": 9.713340877099183e-05, "loss": 0.7421, "step": 33920 }, { "epoch": 0.21676909906341438, "grad_norm": 0.9235056042671204, "learning_rate": 9.713173397413652e-05, "loss": 0.8292, "step": 33930 }, { "epoch": 0.21683298621315308, "grad_norm": 0.7915987372398376, "learning_rate": 9.713005870262386e-05, "loss": 0.8096, "step": 33940 }, { "epoch": 0.2168968733628918, "grad_norm": 0.5287061333656311, "learning_rate": 9.712838295647074e-05, "loss": 0.7746, "step": 33950 }, { "epoch": 0.2169607605126305, "grad_norm": 0.7330449819564819, "learning_rate": 9.712670673569403e-05, "loss": 0.9486, "step": 33960 }, { "epoch": 0.2170246476623692, "grad_norm": 0.7698398232460022, "learning_rate": 9.712503004031061e-05, "loss": 0.9407, "step": 33970 }, { "epoch": 0.21708853481210788, "grad_norm": 5.071091651916504, "learning_rate": 9.712335287033739e-05, "loss": 1.046, "step": 33980 }, { "epoch": 0.2171524219618466, "grad_norm": 0.8342990875244141, "learning_rate": 9.712167522579121e-05, "loss": 0.7953, "step": 33990 }, { "epoch": 0.2172163091115853, "grad_norm": 1.221957802772522, "learning_rate": 9.7119997106689e-05, "loss": 0.8411, "step": 34000 }, { "epoch": 0.217280196261324, "grad_norm": 1.421647548675537, "learning_rate": 9.711831851304767e-05, "loss": 0.8459, "step": 34010 }, { "epoch": 0.2173440834110627, "grad_norm": 1.0833210945129395, "learning_rate": 9.71166394448841e-05, "loss": 0.9682, "step": 34020 }, { "epoch": 0.2174079705608014, "grad_norm": 0.7942554354667664, "learning_rate": 9.71149599022152e-05, "loss": 0.9468, "step": 34030 }, { "epoch": 0.2174718577105401, "grad_norm": 0.5950953364372253, "learning_rate": 9.71132798850579e-05, "loss": 0.9885, "step": 34040 }, { "epoch": 0.2175357448602788, "grad_norm": 1.1501030921936035, "learning_rate": 9.711159939342911e-05, "loss": 0.7241, "step": 34050 }, { "epoch": 0.2175996320100175, "grad_norm": 0.8352699875831604, "learning_rate": 9.710991842734577e-05, "loss": 0.9376, "step": 34060 }, { "epoch": 0.2176635191597562, "grad_norm": 1.2237290143966675, "learning_rate": 9.710823698682478e-05, "loss": 0.8397, "step": 34070 }, { "epoch": 0.21772740630949491, "grad_norm": 1.1886348724365234, "learning_rate": 9.71065550718831e-05, "loss": 0.8056, "step": 34080 }, { "epoch": 0.21779129345923362, "grad_norm": 0.954849362373352, "learning_rate": 9.710487268253765e-05, "loss": 0.9837, "step": 34090 }, { "epoch": 0.2178551806089723, "grad_norm": 0.7035555243492126, "learning_rate": 9.710318981880539e-05, "loss": 0.7794, "step": 34100 }, { "epoch": 0.217919067758711, "grad_norm": 1.048746109008789, "learning_rate": 9.710150648070325e-05, "loss": 0.8262, "step": 34110 }, { "epoch": 0.2179829549084497, "grad_norm": 0.8809221386909485, "learning_rate": 9.70998226682482e-05, "loss": 1.0516, "step": 34120 }, { "epoch": 0.21804684205818842, "grad_norm": 1.0661201477050781, "learning_rate": 9.709813838145718e-05, "loss": 1.0833, "step": 34130 }, { "epoch": 0.21811072920792712, "grad_norm": 1.1189355850219727, "learning_rate": 9.709645362034716e-05, "loss": 1.1234, "step": 34140 }, { "epoch": 0.21817461635766583, "grad_norm": 0.872307538986206, "learning_rate": 9.709476838493511e-05, "loss": 1.0436, "step": 34150 }, { "epoch": 0.2182385035074045, "grad_norm": 0.6649029850959778, "learning_rate": 9.709308267523801e-05, "loss": 0.8959, "step": 34160 }, { "epoch": 0.2183023906571432, "grad_norm": 0.6744316220283508, "learning_rate": 9.70913964912728e-05, "loss": 0.9037, "step": 34170 }, { "epoch": 0.21836627780688192, "grad_norm": 1.0881192684173584, "learning_rate": 9.708970983305652e-05, "loss": 0.8183, "step": 34180 }, { "epoch": 0.21843016495662063, "grad_norm": 0.9044772386550903, "learning_rate": 9.70880227006061e-05, "loss": 0.9755, "step": 34190 }, { "epoch": 0.21849405210635933, "grad_norm": 0.9986025094985962, "learning_rate": 9.708633509393856e-05, "loss": 0.9058, "step": 34200 }, { "epoch": 0.21855793925609804, "grad_norm": 0.7025921940803528, "learning_rate": 9.70846470130709e-05, "loss": 1.0454, "step": 34210 }, { "epoch": 0.21862182640583672, "grad_norm": 0.6166189312934875, "learning_rate": 9.70829584580201e-05, "loss": 0.9536, "step": 34220 }, { "epoch": 0.21868571355557542, "grad_norm": 1.0105708837509155, "learning_rate": 9.708126942880318e-05, "loss": 0.7328, "step": 34230 }, { "epoch": 0.21874960070531413, "grad_norm": 0.7658517956733704, "learning_rate": 9.707957992543714e-05, "loss": 0.753, "step": 34240 }, { "epoch": 0.21881348785505284, "grad_norm": 0.8330119252204895, "learning_rate": 9.707788994793901e-05, "loss": 0.9129, "step": 34250 }, { "epoch": 0.21887737500479154, "grad_norm": 1.216202735900879, "learning_rate": 9.707619949632578e-05, "loss": 0.7501, "step": 34260 }, { "epoch": 0.21894126215453025, "grad_norm": 0.7274483442306519, "learning_rate": 9.707450857061452e-05, "loss": 0.8814, "step": 34270 }, { "epoch": 0.21900514930426893, "grad_norm": 0.7238608598709106, "learning_rate": 9.707281717082222e-05, "loss": 0.7132, "step": 34280 }, { "epoch": 0.21906903645400763, "grad_norm": 0.736379861831665, "learning_rate": 9.707112529696594e-05, "loss": 0.8236, "step": 34290 }, { "epoch": 0.21913292360374634, "grad_norm": 0.8833523988723755, "learning_rate": 9.706943294906268e-05, "loss": 1.0377, "step": 34300 }, { "epoch": 0.21919681075348504, "grad_norm": 0.7226671576499939, "learning_rate": 9.706774012712953e-05, "loss": 0.9242, "step": 34310 }, { "epoch": 0.21926069790322375, "grad_norm": 1.3238605260849, "learning_rate": 9.706604683118353e-05, "loss": 0.9551, "step": 34320 }, { "epoch": 0.21932458505296246, "grad_norm": 2.324223279953003, "learning_rate": 9.706435306124169e-05, "loss": 1.0707, "step": 34330 }, { "epoch": 0.21938847220270116, "grad_norm": 0.6457687020301819, "learning_rate": 9.70626588173211e-05, "loss": 0.8856, "step": 34340 }, { "epoch": 0.21945235935243984, "grad_norm": 0.7554599642753601, "learning_rate": 9.706096409943883e-05, "loss": 0.8271, "step": 34350 }, { "epoch": 0.21951624650217855, "grad_norm": 1.154531478881836, "learning_rate": 9.705926890761195e-05, "loss": 1.2138, "step": 34360 }, { "epoch": 0.21958013365191725, "grad_norm": 0.8493779897689819, "learning_rate": 9.705757324185751e-05, "loss": 0.7191, "step": 34370 }, { "epoch": 0.21964402080165596, "grad_norm": 1.1541070938110352, "learning_rate": 9.705587710219259e-05, "loss": 0.9184, "step": 34380 }, { "epoch": 0.21970790795139467, "grad_norm": 2.6271910667419434, "learning_rate": 9.705418048863429e-05, "loss": 1.0036, "step": 34390 }, { "epoch": 0.21977179510113337, "grad_norm": 0.7804545164108276, "learning_rate": 9.705248340119968e-05, "loss": 1.1445, "step": 34400 }, { "epoch": 0.21983568225087205, "grad_norm": 0.515604555606842, "learning_rate": 9.705078583990586e-05, "loss": 0.729, "step": 34410 }, { "epoch": 0.21989956940061076, "grad_norm": 0.9133629202842712, "learning_rate": 9.704908780476991e-05, "loss": 1.0537, "step": 34420 }, { "epoch": 0.21996345655034946, "grad_norm": 1.274163842201233, "learning_rate": 9.704738929580896e-05, "loss": 1.0591, "step": 34430 }, { "epoch": 0.22002734370008817, "grad_norm": 0.45899906754493713, "learning_rate": 9.704569031304009e-05, "loss": 0.6701, "step": 34440 }, { "epoch": 0.22009123084982687, "grad_norm": 0.942436933517456, "learning_rate": 9.704399085648041e-05, "loss": 0.9153, "step": 34450 }, { "epoch": 0.22015511799956558, "grad_norm": 1.0042204856872559, "learning_rate": 9.704229092614705e-05, "loss": 0.8758, "step": 34460 }, { "epoch": 0.22021900514930426, "grad_norm": 0.4676646292209625, "learning_rate": 9.704059052205712e-05, "loss": 0.7552, "step": 34470 }, { "epoch": 0.22028289229904296, "grad_norm": 0.8477068543434143, "learning_rate": 9.703888964422775e-05, "loss": 0.8348, "step": 34480 }, { "epoch": 0.22034677944878167, "grad_norm": 1.006347417831421, "learning_rate": 9.703718829267607e-05, "loss": 0.9339, "step": 34490 }, { "epoch": 0.22041066659852038, "grad_norm": 0.8507176637649536, "learning_rate": 9.703548646741923e-05, "loss": 0.9948, "step": 34500 }, { "epoch": 0.22047455374825908, "grad_norm": 0.9493306279182434, "learning_rate": 9.703378416847431e-05, "loss": 0.7232, "step": 34510 }, { "epoch": 0.2205384408979978, "grad_norm": 0.7349863052368164, "learning_rate": 9.703208139585851e-05, "loss": 0.7541, "step": 34520 }, { "epoch": 0.22060232804773647, "grad_norm": 0.8959886431694031, "learning_rate": 9.703037814958898e-05, "loss": 0.9639, "step": 34530 }, { "epoch": 0.22066621519747517, "grad_norm": 0.6771888136863708, "learning_rate": 9.702867442968283e-05, "loss": 0.9092, "step": 34540 }, { "epoch": 0.22073010234721388, "grad_norm": 0.784125804901123, "learning_rate": 9.702697023615726e-05, "loss": 0.8621, "step": 34550 }, { "epoch": 0.2207939894969526, "grad_norm": 1.009945273399353, "learning_rate": 9.70252655690294e-05, "loss": 0.7527, "step": 34560 }, { "epoch": 0.2208578766466913, "grad_norm": 1.0403534173965454, "learning_rate": 9.702356042831643e-05, "loss": 1.0779, "step": 34570 }, { "epoch": 0.22092176379643, "grad_norm": 0.9144579172134399, "learning_rate": 9.702185481403555e-05, "loss": 0.9942, "step": 34580 }, { "epoch": 0.22098565094616868, "grad_norm": 1.012250542640686, "learning_rate": 9.702014872620388e-05, "loss": 0.8412, "step": 34590 }, { "epoch": 0.22104953809590738, "grad_norm": 1.3977776765823364, "learning_rate": 9.701844216483866e-05, "loss": 0.9844, "step": 34600 }, { "epoch": 0.2211134252456461, "grad_norm": 0.8186967372894287, "learning_rate": 9.701673512995704e-05, "loss": 0.8303, "step": 34610 }, { "epoch": 0.2211773123953848, "grad_norm": 0.7828638553619385, "learning_rate": 9.701502762157623e-05, "loss": 0.9695, "step": 34620 }, { "epoch": 0.2212411995451235, "grad_norm": 0.9973053336143494, "learning_rate": 9.701331963971341e-05, "loss": 0.8977, "step": 34630 }, { "epoch": 0.2213050866948622, "grad_norm": 1.1445131301879883, "learning_rate": 9.70116111843858e-05, "loss": 0.8871, "step": 34640 }, { "epoch": 0.22136897384460089, "grad_norm": 0.8758741617202759, "learning_rate": 9.700990225561058e-05, "loss": 0.81, "step": 34650 }, { "epoch": 0.2214328609943396, "grad_norm": 0.49622881412506104, "learning_rate": 9.700819285340497e-05, "loss": 0.8899, "step": 34660 }, { "epoch": 0.2214967481440783, "grad_norm": 0.9389495253562927, "learning_rate": 9.700648297778621e-05, "loss": 0.867, "step": 34670 }, { "epoch": 0.221560635293817, "grad_norm": 2.2437360286712646, "learning_rate": 9.700477262877149e-05, "loss": 1.0428, "step": 34680 }, { "epoch": 0.2216245224435557, "grad_norm": 1.3925631046295166, "learning_rate": 9.700306180637804e-05, "loss": 0.91, "step": 34690 }, { "epoch": 0.22168840959329442, "grad_norm": 1.310964822769165, "learning_rate": 9.700135051062312e-05, "loss": 0.8114, "step": 34700 }, { "epoch": 0.2217522967430331, "grad_norm": 1.04167902469635, "learning_rate": 9.699963874152392e-05, "loss": 0.7845, "step": 34710 }, { "epoch": 0.2218161838927718, "grad_norm": 0.9633674621582031, "learning_rate": 9.699792649909768e-05, "loss": 0.6929, "step": 34720 }, { "epoch": 0.2218800710425105, "grad_norm": 0.6973922252655029, "learning_rate": 9.699621378336168e-05, "loss": 0.7923, "step": 34730 }, { "epoch": 0.2219439581922492, "grad_norm": 0.6631523370742798, "learning_rate": 9.699450059433314e-05, "loss": 0.8096, "step": 34740 }, { "epoch": 0.22200784534198792, "grad_norm": 1.064477801322937, "learning_rate": 9.699278693202933e-05, "loss": 0.9907, "step": 34750 }, { "epoch": 0.22207173249172663, "grad_norm": 1.0626312494277954, "learning_rate": 9.699107279646751e-05, "loss": 0.7736, "step": 34760 }, { "epoch": 0.2221356196414653, "grad_norm": 0.5820396542549133, "learning_rate": 9.698935818766493e-05, "loss": 0.7869, "step": 34770 }, { "epoch": 0.222199506791204, "grad_norm": 0.7940320372581482, "learning_rate": 9.698764310563885e-05, "loss": 0.8672, "step": 34780 }, { "epoch": 0.22226339394094272, "grad_norm": 0.9088238477706909, "learning_rate": 9.698592755040657e-05, "loss": 0.8374, "step": 34790 }, { "epoch": 0.22232728109068142, "grad_norm": 1.1797140836715698, "learning_rate": 9.698421152198533e-05, "loss": 1.074, "step": 34800 }, { "epoch": 0.22239116824042013, "grad_norm": 0.7393913269042969, "learning_rate": 9.698249502039243e-05, "loss": 0.7102, "step": 34810 }, { "epoch": 0.22245505539015883, "grad_norm": 1.2401602268218994, "learning_rate": 9.698077804564519e-05, "loss": 1.0855, "step": 34820 }, { "epoch": 0.2225189425398975, "grad_norm": 0.7187434434890747, "learning_rate": 9.697906059776085e-05, "loss": 1.02, "step": 34830 }, { "epoch": 0.22258282968963622, "grad_norm": 0.719468355178833, "learning_rate": 9.697734267675674e-05, "loss": 0.932, "step": 34840 }, { "epoch": 0.22264671683937493, "grad_norm": 0.8819088935852051, "learning_rate": 9.697562428265012e-05, "loss": 0.8238, "step": 34850 }, { "epoch": 0.22271060398911363, "grad_norm": 0.49491390585899353, "learning_rate": 9.697390541545834e-05, "loss": 0.9514, "step": 34860 }, { "epoch": 0.22277449113885234, "grad_norm": 1.5479438304901123, "learning_rate": 9.697218607519871e-05, "loss": 0.9275, "step": 34870 }, { "epoch": 0.22283837828859104, "grad_norm": 0.763923704624176, "learning_rate": 9.697046626188852e-05, "loss": 0.8258, "step": 34880 }, { "epoch": 0.22290226543832972, "grad_norm": 1.1767523288726807, "learning_rate": 9.696874597554509e-05, "loss": 0.7937, "step": 34890 }, { "epoch": 0.22296615258806843, "grad_norm": 1.3667820692062378, "learning_rate": 9.696702521618576e-05, "loss": 0.8892, "step": 34900 }, { "epoch": 0.22303003973780713, "grad_norm": 0.7159459590911865, "learning_rate": 9.696530398382786e-05, "loss": 0.9855, "step": 34910 }, { "epoch": 0.22309392688754584, "grad_norm": 0.6876511573791504, "learning_rate": 9.69635822784887e-05, "loss": 1.0461, "step": 34920 }, { "epoch": 0.22315781403728455, "grad_norm": 0.6138442158699036, "learning_rate": 9.696186010018566e-05, "loss": 0.8192, "step": 34930 }, { "epoch": 0.22322170118702325, "grad_norm": 0.6763925552368164, "learning_rate": 9.696013744893604e-05, "loss": 0.8746, "step": 34940 }, { "epoch": 0.22328558833676193, "grad_norm": 0.807370126247406, "learning_rate": 9.695841432475723e-05, "loss": 0.7289, "step": 34950 }, { "epoch": 0.22334947548650064, "grad_norm": 0.7103719711303711, "learning_rate": 9.695669072766655e-05, "loss": 0.8883, "step": 34960 }, { "epoch": 0.22341336263623934, "grad_norm": 0.6593259572982788, "learning_rate": 9.695496665768138e-05, "loss": 0.845, "step": 34970 }, { "epoch": 0.22347724978597805, "grad_norm": 0.7919392585754395, "learning_rate": 9.695324211481907e-05, "loss": 0.7294, "step": 34980 }, { "epoch": 0.22354113693571676, "grad_norm": 1.0960744619369507, "learning_rate": 9.695151709909698e-05, "loss": 0.8352, "step": 34990 }, { "epoch": 0.22360502408545546, "grad_norm": 0.9134578704833984, "learning_rate": 9.69497916105325e-05, "loss": 0.8196, "step": 35000 }, { "epoch": 0.22366891123519414, "grad_norm": 0.7842540144920349, "learning_rate": 9.6948065649143e-05, "loss": 1.0348, "step": 35010 }, { "epoch": 0.22373279838493285, "grad_norm": 0.6312137842178345, "learning_rate": 9.694633921494588e-05, "loss": 0.62, "step": 35020 }, { "epoch": 0.22379668553467155, "grad_norm": 0.6972392797470093, "learning_rate": 9.69446123079585e-05, "loss": 0.8615, "step": 35030 }, { "epoch": 0.22386057268441026, "grad_norm": 0.7970590591430664, "learning_rate": 9.694288492819825e-05, "loss": 0.925, "step": 35040 }, { "epoch": 0.22392445983414896, "grad_norm": 1.2083357572555542, "learning_rate": 9.694115707568254e-05, "loss": 0.7092, "step": 35050 }, { "epoch": 0.22398834698388767, "grad_norm": 0.585113525390625, "learning_rate": 9.693942875042878e-05, "loss": 0.9887, "step": 35060 }, { "epoch": 0.22405223413362635, "grad_norm": 1.90079665184021, "learning_rate": 9.693769995245437e-05, "loss": 0.9447, "step": 35070 }, { "epoch": 0.22411612128336506, "grad_norm": 0.8119843602180481, "learning_rate": 9.69359706817767e-05, "loss": 0.7767, "step": 35080 }, { "epoch": 0.22418000843310376, "grad_norm": 0.5535334348678589, "learning_rate": 9.69342409384132e-05, "loss": 1.0211, "step": 35090 }, { "epoch": 0.22424389558284247, "grad_norm": 0.5760706663131714, "learning_rate": 9.69325107223813e-05, "loss": 0.7181, "step": 35100 }, { "epoch": 0.22430778273258117, "grad_norm": 0.7716420292854309, "learning_rate": 9.69307800336984e-05, "loss": 0.9217, "step": 35110 }, { "epoch": 0.22437166988231988, "grad_norm": 1.1675033569335938, "learning_rate": 9.692904887238195e-05, "loss": 1.1387, "step": 35120 }, { "epoch": 0.22443555703205856, "grad_norm": 0.8765130043029785, "learning_rate": 9.692731723844939e-05, "loss": 0.7809, "step": 35130 }, { "epoch": 0.22449944418179726, "grad_norm": 2.621401786804199, "learning_rate": 9.692558513191812e-05, "loss": 0.9963, "step": 35140 }, { "epoch": 0.22456333133153597, "grad_norm": 1.051527976989746, "learning_rate": 9.692385255280564e-05, "loss": 1.0771, "step": 35150 }, { "epoch": 0.22462721848127468, "grad_norm": 0.8392159342765808, "learning_rate": 9.692211950112936e-05, "loss": 0.9217, "step": 35160 }, { "epoch": 0.22469110563101338, "grad_norm": 0.7495473027229309, "learning_rate": 9.692038597690674e-05, "loss": 1.0467, "step": 35170 }, { "epoch": 0.2247549927807521, "grad_norm": 0.6959127187728882, "learning_rate": 9.691865198015524e-05, "loss": 1.0204, "step": 35180 }, { "epoch": 0.2248188799304908, "grad_norm": 1.0118756294250488, "learning_rate": 9.691691751089234e-05, "loss": 0.9488, "step": 35190 }, { "epoch": 0.22488276708022947, "grad_norm": 1.1415350437164307, "learning_rate": 9.691518256913547e-05, "loss": 1.2746, "step": 35200 }, { "epoch": 0.22494665422996818, "grad_norm": 0.9474114179611206, "learning_rate": 9.691344715490213e-05, "loss": 0.9522, "step": 35210 }, { "epoch": 0.22501054137970689, "grad_norm": 1.113400936126709, "learning_rate": 9.69117112682098e-05, "loss": 0.6583, "step": 35220 }, { "epoch": 0.2250744285294456, "grad_norm": 1.0649466514587402, "learning_rate": 9.690997490907594e-05, "loss": 0.9882, "step": 35230 }, { "epoch": 0.2251383156791843, "grad_norm": 0.7435470819473267, "learning_rate": 9.690823807751807e-05, "loss": 0.7211, "step": 35240 }, { "epoch": 0.225202202828923, "grad_norm": 1.1411978006362915, "learning_rate": 9.690650077355364e-05, "loss": 0.8664, "step": 35250 }, { "epoch": 0.22526608997866168, "grad_norm": 0.888380765914917, "learning_rate": 9.690476299720018e-05, "loss": 0.9631, "step": 35260 }, { "epoch": 0.2253299771284004, "grad_norm": 0.8436518907546997, "learning_rate": 9.690302474847516e-05, "loss": 1.0347, "step": 35270 }, { "epoch": 0.2253938642781391, "grad_norm": 0.8739194869995117, "learning_rate": 9.690128602739613e-05, "loss": 1.0825, "step": 35280 }, { "epoch": 0.2254577514278778, "grad_norm": 0.9203752875328064, "learning_rate": 9.689954683398057e-05, "loss": 0.8836, "step": 35290 }, { "epoch": 0.2255216385776165, "grad_norm": 0.7080454230308533, "learning_rate": 9.6897807168246e-05, "loss": 0.7775, "step": 35300 }, { "epoch": 0.2255855257273552, "grad_norm": 0.8330931067466736, "learning_rate": 9.689606703020993e-05, "loss": 0.948, "step": 35310 }, { "epoch": 0.2256494128770939, "grad_norm": 0.9416504502296448, "learning_rate": 9.689432641988988e-05, "loss": 0.8721, "step": 35320 }, { "epoch": 0.2257133000268326, "grad_norm": 0.7803798317909241, "learning_rate": 9.689258533730341e-05, "loss": 0.8416, "step": 35330 }, { "epoch": 0.2257771871765713, "grad_norm": 0.6242881417274475, "learning_rate": 9.689084378246804e-05, "loss": 0.7793, "step": 35340 }, { "epoch": 0.22584107432631, "grad_norm": 0.8477808833122253, "learning_rate": 9.68891017554013e-05, "loss": 0.84, "step": 35350 }, { "epoch": 0.22590496147604872, "grad_norm": 0.8038986325263977, "learning_rate": 9.688735925612075e-05, "loss": 0.7162, "step": 35360 }, { "epoch": 0.22596884862578742, "grad_norm": 0.6531451940536499, "learning_rate": 9.688561628464391e-05, "loss": 0.8058, "step": 35370 }, { "epoch": 0.2260327357755261, "grad_norm": 0.8681033849716187, "learning_rate": 9.688387284098837e-05, "loss": 1.0791, "step": 35380 }, { "epoch": 0.2260966229252648, "grad_norm": 1.3325775861740112, "learning_rate": 9.688212892517167e-05, "loss": 0.6875, "step": 35390 }, { "epoch": 0.2261605100750035, "grad_norm": 0.5260213017463684, "learning_rate": 9.688038453721137e-05, "loss": 0.8236, "step": 35400 }, { "epoch": 0.22622439722474222, "grad_norm": 1.0699787139892578, "learning_rate": 9.687863967712503e-05, "loss": 0.8972, "step": 35410 }, { "epoch": 0.22628828437448092, "grad_norm": 0.6690873503684998, "learning_rate": 9.687689434493025e-05, "loss": 0.8042, "step": 35420 }, { "epoch": 0.22635217152421963, "grad_norm": 0.6598352789878845, "learning_rate": 9.687514854064458e-05, "loss": 1.0096, "step": 35430 }, { "epoch": 0.2264160586739583, "grad_norm": 0.839152455329895, "learning_rate": 9.68735769131643e-05, "loss": 1.0554, "step": 35440 }, { "epoch": 0.22647994582369702, "grad_norm": 1.03608238697052, "learning_rate": 9.68718302119544e-05, "loss": 1.0627, "step": 35450 }, { "epoch": 0.22654383297343572, "grad_norm": 0.8941081762313843, "learning_rate": 9.687008303870461e-05, "loss": 0.663, "step": 35460 }, { "epoch": 0.22660772012317443, "grad_norm": 0.5950977802276611, "learning_rate": 9.686833539343256e-05, "loss": 0.9636, "step": 35470 }, { "epoch": 0.22667160727291313, "grad_norm": 0.5966373085975647, "learning_rate": 9.686658727615581e-05, "loss": 0.8674, "step": 35480 }, { "epoch": 0.22673549442265184, "grad_norm": 0.8043856620788574, "learning_rate": 9.686483868689198e-05, "loss": 0.9328, "step": 35490 }, { "epoch": 0.22679938157239052, "grad_norm": 1.025963306427002, "learning_rate": 9.686308962565869e-05, "loss": 0.7796, "step": 35500 }, { "epoch": 0.22686326872212922, "grad_norm": 0.4956408143043518, "learning_rate": 9.686134009247354e-05, "loss": 0.7355, "step": 35510 }, { "epoch": 0.22692715587186793, "grad_norm": 0.9197072386741638, "learning_rate": 9.685959008735414e-05, "loss": 0.7268, "step": 35520 }, { "epoch": 0.22699104302160664, "grad_norm": 0.9792423248291016, "learning_rate": 9.685783961031814e-05, "loss": 0.8215, "step": 35530 }, { "epoch": 0.22705493017134534, "grad_norm": 1.209794282913208, "learning_rate": 9.685608866138316e-05, "loss": 0.7836, "step": 35540 }, { "epoch": 0.22711881732108405, "grad_norm": 0.8678392767906189, "learning_rate": 9.685433724056683e-05, "loss": 0.862, "step": 35550 }, { "epoch": 0.22718270447082273, "grad_norm": 1.0202693939208984, "learning_rate": 9.685258534788679e-05, "loss": 0.8804, "step": 35560 }, { "epoch": 0.22724659162056143, "grad_norm": 0.8867144584655762, "learning_rate": 9.685083298336068e-05, "loss": 0.8365, "step": 35570 }, { "epoch": 0.22731047877030014, "grad_norm": 0.7046698927879333, "learning_rate": 9.684908014700616e-05, "loss": 1.1958, "step": 35580 }, { "epoch": 0.22737436592003885, "grad_norm": 0.7776816487312317, "learning_rate": 9.684732683884085e-05, "loss": 0.8462, "step": 35590 }, { "epoch": 0.22743825306977755, "grad_norm": 0.9116525650024414, "learning_rate": 9.684557305888245e-05, "loss": 0.744, "step": 35600 }, { "epoch": 0.22750214021951626, "grad_norm": 1.0605876445770264, "learning_rate": 9.684381880714858e-05, "loss": 0.9077, "step": 35610 }, { "epoch": 0.22756602736925494, "grad_norm": 1.1371787786483765, "learning_rate": 9.684206408365695e-05, "loss": 1.1714, "step": 35620 }, { "epoch": 0.22762991451899364, "grad_norm": 1.0647424459457397, "learning_rate": 9.684030888842521e-05, "loss": 0.8973, "step": 35630 }, { "epoch": 0.22769380166873235, "grad_norm": 1.0106362104415894, "learning_rate": 9.683855322147103e-05, "loss": 0.7832, "step": 35640 }, { "epoch": 0.22775768881847105, "grad_norm": 1.509164571762085, "learning_rate": 9.68367970828121e-05, "loss": 1.0245, "step": 35650 }, { "epoch": 0.22782157596820976, "grad_norm": 0.6998576521873474, "learning_rate": 9.68350404724661e-05, "loss": 0.9198, "step": 35660 }, { "epoch": 0.22788546311794847, "grad_norm": 0.7818799614906311, "learning_rate": 9.683328339045073e-05, "loss": 0.9013, "step": 35670 }, { "epoch": 0.22794935026768715, "grad_norm": 0.9192219972610474, "learning_rate": 9.683152583678367e-05, "loss": 0.8992, "step": 35680 }, { "epoch": 0.22801323741742585, "grad_norm": 0.7720584273338318, "learning_rate": 9.682976781148265e-05, "loss": 1.0002, "step": 35690 }, { "epoch": 0.22807712456716456, "grad_norm": 1.023474097251892, "learning_rate": 9.682800931456534e-05, "loss": 0.8645, "step": 35700 }, { "epoch": 0.22814101171690326, "grad_norm": 0.7522472143173218, "learning_rate": 9.682625034604946e-05, "loss": 0.9877, "step": 35710 }, { "epoch": 0.22820489886664197, "grad_norm": 0.7929263710975647, "learning_rate": 9.682449090595274e-05, "loss": 0.9654, "step": 35720 }, { "epoch": 0.22826878601638068, "grad_norm": 0.8946601152420044, "learning_rate": 9.682273099429288e-05, "loss": 1.1321, "step": 35730 }, { "epoch": 0.22833267316611935, "grad_norm": 0.884692907333374, "learning_rate": 9.682097061108761e-05, "loss": 0.7554, "step": 35740 }, { "epoch": 0.22839656031585806, "grad_norm": 0.6156822443008423, "learning_rate": 9.681920975635467e-05, "loss": 0.8625, "step": 35750 }, { "epoch": 0.22846044746559677, "grad_norm": 0.6044219732284546, "learning_rate": 9.681744843011177e-05, "loss": 0.758, "step": 35760 }, { "epoch": 0.22852433461533547, "grad_norm": 0.835270345211029, "learning_rate": 9.681568663237668e-05, "loss": 0.8325, "step": 35770 }, { "epoch": 0.22858822176507418, "grad_norm": 0.9461874961853027, "learning_rate": 9.68139243631671e-05, "loss": 0.8916, "step": 35780 }, { "epoch": 0.22865210891481288, "grad_norm": 1.3007314205169678, "learning_rate": 9.681216162250082e-05, "loss": 1.1537, "step": 35790 }, { "epoch": 0.22871599606455156, "grad_norm": 1.0743658542633057, "learning_rate": 9.681039841039557e-05, "loss": 0.7409, "step": 35800 }, { "epoch": 0.22877988321429027, "grad_norm": 2.3741660118103027, "learning_rate": 9.680863472686911e-05, "loss": 0.8093, "step": 35810 }, { "epoch": 0.22884377036402898, "grad_norm": 0.9726037383079529, "learning_rate": 9.68068705719392e-05, "loss": 1.1677, "step": 35820 }, { "epoch": 0.22890765751376768, "grad_norm": 0.7922230958938599, "learning_rate": 9.680510594562362e-05, "loss": 0.9944, "step": 35830 }, { "epoch": 0.2289715446635064, "grad_norm": 0.8513554930686951, "learning_rate": 9.680334084794011e-05, "loss": 0.8125, "step": 35840 }, { "epoch": 0.2290354318132451, "grad_norm": 1.046993374824524, "learning_rate": 9.680157527890649e-05, "loss": 0.9013, "step": 35850 }, { "epoch": 0.22909931896298377, "grad_norm": 0.6349254250526428, "learning_rate": 9.679980923854051e-05, "loss": 0.903, "step": 35860 }, { "epoch": 0.22916320611272248, "grad_norm": 0.4237905740737915, "learning_rate": 9.679804272685995e-05, "loss": 0.7127, "step": 35870 }, { "epoch": 0.22922709326246118, "grad_norm": 0.7686927914619446, "learning_rate": 9.679627574388264e-05, "loss": 0.8212, "step": 35880 }, { "epoch": 0.2292909804121999, "grad_norm": 1.274295687675476, "learning_rate": 9.679450828962633e-05, "loss": 0.7458, "step": 35890 }, { "epoch": 0.2293548675619386, "grad_norm": 0.8231094479560852, "learning_rate": 9.679274036410884e-05, "loss": 0.851, "step": 35900 }, { "epoch": 0.2294187547116773, "grad_norm": 0.5917838215827942, "learning_rate": 9.679097196734797e-05, "loss": 1.0595, "step": 35910 }, { "epoch": 0.22948264186141598, "grad_norm": 0.9595643877983093, "learning_rate": 9.678920309936155e-05, "loss": 0.8143, "step": 35920 }, { "epoch": 0.2295465290111547, "grad_norm": 0.9315831661224365, "learning_rate": 9.678743376016736e-05, "loss": 0.8278, "step": 35930 }, { "epoch": 0.2296104161608934, "grad_norm": 0.8110885620117188, "learning_rate": 9.678566394978323e-05, "loss": 0.9624, "step": 35940 }, { "epoch": 0.2296743033106321, "grad_norm": 0.8156410455703735, "learning_rate": 9.6783893668227e-05, "loss": 0.7916, "step": 35950 }, { "epoch": 0.2297381904603708, "grad_norm": 0.7576091885566711, "learning_rate": 9.678212291551649e-05, "loss": 1.2787, "step": 35960 }, { "epoch": 0.2298020776101095, "grad_norm": 0.6447461843490601, "learning_rate": 9.678035169166953e-05, "loss": 0.8515, "step": 35970 }, { "epoch": 0.2298659647598482, "grad_norm": 0.6504492163658142, "learning_rate": 9.677857999670394e-05, "loss": 1.0268, "step": 35980 }, { "epoch": 0.2299298519095869, "grad_norm": 1.1412609815597534, "learning_rate": 9.677680783063761e-05, "loss": 1.1179, "step": 35990 }, { "epoch": 0.2299937390593256, "grad_norm": 0.7995015382766724, "learning_rate": 9.677503519348834e-05, "loss": 0.9593, "step": 36000 }, { "epoch": 0.2300576262090643, "grad_norm": 1.159679889678955, "learning_rate": 9.677326208527399e-05, "loss": 0.9146, "step": 36010 }, { "epoch": 0.23012151335880301, "grad_norm": 0.721098780632019, "learning_rate": 9.677148850601243e-05, "loss": 1.0502, "step": 36020 }, { "epoch": 0.23018540050854172, "grad_norm": 0.4577333927154541, "learning_rate": 9.676971445572152e-05, "loss": 0.9092, "step": 36030 }, { "epoch": 0.23024928765828043, "grad_norm": 0.8602834343910217, "learning_rate": 9.676793993441913e-05, "loss": 0.7162, "step": 36040 }, { "epoch": 0.2303131748080191, "grad_norm": 0.8518884181976318, "learning_rate": 9.676616494212314e-05, "loss": 0.9275, "step": 36050 }, { "epoch": 0.2303770619577578, "grad_norm": 1.1824616193771362, "learning_rate": 9.676438947885138e-05, "loss": 0.6779, "step": 36060 }, { "epoch": 0.23044094910749652, "grad_norm": 1.0840277671813965, "learning_rate": 9.676261354462177e-05, "loss": 0.8189, "step": 36070 }, { "epoch": 0.23050483625723522, "grad_norm": 0.7747464776039124, "learning_rate": 9.67608371394522e-05, "loss": 1.0298, "step": 36080 }, { "epoch": 0.23056872340697393, "grad_norm": 0.7132411003112793, "learning_rate": 9.675906026336053e-05, "loss": 0.823, "step": 36090 }, { "epoch": 0.23063261055671264, "grad_norm": 1.1659483909606934, "learning_rate": 9.675728291636467e-05, "loss": 0.8323, "step": 36100 }, { "epoch": 0.23069649770645131, "grad_norm": 0.7727037072181702, "learning_rate": 9.675550509848253e-05, "loss": 0.8996, "step": 36110 }, { "epoch": 0.23076038485619002, "grad_norm": 0.511026918888092, "learning_rate": 9.6753726809732e-05, "loss": 0.9119, "step": 36120 }, { "epoch": 0.23082427200592873, "grad_norm": 1.2003488540649414, "learning_rate": 9.6751948050131e-05, "loss": 0.9831, "step": 36130 }, { "epoch": 0.23088815915566743, "grad_norm": 0.9001702070236206, "learning_rate": 9.675016881969743e-05, "loss": 1.0382, "step": 36140 }, { "epoch": 0.23095204630540614, "grad_norm": 0.8864395618438721, "learning_rate": 9.674838911844923e-05, "loss": 0.8401, "step": 36150 }, { "epoch": 0.23101593345514485, "grad_norm": 0.8258879780769348, "learning_rate": 9.674660894640429e-05, "loss": 0.9833, "step": 36160 }, { "epoch": 0.23107982060488352, "grad_norm": 0.8250300884246826, "learning_rate": 9.674482830358056e-05, "loss": 0.8936, "step": 36170 }, { "epoch": 0.23114370775462223, "grad_norm": 0.9559470415115356, "learning_rate": 9.674304718999598e-05, "loss": 1.2631, "step": 36180 }, { "epoch": 0.23120759490436094, "grad_norm": 2.168290853500366, "learning_rate": 9.674126560566846e-05, "loss": 0.9825, "step": 36190 }, { "epoch": 0.23127148205409964, "grad_norm": 0.775067150592804, "learning_rate": 9.673948355061597e-05, "loss": 0.8517, "step": 36200 }, { "epoch": 0.23133536920383835, "grad_norm": 1.186092495918274, "learning_rate": 9.673770102485644e-05, "loss": 0.8136, "step": 36210 }, { "epoch": 0.23139925635357705, "grad_norm": 1.8314769268035889, "learning_rate": 9.673591802840782e-05, "loss": 1.0137, "step": 36220 }, { "epoch": 0.23146314350331573, "grad_norm": 0.9208132028579712, "learning_rate": 9.673413456128808e-05, "loss": 0.8576, "step": 36230 }, { "epoch": 0.23152703065305444, "grad_norm": 0.8547564148902893, "learning_rate": 9.673235062351517e-05, "loss": 1.1041, "step": 36240 }, { "epoch": 0.23159091780279314, "grad_norm": 0.6247135400772095, "learning_rate": 9.673056621510707e-05, "loss": 0.8918, "step": 36250 }, { "epoch": 0.23165480495253185, "grad_norm": 1.1294952630996704, "learning_rate": 9.672878133608174e-05, "loss": 1.1331, "step": 36260 }, { "epoch": 0.23171869210227056, "grad_norm": 1.048307180404663, "learning_rate": 9.672699598645716e-05, "loss": 1.0438, "step": 36270 }, { "epoch": 0.23178257925200926, "grad_norm": 0.8274295926094055, "learning_rate": 9.672521016625128e-05, "loss": 1.0533, "step": 36280 }, { "epoch": 0.23184646640174794, "grad_norm": 0.6973618268966675, "learning_rate": 9.672342387548215e-05, "loss": 1.0061, "step": 36290 }, { "epoch": 0.23191035355148665, "grad_norm": 0.6850184798240662, "learning_rate": 9.672163711416768e-05, "loss": 0.9715, "step": 36300 }, { "epoch": 0.23197424070122535, "grad_norm": 0.9231820702552795, "learning_rate": 9.671984988232593e-05, "loss": 0.8866, "step": 36310 }, { "epoch": 0.23203812785096406, "grad_norm": 1.0431686639785767, "learning_rate": 9.671806217997485e-05, "loss": 0.8008, "step": 36320 }, { "epoch": 0.23210201500070277, "grad_norm": 0.5410827398300171, "learning_rate": 9.67162740071325e-05, "loss": 0.9067, "step": 36330 }, { "epoch": 0.23216590215044147, "grad_norm": 0.6281831860542297, "learning_rate": 9.671448536381683e-05, "loss": 0.9372, "step": 36340 }, { "epoch": 0.23222978930018015, "grad_norm": 0.8428774476051331, "learning_rate": 9.671269625004589e-05, "loss": 0.9881, "step": 36350 }, { "epoch": 0.23229367644991886, "grad_norm": 0.5876288414001465, "learning_rate": 9.671090666583769e-05, "loss": 0.7809, "step": 36360 }, { "epoch": 0.23235756359965756, "grad_norm": 0.904808521270752, "learning_rate": 9.670911661121023e-05, "loss": 0.9961, "step": 36370 }, { "epoch": 0.23242145074939627, "grad_norm": 0.6523864269256592, "learning_rate": 9.670732608618157e-05, "loss": 0.9395, "step": 36380 }, { "epoch": 0.23248533789913498, "grad_norm": 0.8728864192962646, "learning_rate": 9.670553509076972e-05, "loss": 0.7788, "step": 36390 }, { "epoch": 0.23254922504887368, "grad_norm": 0.7656633257865906, "learning_rate": 9.670374362499274e-05, "loss": 0.9066, "step": 36400 }, { "epoch": 0.23261311219861236, "grad_norm": 0.7706246972084045, "learning_rate": 9.670195168886866e-05, "loss": 1.1319, "step": 36410 }, { "epoch": 0.23267699934835107, "grad_norm": 0.6671524047851562, "learning_rate": 9.670015928241551e-05, "loss": 1.1416, "step": 36420 }, { "epoch": 0.23274088649808977, "grad_norm": 0.8740767240524292, "learning_rate": 9.669836640565136e-05, "loss": 0.9599, "step": 36430 }, { "epoch": 0.23280477364782848, "grad_norm": 0.8602978587150574, "learning_rate": 9.669657305859425e-05, "loss": 0.7909, "step": 36440 }, { "epoch": 0.23286866079756718, "grad_norm": 0.7806286215782166, "learning_rate": 9.669477924126226e-05, "loss": 0.7961, "step": 36450 }, { "epoch": 0.2329325479473059, "grad_norm": 0.5049117803573608, "learning_rate": 9.669298495367345e-05, "loss": 0.742, "step": 36460 }, { "epoch": 0.23299643509704457, "grad_norm": 0.719462513923645, "learning_rate": 9.669119019584589e-05, "loss": 0.8667, "step": 36470 }, { "epoch": 0.23306032224678327, "grad_norm": 0.8203737735748291, "learning_rate": 9.668939496779763e-05, "loss": 1.0432, "step": 36480 }, { "epoch": 0.23312420939652198, "grad_norm": 0.7739396691322327, "learning_rate": 9.668759926954679e-05, "loss": 0.7717, "step": 36490 }, { "epoch": 0.2331880965462607, "grad_norm": 0.5877523422241211, "learning_rate": 9.668580310111142e-05, "loss": 0.8282, "step": 36500 }, { "epoch": 0.2332519836959994, "grad_norm": 0.7117794156074524, "learning_rate": 9.668400646250963e-05, "loss": 0.8262, "step": 36510 }, { "epoch": 0.2333158708457381, "grad_norm": 0.6126281023025513, "learning_rate": 9.668220935375953e-05, "loss": 0.7877, "step": 36520 }, { "epoch": 0.23337975799547678, "grad_norm": 1.1325799226760864, "learning_rate": 9.668041177487917e-05, "loss": 1.2053, "step": 36530 }, { "epoch": 0.23344364514521548, "grad_norm": 0.8727070689201355, "learning_rate": 9.667861372588669e-05, "loss": 0.9724, "step": 36540 }, { "epoch": 0.2335075322949542, "grad_norm": 0.87961345911026, "learning_rate": 9.667681520680017e-05, "loss": 0.8785, "step": 36550 }, { "epoch": 0.2335714194446929, "grad_norm": 0.9073530435562134, "learning_rate": 9.667501621763777e-05, "loss": 0.7719, "step": 36560 }, { "epoch": 0.2336353065944316, "grad_norm": 0.7770230770111084, "learning_rate": 9.667321675841754e-05, "loss": 0.9077, "step": 36570 }, { "epoch": 0.2336991937441703, "grad_norm": 1.0296423435211182, "learning_rate": 9.667141682915765e-05, "loss": 0.867, "step": 36580 }, { "epoch": 0.233763080893909, "grad_norm": 0.7076445817947388, "learning_rate": 9.666961642987624e-05, "loss": 0.7565, "step": 36590 }, { "epoch": 0.2338269680436477, "grad_norm": 1.4758923053741455, "learning_rate": 9.66678155605914e-05, "loss": 1.0654, "step": 36600 }, { "epoch": 0.2338908551933864, "grad_norm": 0.8394945859909058, "learning_rate": 9.666601422132129e-05, "loss": 0.6541, "step": 36610 }, { "epoch": 0.2339547423431251, "grad_norm": 0.946808934211731, "learning_rate": 9.666421241208404e-05, "loss": 0.9308, "step": 36620 }, { "epoch": 0.2340186294928638, "grad_norm": 0.8768804669380188, "learning_rate": 9.666241013289781e-05, "loss": 0.7125, "step": 36630 }, { "epoch": 0.23408251664260252, "grad_norm": 0.9706554412841797, "learning_rate": 9.666060738378072e-05, "loss": 0.8804, "step": 36640 }, { "epoch": 0.2341464037923412, "grad_norm": 1.6427329778671265, "learning_rate": 9.665880416475097e-05, "loss": 0.9644, "step": 36650 }, { "epoch": 0.2342102909420799, "grad_norm": 0.792389988899231, "learning_rate": 9.665700047582667e-05, "loss": 0.8932, "step": 36660 }, { "epoch": 0.2342741780918186, "grad_norm": 0.6772669553756714, "learning_rate": 9.665519631702605e-05, "loss": 0.8973, "step": 36670 }, { "epoch": 0.23433806524155731, "grad_norm": 0.8175477385520935, "learning_rate": 9.66533916883672e-05, "loss": 0.9906, "step": 36680 }, { "epoch": 0.23440195239129602, "grad_norm": 1.3049653768539429, "learning_rate": 9.665158658986835e-05, "loss": 0.9246, "step": 36690 }, { "epoch": 0.23446583954103473, "grad_norm": 0.7505981922149658, "learning_rate": 9.664978102154766e-05, "loss": 0.9096, "step": 36700 }, { "epoch": 0.2345297266907734, "grad_norm": 0.8786876797676086, "learning_rate": 9.664797498342333e-05, "loss": 0.9795, "step": 36710 }, { "epoch": 0.2345936138405121, "grad_norm": 1.1042776107788086, "learning_rate": 9.664616847551354e-05, "loss": 0.871, "step": 36720 }, { "epoch": 0.23465750099025082, "grad_norm": 0.5629504919052124, "learning_rate": 9.664436149783647e-05, "loss": 0.7445, "step": 36730 }, { "epoch": 0.23472138813998952, "grad_norm": 0.7298271656036377, "learning_rate": 9.664255405041031e-05, "loss": 0.9827, "step": 36740 }, { "epoch": 0.23478527528972823, "grad_norm": 0.6317089200019836, "learning_rate": 9.66407461332533e-05, "loss": 0.8363, "step": 36750 }, { "epoch": 0.23484916243946694, "grad_norm": 0.8942947387695312, "learning_rate": 9.663893774638362e-05, "loss": 0.9289, "step": 36760 }, { "epoch": 0.2349130495892056, "grad_norm": 1.3955134153366089, "learning_rate": 9.663712888981949e-05, "loss": 0.9013, "step": 36770 }, { "epoch": 0.23497693673894432, "grad_norm": 0.84214186668396, "learning_rate": 9.663531956357912e-05, "loss": 0.8152, "step": 36780 }, { "epoch": 0.23504082388868303, "grad_norm": 0.4366759955883026, "learning_rate": 9.663350976768074e-05, "loss": 0.7441, "step": 36790 }, { "epoch": 0.23510471103842173, "grad_norm": 0.7600962519645691, "learning_rate": 9.663169950214257e-05, "loss": 0.9543, "step": 36800 }, { "epoch": 0.23516859818816044, "grad_norm": 1.2092550992965698, "learning_rate": 9.662988876698285e-05, "loss": 1.0359, "step": 36810 }, { "epoch": 0.23523248533789914, "grad_norm": 0.6062434911727905, "learning_rate": 9.662807756221981e-05, "loss": 0.6755, "step": 36820 }, { "epoch": 0.23529637248763785, "grad_norm": 0.9666545987129211, "learning_rate": 9.662626588787168e-05, "loss": 0.7634, "step": 36830 }, { "epoch": 0.23536025963737653, "grad_norm": 0.9782662987709045, "learning_rate": 9.662445374395672e-05, "loss": 0.9015, "step": 36840 }, { "epoch": 0.23542414678711523, "grad_norm": 0.6901407241821289, "learning_rate": 9.662264113049318e-05, "loss": 0.8262, "step": 36850 }, { "epoch": 0.23548803393685394, "grad_norm": 0.6084008812904358, "learning_rate": 9.66208280474993e-05, "loss": 0.7851, "step": 36860 }, { "epoch": 0.23555192108659265, "grad_norm": 1.5526678562164307, "learning_rate": 9.661901449499336e-05, "loss": 0.9491, "step": 36870 }, { "epoch": 0.23561580823633135, "grad_norm": 0.6443691849708557, "learning_rate": 9.66172004729936e-05, "loss": 0.8368, "step": 36880 }, { "epoch": 0.23567969538607006, "grad_norm": 1.0201776027679443, "learning_rate": 9.661538598151831e-05, "loss": 0.9269, "step": 36890 }, { "epoch": 0.23574358253580874, "grad_norm": 1.2530359029769897, "learning_rate": 9.661357102058577e-05, "loss": 0.9521, "step": 36900 }, { "epoch": 0.23580746968554744, "grad_norm": 0.675190269947052, "learning_rate": 9.661175559021423e-05, "loss": 0.8889, "step": 36910 }, { "epoch": 0.23587135683528615, "grad_norm": 1.3392939567565918, "learning_rate": 9.660993969042197e-05, "loss": 1.0459, "step": 36920 }, { "epoch": 0.23593524398502486, "grad_norm": 0.7173458337783813, "learning_rate": 9.66081233212273e-05, "loss": 0.8731, "step": 36930 }, { "epoch": 0.23599913113476356, "grad_norm": 1.142118215560913, "learning_rate": 9.660630648264852e-05, "loss": 0.9468, "step": 36940 }, { "epoch": 0.23606301828450227, "grad_norm": 0.6740077137947083, "learning_rate": 9.66044891747039e-05, "loss": 0.8258, "step": 36950 }, { "epoch": 0.23612690543424095, "grad_norm": 0.7697812914848328, "learning_rate": 9.660267139741177e-05, "loss": 0.9605, "step": 36960 }, { "epoch": 0.23619079258397965, "grad_norm": 0.9170047044754028, "learning_rate": 9.660085315079041e-05, "loss": 0.8237, "step": 36970 }, { "epoch": 0.23625467973371836, "grad_norm": 1.0468403100967407, "learning_rate": 9.659903443485816e-05, "loss": 0.7339, "step": 36980 }, { "epoch": 0.23631856688345707, "grad_norm": 0.7569143176078796, "learning_rate": 9.659721524963331e-05, "loss": 0.9094, "step": 36990 }, { "epoch": 0.23638245403319577, "grad_norm": 2.99776291847229, "learning_rate": 9.659539559513418e-05, "loss": 0.7063, "step": 37000 }, { "epoch": 0.23644634118293448, "grad_norm": 0.6073469519615173, "learning_rate": 9.659357547137912e-05, "loss": 0.6927, "step": 37010 }, { "epoch": 0.23651022833267316, "grad_norm": 0.9018070101737976, "learning_rate": 9.659175487838643e-05, "loss": 0.6648, "step": 37020 }, { "epoch": 0.23657411548241186, "grad_norm": 1.5573745965957642, "learning_rate": 9.658993381617447e-05, "loss": 0.866, "step": 37030 }, { "epoch": 0.23663800263215057, "grad_norm": 0.9631299376487732, "learning_rate": 9.658811228476158e-05, "loss": 0.8542, "step": 37040 }, { "epoch": 0.23670188978188927, "grad_norm": 0.5931088924407959, "learning_rate": 9.658629028416608e-05, "loss": 1.0986, "step": 37050 }, { "epoch": 0.23676577693162798, "grad_norm": 1.1451070308685303, "learning_rate": 9.658446781440635e-05, "loss": 1.026, "step": 37060 }, { "epoch": 0.2368296640813667, "grad_norm": 0.9093202352523804, "learning_rate": 9.65826448755007e-05, "loss": 1.0764, "step": 37070 }, { "epoch": 0.23689355123110536, "grad_norm": 0.6607868075370789, "learning_rate": 9.658082146746754e-05, "loss": 0.8786, "step": 37080 }, { "epoch": 0.23695743838084407, "grad_norm": 1.8870525360107422, "learning_rate": 9.65789975903252e-05, "loss": 0.7795, "step": 37090 }, { "epoch": 0.23702132553058278, "grad_norm": 0.9815956354141235, "learning_rate": 9.657717324409207e-05, "loss": 0.9482, "step": 37100 }, { "epoch": 0.23708521268032148, "grad_norm": 0.7396382689476013, "learning_rate": 9.65753484287865e-05, "loss": 1.2419, "step": 37110 }, { "epoch": 0.2371490998300602, "grad_norm": 1.3282475471496582, "learning_rate": 9.657352314442688e-05, "loss": 1.0154, "step": 37120 }, { "epoch": 0.2372129869797989, "grad_norm": 0.5715224742889404, "learning_rate": 9.65716973910316e-05, "loss": 0.8772, "step": 37130 }, { "epoch": 0.23727687412953757, "grad_norm": 0.646783173084259, "learning_rate": 9.656987116861902e-05, "loss": 0.9359, "step": 37140 }, { "epoch": 0.23734076127927628, "grad_norm": 0.9318345189094543, "learning_rate": 9.656804447720755e-05, "loss": 0.7484, "step": 37150 }, { "epoch": 0.23740464842901499, "grad_norm": 0.9858495593070984, "learning_rate": 9.65662173168156e-05, "loss": 0.9772, "step": 37160 }, { "epoch": 0.2374685355787537, "grad_norm": 0.8943020701408386, "learning_rate": 9.656438968746153e-05, "loss": 0.9814, "step": 37170 }, { "epoch": 0.2375324227284924, "grad_norm": 0.7488458752632141, "learning_rate": 9.656256158916379e-05, "loss": 0.8101, "step": 37180 }, { "epoch": 0.2375963098782311, "grad_norm": 1.547443151473999, "learning_rate": 9.656073302194078e-05, "loss": 0.7346, "step": 37190 }, { "epoch": 0.23766019702796978, "grad_norm": 0.7410275340080261, "learning_rate": 9.655890398581088e-05, "loss": 0.8636, "step": 37200 }, { "epoch": 0.2377240841777085, "grad_norm": 1.3418773412704468, "learning_rate": 9.655707448079256e-05, "loss": 0.9634, "step": 37210 }, { "epoch": 0.2377879713274472, "grad_norm": 1.0941447019577026, "learning_rate": 9.655524450690423e-05, "loss": 1.0396, "step": 37220 }, { "epoch": 0.2378518584771859, "grad_norm": 0.6817768216133118, "learning_rate": 9.65534140641643e-05, "loss": 1.1446, "step": 37230 }, { "epoch": 0.2379157456269246, "grad_norm": 1.0512549877166748, "learning_rate": 9.65515831525912e-05, "loss": 0.8289, "step": 37240 }, { "epoch": 0.2379796327766633, "grad_norm": 0.6401187777519226, "learning_rate": 9.654975177220341e-05, "loss": 1.045, "step": 37250 }, { "epoch": 0.238043519926402, "grad_norm": 1.0263795852661133, "learning_rate": 9.654791992301935e-05, "loss": 1.0096, "step": 37260 }, { "epoch": 0.2381074070761407, "grad_norm": 0.7788522839546204, "learning_rate": 9.654608760505745e-05, "loss": 1.0715, "step": 37270 }, { "epoch": 0.2381712942258794, "grad_norm": 0.7468205094337463, "learning_rate": 9.654425481833618e-05, "loss": 1.0105, "step": 37280 }, { "epoch": 0.2382351813756181, "grad_norm": 0.6502282619476318, "learning_rate": 9.6542421562874e-05, "loss": 0.9838, "step": 37290 }, { "epoch": 0.23829906852535682, "grad_norm": 0.6235799193382263, "learning_rate": 9.654058783868938e-05, "loss": 0.8914, "step": 37300 }, { "epoch": 0.23836295567509552, "grad_norm": 0.6103238463401794, "learning_rate": 9.653875364580077e-05, "loss": 0.864, "step": 37310 }, { "epoch": 0.2384268428248342, "grad_norm": 0.9452196955680847, "learning_rate": 9.653691898422666e-05, "loss": 0.8753, "step": 37320 }, { "epoch": 0.2384907299745729, "grad_norm": 0.8040950298309326, "learning_rate": 9.653508385398549e-05, "loss": 0.8442, "step": 37330 }, { "epoch": 0.2385546171243116, "grad_norm": 1.0032446384429932, "learning_rate": 9.65332482550958e-05, "loss": 0.9091, "step": 37340 }, { "epoch": 0.23861850427405032, "grad_norm": 1.0294917821884155, "learning_rate": 9.653141218757602e-05, "loss": 0.7559, "step": 37350 }, { "epoch": 0.23868239142378903, "grad_norm": 0.6536062955856323, "learning_rate": 9.652957565144465e-05, "loss": 0.7608, "step": 37360 }, { "epoch": 0.23874627857352773, "grad_norm": 0.7073416113853455, "learning_rate": 9.652773864672022e-05, "loss": 1.0675, "step": 37370 }, { "epoch": 0.2388101657232664, "grad_norm": 0.8172992467880249, "learning_rate": 9.652590117342122e-05, "loss": 0.8483, "step": 37380 }, { "epoch": 0.23887405287300512, "grad_norm": 0.7354963421821594, "learning_rate": 9.652406323156613e-05, "loss": 0.9358, "step": 37390 }, { "epoch": 0.23893794002274382, "grad_norm": 0.6794359683990479, "learning_rate": 9.652222482117347e-05, "loss": 0.9437, "step": 37400 }, { "epoch": 0.23900182717248253, "grad_norm": 0.7249003648757935, "learning_rate": 9.652038594226177e-05, "loss": 0.8782, "step": 37410 }, { "epoch": 0.23906571432222123, "grad_norm": 0.8355563282966614, "learning_rate": 9.651854659484954e-05, "loss": 0.6612, "step": 37420 }, { "epoch": 0.23912960147195994, "grad_norm": 0.7103647589683533, "learning_rate": 9.651670677895529e-05, "loss": 1.1142, "step": 37430 }, { "epoch": 0.23919348862169862, "grad_norm": 0.5884954929351807, "learning_rate": 9.651486649459755e-05, "loss": 0.9896, "step": 37440 }, { "epoch": 0.23925737577143733, "grad_norm": 0.7389781475067139, "learning_rate": 9.651302574179489e-05, "loss": 0.8372, "step": 37450 }, { "epoch": 0.23932126292117603, "grad_norm": 0.5792128443717957, "learning_rate": 9.651118452056582e-05, "loss": 0.7093, "step": 37460 }, { "epoch": 0.23938515007091474, "grad_norm": 0.7699292898178101, "learning_rate": 9.650934283092887e-05, "loss": 1.0111, "step": 37470 }, { "epoch": 0.23944903722065344, "grad_norm": 0.7070481181144714, "learning_rate": 9.65075006729026e-05, "loss": 1.084, "step": 37480 }, { "epoch": 0.23951292437039215, "grad_norm": 1.0527695417404175, "learning_rate": 9.650565804650556e-05, "loss": 0.8554, "step": 37490 }, { "epoch": 0.23957681152013083, "grad_norm": 0.7435452342033386, "learning_rate": 9.650381495175633e-05, "loss": 0.8564, "step": 37500 }, { "epoch": 0.23964069866986953, "grad_norm": 0.9343265295028687, "learning_rate": 9.650197138867343e-05, "loss": 1.2956, "step": 37510 }, { "epoch": 0.23970458581960824, "grad_norm": 1.0350561141967773, "learning_rate": 9.650012735727546e-05, "loss": 0.9515, "step": 37520 }, { "epoch": 0.23976847296934695, "grad_norm": 1.1967248916625977, "learning_rate": 9.649828285758098e-05, "loss": 0.9391, "step": 37530 }, { "epoch": 0.23983236011908565, "grad_norm": 1.7346086502075195, "learning_rate": 9.649643788960856e-05, "loss": 0.8431, "step": 37540 }, { "epoch": 0.23989624726882436, "grad_norm": 0.7352771162986755, "learning_rate": 9.649459245337679e-05, "loss": 0.6744, "step": 37550 }, { "epoch": 0.23996013441856304, "grad_norm": 0.6544600129127502, "learning_rate": 9.649293116042181e-05, "loss": 1.2871, "step": 37560 }, { "epoch": 0.24002402156830174, "grad_norm": 0.4782470464706421, "learning_rate": 9.649108483454848e-05, "loss": 1.022, "step": 37570 }, { "epoch": 0.24008790871804045, "grad_norm": 0.9200822710990906, "learning_rate": 9.648923804046968e-05, "loss": 0.737, "step": 37580 }, { "epoch": 0.24015179586777916, "grad_norm": 1.8405570983886719, "learning_rate": 9.648739077820405e-05, "loss": 0.832, "step": 37590 }, { "epoch": 0.24021568301751786, "grad_norm": 1.054779052734375, "learning_rate": 9.648554304777017e-05, "loss": 0.8275, "step": 37600 }, { "epoch": 0.24027957016725657, "grad_norm": 0.8630744814872742, "learning_rate": 9.648369484918667e-05, "loss": 0.8602, "step": 37610 }, { "epoch": 0.24034345731699525, "grad_norm": 1.0110766887664795, "learning_rate": 9.648184618247214e-05, "loss": 0.8669, "step": 37620 }, { "epoch": 0.24040734446673395, "grad_norm": 1.0114331245422363, "learning_rate": 9.64799970476452e-05, "loss": 1.0707, "step": 37630 }, { "epoch": 0.24047123161647266, "grad_norm": 0.8818547129631042, "learning_rate": 9.64781474447245e-05, "loss": 0.9434, "step": 37640 }, { "epoch": 0.24053511876621136, "grad_norm": 1.12362802028656, "learning_rate": 9.647629737372863e-05, "loss": 1.1379, "step": 37650 }, { "epoch": 0.24059900591595007, "grad_norm": 0.696323812007904, "learning_rate": 9.647444683467623e-05, "loss": 0.951, "step": 37660 }, { "epoch": 0.24066289306568878, "grad_norm": 0.8041189312934875, "learning_rate": 9.647259582758597e-05, "loss": 0.9218, "step": 37670 }, { "epoch": 0.24072678021542748, "grad_norm": 0.45444270968437195, "learning_rate": 9.647074435247644e-05, "loss": 0.7025, "step": 37680 }, { "epoch": 0.24079066736516616, "grad_norm": 0.6444490551948547, "learning_rate": 9.646889240936632e-05, "loss": 1.011, "step": 37690 }, { "epoch": 0.24085455451490487, "grad_norm": 0.9339631199836731, "learning_rate": 9.646703999827426e-05, "loss": 1.0486, "step": 37700 }, { "epoch": 0.24091844166464357, "grad_norm": 1.2948579788208008, "learning_rate": 9.64651871192189e-05, "loss": 1.2862, "step": 37710 }, { "epoch": 0.24098232881438228, "grad_norm": 0.8452892899513245, "learning_rate": 9.64633337722189e-05, "loss": 0.8614, "step": 37720 }, { "epoch": 0.24104621596412099, "grad_norm": 0.8650469183921814, "learning_rate": 9.646147995729294e-05, "loss": 1.0569, "step": 37730 }, { "epoch": 0.2411101031138597, "grad_norm": 0.8053631782531738, "learning_rate": 9.645962567445969e-05, "loss": 0.9448, "step": 37740 }, { "epoch": 0.24117399026359837, "grad_norm": 0.6854358315467834, "learning_rate": 9.64577709237378e-05, "loss": 0.9552, "step": 37750 }, { "epoch": 0.24123787741333708, "grad_norm": 1.0860304832458496, "learning_rate": 9.645591570514598e-05, "loss": 1.111, "step": 37760 }, { "epoch": 0.24130176456307578, "grad_norm": 0.7687236070632935, "learning_rate": 9.64540600187029e-05, "loss": 0.8652, "step": 37770 }, { "epoch": 0.2413656517128145, "grad_norm": 1.2046473026275635, "learning_rate": 9.645220386442724e-05, "loss": 0.7453, "step": 37780 }, { "epoch": 0.2414295388625532, "grad_norm": 0.9802344441413879, "learning_rate": 9.64503472423377e-05, "loss": 0.8819, "step": 37790 }, { "epoch": 0.2414934260122919, "grad_norm": 0.7101196646690369, "learning_rate": 9.644849015245296e-05, "loss": 0.8814, "step": 37800 }, { "epoch": 0.24155731316203058, "grad_norm": 1.215147852897644, "learning_rate": 9.644663259479177e-05, "loss": 0.9877, "step": 37810 }, { "epoch": 0.24162120031176929, "grad_norm": 0.9594703316688538, "learning_rate": 9.64447745693728e-05, "loss": 0.8849, "step": 37820 }, { "epoch": 0.241685087461508, "grad_norm": 0.6628295183181763, "learning_rate": 9.644291607621476e-05, "loss": 0.6372, "step": 37830 }, { "epoch": 0.2417489746112467, "grad_norm": 0.7092610001564026, "learning_rate": 9.644105711533638e-05, "loss": 0.9584, "step": 37840 }, { "epoch": 0.2418128617609854, "grad_norm": 1.3304320573806763, "learning_rate": 9.643919768675637e-05, "loss": 1.075, "step": 37850 }, { "epoch": 0.2418767489107241, "grad_norm": 0.8040294051170349, "learning_rate": 9.643733779049349e-05, "loss": 0.9873, "step": 37860 }, { "epoch": 0.2419406360604628, "grad_norm": 0.6643669009208679, "learning_rate": 9.643547742656643e-05, "loss": 0.9092, "step": 37870 }, { "epoch": 0.2420045232102015, "grad_norm": 0.8764951229095459, "learning_rate": 9.643361659499392e-05, "loss": 0.7729, "step": 37880 }, { "epoch": 0.2420684103599402, "grad_norm": 6.183263778686523, "learning_rate": 9.643175529579475e-05, "loss": 1.238, "step": 37890 }, { "epoch": 0.2421322975096789, "grad_norm": 1.3563274145126343, "learning_rate": 9.642989352898762e-05, "loss": 0.8639, "step": 37900 }, { "epoch": 0.2421961846594176, "grad_norm": 0.8023094534873962, "learning_rate": 9.64280312945913e-05, "loss": 1.1368, "step": 37910 }, { "epoch": 0.24226007180915632, "grad_norm": 0.6188109517097473, "learning_rate": 9.642616859262455e-05, "loss": 1.0875, "step": 37920 }, { "epoch": 0.242323958958895, "grad_norm": 0.7044292688369751, "learning_rate": 9.64243054231061e-05, "loss": 0.9899, "step": 37930 }, { "epoch": 0.2423878461086337, "grad_norm": 0.8876643776893616, "learning_rate": 9.642244178605473e-05, "loss": 0.9804, "step": 37940 }, { "epoch": 0.2424517332583724, "grad_norm": 0.7421206831932068, "learning_rate": 9.642057768148922e-05, "loss": 0.8828, "step": 37950 }, { "epoch": 0.24251562040811112, "grad_norm": 0.8457249402999878, "learning_rate": 9.641871310942832e-05, "loss": 0.9491, "step": 37960 }, { "epoch": 0.24257950755784982, "grad_norm": 1.0700315237045288, "learning_rate": 9.641684806989084e-05, "loss": 0.8752, "step": 37970 }, { "epoch": 0.24264339470758853, "grad_norm": 0.7216569781303406, "learning_rate": 9.641498256289552e-05, "loss": 1.1564, "step": 37980 }, { "epoch": 0.2427072818573272, "grad_norm": 0.847780704498291, "learning_rate": 9.641311658846119e-05, "loss": 0.8944, "step": 37990 }, { "epoch": 0.2427711690070659, "grad_norm": 0.9553901553153992, "learning_rate": 9.641125014660662e-05, "loss": 0.9013, "step": 38000 }, { "epoch": 0.24283505615680462, "grad_norm": 1.37058424949646, "learning_rate": 9.64093832373506e-05, "loss": 0.9315, "step": 38010 }, { "epoch": 0.24289894330654332, "grad_norm": 0.7296909689903259, "learning_rate": 9.640751586071195e-05, "loss": 0.8648, "step": 38020 }, { "epoch": 0.24296283045628203, "grad_norm": 1.1600792407989502, "learning_rate": 9.640564801670948e-05, "loss": 0.7834, "step": 38030 }, { "epoch": 0.24302671760602074, "grad_norm": 1.0222969055175781, "learning_rate": 9.640377970536197e-05, "loss": 1.0175, "step": 38040 }, { "epoch": 0.24309060475575942, "grad_norm": 1.1712769269943237, "learning_rate": 9.640191092668825e-05, "loss": 1.0173, "step": 38050 }, { "epoch": 0.24315449190549812, "grad_norm": 1.4743320941925049, "learning_rate": 9.640004168070716e-05, "loss": 1.2532, "step": 38060 }, { "epoch": 0.24321837905523683, "grad_norm": 0.7207898497581482, "learning_rate": 9.639817196743749e-05, "loss": 0.9538, "step": 38070 }, { "epoch": 0.24328226620497553, "grad_norm": 1.0338420867919922, "learning_rate": 9.639630178689809e-05, "loss": 0.8653, "step": 38080 }, { "epoch": 0.24334615335471424, "grad_norm": 0.7284950017929077, "learning_rate": 9.639443113910781e-05, "loss": 0.7094, "step": 38090 }, { "epoch": 0.24341004050445295, "grad_norm": 1.0017796754837036, "learning_rate": 9.639256002408545e-05, "loss": 0.7997, "step": 38100 }, { "epoch": 0.24347392765419162, "grad_norm": 1.1406546831130981, "learning_rate": 9.639068844184989e-05, "loss": 0.9456, "step": 38110 }, { "epoch": 0.24353781480393033, "grad_norm": 0.7113826870918274, "learning_rate": 9.638881639241996e-05, "loss": 0.8586, "step": 38120 }, { "epoch": 0.24360170195366904, "grad_norm": 0.8635872602462769, "learning_rate": 9.638694387581453e-05, "loss": 0.8049, "step": 38130 }, { "epoch": 0.24366558910340774, "grad_norm": 0.882631242275238, "learning_rate": 9.638507089205242e-05, "loss": 0.7798, "step": 38140 }, { "epoch": 0.24372947625314645, "grad_norm": 0.9470729231834412, "learning_rate": 9.638319744115254e-05, "loss": 0.875, "step": 38150 }, { "epoch": 0.24379336340288515, "grad_norm": 0.5995595455169678, "learning_rate": 9.638132352313371e-05, "loss": 0.7982, "step": 38160 }, { "epoch": 0.24385725055262383, "grad_norm": 0.702936589717865, "learning_rate": 9.637944913801485e-05, "loss": 0.8089, "step": 38170 }, { "epoch": 0.24392113770236254, "grad_norm": 0.9595651626586914, "learning_rate": 9.63775742858148e-05, "loss": 0.9638, "step": 38180 }, { "epoch": 0.24398502485210125, "grad_norm": 0.706251323223114, "learning_rate": 9.637569896655245e-05, "loss": 0.7911, "step": 38190 }, { "epoch": 0.24404891200183995, "grad_norm": 0.6460245251655579, "learning_rate": 9.63740107798937e-05, "loss": 1.1869, "step": 38200 }, { "epoch": 0.24411279915157866, "grad_norm": 0.7056565880775452, "learning_rate": 9.637213457326503e-05, "loss": 0.874, "step": 38210 }, { "epoch": 0.24417668630131736, "grad_norm": 1.0978292226791382, "learning_rate": 9.637025789962885e-05, "loss": 1.059, "step": 38220 }, { "epoch": 0.24424057345105604, "grad_norm": 1.170605182647705, "learning_rate": 9.636838075900405e-05, "loss": 0.7483, "step": 38230 }, { "epoch": 0.24430446060079475, "grad_norm": 0.9686828255653381, "learning_rate": 9.636650315140955e-05, "loss": 1.0386, "step": 38240 }, { "epoch": 0.24436834775053345, "grad_norm": 0.9862095713615417, "learning_rate": 9.636462507686425e-05, "loss": 0.8925, "step": 38250 }, { "epoch": 0.24443223490027216, "grad_norm": 0.8100789189338684, "learning_rate": 9.636274653538707e-05, "loss": 0.8097, "step": 38260 }, { "epoch": 0.24449612205001087, "grad_norm": 0.9674224257469177, "learning_rate": 9.636086752699691e-05, "loss": 0.9992, "step": 38270 }, { "epoch": 0.24456000919974957, "grad_norm": 0.8364433646202087, "learning_rate": 9.63589880517127e-05, "loss": 0.7888, "step": 38280 }, { "epoch": 0.24462389634948825, "grad_norm": 0.8828948140144348, "learning_rate": 9.63571081095534e-05, "loss": 0.9031, "step": 38290 }, { "epoch": 0.24468778349922696, "grad_norm": 0.7281533479690552, "learning_rate": 9.63552277005379e-05, "loss": 0.7772, "step": 38300 }, { "epoch": 0.24475167064896566, "grad_norm": 1.225823998451233, "learning_rate": 9.635334682468516e-05, "loss": 0.7892, "step": 38310 }, { "epoch": 0.24481555779870437, "grad_norm": 0.8330084681510925, "learning_rate": 9.63514654820141e-05, "loss": 0.8299, "step": 38320 }, { "epoch": 0.24487944494844308, "grad_norm": 1.0153292417526245, "learning_rate": 9.63495836725437e-05, "loss": 1.0699, "step": 38330 }, { "epoch": 0.24494333209818178, "grad_norm": 1.185373306274414, "learning_rate": 9.634770139629288e-05, "loss": 0.9322, "step": 38340 }, { "epoch": 0.24500721924792046, "grad_norm": 0.7242724895477295, "learning_rate": 9.634581865328062e-05, "loss": 0.7414, "step": 38350 }, { "epoch": 0.24507110639765917, "grad_norm": 0.8044312596321106, "learning_rate": 9.634393544352589e-05, "loss": 1.1166, "step": 38360 }, { "epoch": 0.24513499354739787, "grad_norm": 0.9795138239860535, "learning_rate": 9.63420517670476e-05, "loss": 0.7423, "step": 38370 }, { "epoch": 0.24519888069713658, "grad_norm": 0.7290918231010437, "learning_rate": 9.634016762386478e-05, "loss": 0.9426, "step": 38380 }, { "epoch": 0.24526276784687528, "grad_norm": 0.7532154321670532, "learning_rate": 9.633828301399639e-05, "loss": 0.9311, "step": 38390 }, { "epoch": 0.245326654996614, "grad_norm": 1.0642715692520142, "learning_rate": 9.633639793746139e-05, "loss": 0.83, "step": 38400 }, { "epoch": 0.24539054214635267, "grad_norm": 0.5684540271759033, "learning_rate": 9.633451239427877e-05, "loss": 0.9418, "step": 38410 }, { "epoch": 0.24545442929609138, "grad_norm": 0.827085554599762, "learning_rate": 9.633262638446753e-05, "loss": 0.8866, "step": 38420 }, { "epoch": 0.24551831644583008, "grad_norm": 1.217444896697998, "learning_rate": 9.633073990804666e-05, "loss": 0.8359, "step": 38430 }, { "epoch": 0.2455822035955688, "grad_norm": 0.964013397693634, "learning_rate": 9.632885296503515e-05, "loss": 0.9809, "step": 38440 }, { "epoch": 0.2456460907453075, "grad_norm": 0.5672999024391174, "learning_rate": 9.632696555545203e-05, "loss": 0.7156, "step": 38450 }, { "epoch": 0.2457099778950462, "grad_norm": 0.6509802341461182, "learning_rate": 9.632507767931626e-05, "loss": 0.7118, "step": 38460 }, { "epoch": 0.24577386504478488, "grad_norm": 1.3255314826965332, "learning_rate": 9.63231893366469e-05, "loss": 0.9581, "step": 38470 }, { "epoch": 0.24583775219452358, "grad_norm": 0.7225618958473206, "learning_rate": 9.632130052746296e-05, "loss": 0.9634, "step": 38480 }, { "epoch": 0.2459016393442623, "grad_norm": 1.1634318828582764, "learning_rate": 9.631941125178343e-05, "loss": 1.0248, "step": 38490 }, { "epoch": 0.245965526494001, "grad_norm": 0.75383460521698, "learning_rate": 9.631752150962736e-05, "loss": 0.8206, "step": 38500 }, { "epoch": 0.2460294136437397, "grad_norm": 1.0557365417480469, "learning_rate": 9.631563130101377e-05, "loss": 0.8587, "step": 38510 }, { "epoch": 0.2460933007934784, "grad_norm": 0.8488501906394958, "learning_rate": 9.631374062596172e-05, "loss": 0.9565, "step": 38520 }, { "epoch": 0.24615718794321712, "grad_norm": 0.7064526081085205, "learning_rate": 9.631184948449023e-05, "loss": 0.8974, "step": 38530 }, { "epoch": 0.2462210750929558, "grad_norm": 1.123842716217041, "learning_rate": 9.630995787661834e-05, "loss": 1.0052, "step": 38540 }, { "epoch": 0.2462849622426945, "grad_norm": 0.8303399085998535, "learning_rate": 9.630806580236512e-05, "loss": 0.6941, "step": 38550 }, { "epoch": 0.2463488493924332, "grad_norm": 0.6929298043251038, "learning_rate": 9.630617326174962e-05, "loss": 0.9629, "step": 38560 }, { "epoch": 0.2464127365421719, "grad_norm": 0.9439372420310974, "learning_rate": 9.630428025479088e-05, "loss": 0.8717, "step": 38570 }, { "epoch": 0.24647662369191062, "grad_norm": 0.9138436317443848, "learning_rate": 9.630238678150799e-05, "loss": 0.8803, "step": 38580 }, { "epoch": 0.24654051084164932, "grad_norm": 0.8482638597488403, "learning_rate": 9.630049284192001e-05, "loss": 0.9979, "step": 38590 }, { "epoch": 0.246604397991388, "grad_norm": 0.779525637626648, "learning_rate": 9.6298598436046e-05, "loss": 0.7542, "step": 38600 }, { "epoch": 0.2466682851411267, "grad_norm": 0.8518264293670654, "learning_rate": 9.629670356390505e-05, "loss": 0.9886, "step": 38610 }, { "epoch": 0.24673217229086541, "grad_norm": 1.1973893642425537, "learning_rate": 9.629480822551627e-05, "loss": 1.0507, "step": 38620 }, { "epoch": 0.24679605944060412, "grad_norm": 0.8994162678718567, "learning_rate": 9.629291242089869e-05, "loss": 0.8537, "step": 38630 }, { "epoch": 0.24685994659034283, "grad_norm": 0.5772508978843689, "learning_rate": 9.629101615007145e-05, "loss": 0.8669, "step": 38640 }, { "epoch": 0.24692383374008153, "grad_norm": 0.7069337368011475, "learning_rate": 9.628911941305361e-05, "loss": 1.0522, "step": 38650 }, { "epoch": 0.2469877208898202, "grad_norm": 0.722184956073761, "learning_rate": 9.62872222098643e-05, "loss": 1.1426, "step": 38660 }, { "epoch": 0.24705160803955892, "grad_norm": 1.3423835039138794, "learning_rate": 9.628532454052263e-05, "loss": 0.7297, "step": 38670 }, { "epoch": 0.24711549518929762, "grad_norm": 0.7924500703811646, "learning_rate": 9.628342640504769e-05, "loss": 0.7804, "step": 38680 }, { "epoch": 0.24717938233903633, "grad_norm": 0.9021787047386169, "learning_rate": 9.628152780345861e-05, "loss": 0.8003, "step": 38690 }, { "epoch": 0.24724326948877504, "grad_norm": 0.991362452507019, "learning_rate": 9.62796287357745e-05, "loss": 0.6662, "step": 38700 }, { "epoch": 0.24730715663851374, "grad_norm": 0.8103492856025696, "learning_rate": 9.62777292020145e-05, "loss": 0.8117, "step": 38710 }, { "epoch": 0.24737104378825242, "grad_norm": 0.6966975331306458, "learning_rate": 9.627582920219773e-05, "loss": 0.8408, "step": 38720 }, { "epoch": 0.24743493093799113, "grad_norm": 0.7350102663040161, "learning_rate": 9.627392873634332e-05, "loss": 0.7777, "step": 38730 }, { "epoch": 0.24749881808772983, "grad_norm": 0.7632319927215576, "learning_rate": 9.627202780447041e-05, "loss": 1.0788, "step": 38740 }, { "epoch": 0.24756270523746854, "grad_norm": 0.6083953976631165, "learning_rate": 9.627012640659816e-05, "loss": 0.8096, "step": 38750 }, { "epoch": 0.24762659238720724, "grad_norm": 0.7463345527648926, "learning_rate": 9.62682245427457e-05, "loss": 0.7349, "step": 38760 }, { "epoch": 0.24769047953694595, "grad_norm": 0.7767286896705627, "learning_rate": 9.626632221293219e-05, "loss": 1.1039, "step": 38770 }, { "epoch": 0.24775436668668463, "grad_norm": 1.4006277322769165, "learning_rate": 9.626441941717678e-05, "loss": 0.7395, "step": 38780 }, { "epoch": 0.24781825383642334, "grad_norm": 0.9676879048347473, "learning_rate": 9.626251615549867e-05, "loss": 1.1208, "step": 38790 }, { "epoch": 0.24788214098616204, "grad_norm": 0.8642779588699341, "learning_rate": 9.626061242791699e-05, "loss": 0.8964, "step": 38800 }, { "epoch": 0.24794602813590075, "grad_norm": 1.1887127161026, "learning_rate": 9.625870823445092e-05, "loss": 0.8361, "step": 38810 }, { "epoch": 0.24800991528563945, "grad_norm": 0.7598790526390076, "learning_rate": 9.625680357511962e-05, "loss": 0.9854, "step": 38820 }, { "epoch": 0.24807380243537816, "grad_norm": 1.8052411079406738, "learning_rate": 9.625489844994231e-05, "loss": 0.867, "step": 38830 }, { "epoch": 0.24813768958511684, "grad_norm": 0.9150733947753906, "learning_rate": 9.625299285893816e-05, "loss": 0.9337, "step": 38840 }, { "epoch": 0.24820157673485554, "grad_norm": 0.8813467621803284, "learning_rate": 9.625108680212633e-05, "loss": 0.6746, "step": 38850 }, { "epoch": 0.24826546388459425, "grad_norm": 0.7594160437583923, "learning_rate": 9.624918027952607e-05, "loss": 1.0736, "step": 38860 }, { "epoch": 0.24832935103433296, "grad_norm": 0.7469112873077393, "learning_rate": 9.624727329115655e-05, "loss": 0.8787, "step": 38870 }, { "epoch": 0.24839323818407166, "grad_norm": 0.7725271582603455, "learning_rate": 9.624536583703697e-05, "loss": 0.768, "step": 38880 }, { "epoch": 0.24845712533381037, "grad_norm": 2.877929210662842, "learning_rate": 9.624345791718656e-05, "loss": 1.0134, "step": 38890 }, { "epoch": 0.24852101248354905, "grad_norm": 0.7709558606147766, "learning_rate": 9.62415495316245e-05, "loss": 0.922, "step": 38900 }, { "epoch": 0.24858489963328775, "grad_norm": 1.2846732139587402, "learning_rate": 9.623964068037006e-05, "loss": 1.2037, "step": 38910 }, { "epoch": 0.24864878678302646, "grad_norm": 0.6465185284614563, "learning_rate": 9.62377313634424e-05, "loss": 0.9262, "step": 38920 }, { "epoch": 0.24871267393276517, "grad_norm": 0.7968388199806213, "learning_rate": 9.623582158086081e-05, "loss": 0.7902, "step": 38930 }, { "epoch": 0.24877656108250387, "grad_norm": 0.9779314994812012, "learning_rate": 9.62339113326445e-05, "loss": 0.9249, "step": 38940 }, { "epoch": 0.24884044823224258, "grad_norm": 1.0651602745056152, "learning_rate": 9.62320006188127e-05, "loss": 0.8328, "step": 38950 }, { "epoch": 0.24890433538198126, "grad_norm": 0.6496372222900391, "learning_rate": 9.623008943938466e-05, "loss": 0.8704, "step": 38960 }, { "epoch": 0.24896822253171996, "grad_norm": 1.1484968662261963, "learning_rate": 9.62281777943796e-05, "loss": 0.8183, "step": 38970 }, { "epoch": 0.24903210968145867, "grad_norm": 1.39591383934021, "learning_rate": 9.622626568381684e-05, "loss": 1.0141, "step": 38980 }, { "epoch": 0.24909599683119737, "grad_norm": 0.6642056107521057, "learning_rate": 9.622435310771556e-05, "loss": 0.8481, "step": 38990 }, { "epoch": 0.24915988398093608, "grad_norm": 1.48539400100708, "learning_rate": 9.622244006609506e-05, "loss": 0.8486, "step": 39000 }, { "epoch": 0.2492237711306748, "grad_norm": 0.6797450184822083, "learning_rate": 9.62205265589746e-05, "loss": 0.8783, "step": 39010 }, { "epoch": 0.24928765828041347, "grad_norm": 0.6741315126419067, "learning_rate": 9.621861258637345e-05, "loss": 0.8605, "step": 39020 }, { "epoch": 0.24935154543015217, "grad_norm": 0.8567194938659668, "learning_rate": 9.621669814831089e-05, "loss": 1.1903, "step": 39030 }, { "epoch": 0.24941543257989088, "grad_norm": 0.665440022945404, "learning_rate": 9.62147832448062e-05, "loss": 0.9027, "step": 39040 }, { "epoch": 0.24947931972962958, "grad_norm": 0.7882826924324036, "learning_rate": 9.621286787587866e-05, "loss": 0.7315, "step": 39050 }, { "epoch": 0.2495432068793683, "grad_norm": 0.6440451145172119, "learning_rate": 9.621095204154756e-05, "loss": 1.0849, "step": 39060 }, { "epoch": 0.249607094029107, "grad_norm": 0.7116972804069519, "learning_rate": 9.62090357418322e-05, "loss": 0.9145, "step": 39070 }, { "epoch": 0.24967098117884567, "grad_norm": 0.895585298538208, "learning_rate": 9.620711897675185e-05, "loss": 0.9243, "step": 39080 }, { "epoch": 0.24973486832858438, "grad_norm": 0.730792224407196, "learning_rate": 9.620520174632585e-05, "loss": 0.8114, "step": 39090 }, { "epoch": 0.2497987554783231, "grad_norm": 1.1607691049575806, "learning_rate": 9.620328405057352e-05, "loss": 0.9737, "step": 39100 }, { "epoch": 0.2498626426280618, "grad_norm": 0.927515983581543, "learning_rate": 9.620136588951412e-05, "loss": 1.0355, "step": 39110 }, { "epoch": 0.2499265297778005, "grad_norm": 1.267722487449646, "learning_rate": 9.6199447263167e-05, "loss": 1.0208, "step": 39120 }, { "epoch": 0.2499904169275392, "grad_norm": 0.9854876399040222, "learning_rate": 9.619752817155149e-05, "loss": 1.2538, "step": 39130 }, { "epoch": 0.2500543040772779, "grad_norm": 0.8375557065010071, "learning_rate": 9.61956086146869e-05, "loss": 0.8444, "step": 39140 }, { "epoch": 0.2501181912270166, "grad_norm": 0.7425163388252258, "learning_rate": 9.619368859259255e-05, "loss": 0.6912, "step": 39150 }, { "epoch": 0.2501820783767553, "grad_norm": 0.9362971782684326, "learning_rate": 9.61917681052878e-05, "loss": 1.0123, "step": 39160 }, { "epoch": 0.250245965526494, "grad_norm": 0.7394902110099792, "learning_rate": 9.6189847152792e-05, "loss": 0.7578, "step": 39170 }, { "epoch": 0.2503098526762327, "grad_norm": 2.326955556869507, "learning_rate": 9.618792573512447e-05, "loss": 0.9821, "step": 39180 }, { "epoch": 0.2503737398259714, "grad_norm": 0.5567811727523804, "learning_rate": 9.618600385230456e-05, "loss": 1.543, "step": 39190 }, { "epoch": 0.2504376269757101, "grad_norm": 0.8062513470649719, "learning_rate": 9.618408150435165e-05, "loss": 1.0742, "step": 39200 }, { "epoch": 0.2505015141254488, "grad_norm": 0.8506273031234741, "learning_rate": 9.618215869128507e-05, "loss": 0.8167, "step": 39210 }, { "epoch": 0.2505654012751875, "grad_norm": 1.3575971126556396, "learning_rate": 9.61802354131242e-05, "loss": 1.0804, "step": 39220 }, { "epoch": 0.2506292884249262, "grad_norm": 0.6114894151687622, "learning_rate": 9.617831166988842e-05, "loss": 0.7265, "step": 39230 }, { "epoch": 0.2506931755746649, "grad_norm": 1.1617469787597656, "learning_rate": 9.617638746159709e-05, "loss": 1.1414, "step": 39240 }, { "epoch": 0.2507570627244036, "grad_norm": 1.004840612411499, "learning_rate": 9.617446278826958e-05, "loss": 0.8523, "step": 39250 }, { "epoch": 0.25082094987414233, "grad_norm": 0.69590824842453, "learning_rate": 9.617253764992529e-05, "loss": 0.6603, "step": 39260 }, { "epoch": 0.25088483702388104, "grad_norm": 2.461747169494629, "learning_rate": 9.61706120465836e-05, "loss": 0.973, "step": 39270 }, { "epoch": 0.25094872417361974, "grad_norm": 1.718680500984192, "learning_rate": 9.616868597826389e-05, "loss": 0.9792, "step": 39280 }, { "epoch": 0.2510126113233584, "grad_norm": 0.9190512299537659, "learning_rate": 9.616675944498559e-05, "loss": 1.0048, "step": 39290 }, { "epoch": 0.2510764984730971, "grad_norm": 0.6358333230018616, "learning_rate": 9.616483244676809e-05, "loss": 0.8014, "step": 39300 }, { "epoch": 0.2511403856228358, "grad_norm": 0.8349801301956177, "learning_rate": 9.616290498363076e-05, "loss": 0.9359, "step": 39310 }, { "epoch": 0.2512042727725745, "grad_norm": 0.715552568435669, "learning_rate": 9.616097705559306e-05, "loss": 0.9922, "step": 39320 }, { "epoch": 0.2512681599223132, "grad_norm": 0.7606783509254456, "learning_rate": 9.615904866267438e-05, "loss": 0.8454, "step": 39330 }, { "epoch": 0.2513320470720519, "grad_norm": 0.4417531192302704, "learning_rate": 9.615711980489415e-05, "loss": 0.7793, "step": 39340 }, { "epoch": 0.25139593422179063, "grad_norm": 0.9691148400306702, "learning_rate": 9.615519048227178e-05, "loss": 1.0334, "step": 39350 }, { "epoch": 0.25145982137152934, "grad_norm": 0.9279484748840332, "learning_rate": 9.615326069482673e-05, "loss": 0.76, "step": 39360 }, { "epoch": 0.25152370852126804, "grad_norm": 0.8048676252365112, "learning_rate": 9.61513304425784e-05, "loss": 0.7468, "step": 39370 }, { "epoch": 0.25158759567100675, "grad_norm": 0.460409939289093, "learning_rate": 9.614939972554626e-05, "loss": 0.9653, "step": 39380 }, { "epoch": 0.25165148282074545, "grad_norm": 0.5549013018608093, "learning_rate": 9.614746854374972e-05, "loss": 0.6636, "step": 39390 }, { "epoch": 0.25171536997048416, "grad_norm": 0.5946542024612427, "learning_rate": 9.614553689720827e-05, "loss": 1.0487, "step": 39400 }, { "epoch": 0.25177925712022287, "grad_norm": 0.4577612578868866, "learning_rate": 9.614360478594133e-05, "loss": 0.7034, "step": 39410 }, { "epoch": 0.2518431442699615, "grad_norm": 1.0320554971694946, "learning_rate": 9.614167220996838e-05, "loss": 1.0302, "step": 39420 }, { "epoch": 0.2519070314197002, "grad_norm": 0.5677948594093323, "learning_rate": 9.613973916930887e-05, "loss": 0.7853, "step": 39430 }, { "epoch": 0.25197091856943893, "grad_norm": 0.8020201325416565, "learning_rate": 9.613780566398227e-05, "loss": 0.9151, "step": 39440 }, { "epoch": 0.25203480571917763, "grad_norm": 0.966617226600647, "learning_rate": 9.613587169400805e-05, "loss": 0.8891, "step": 39450 }, { "epoch": 0.25209869286891634, "grad_norm": 1.3065134286880493, "learning_rate": 9.613393725940568e-05, "loss": 0.8974, "step": 39460 }, { "epoch": 0.25216258001865505, "grad_norm": 0.45482340455055237, "learning_rate": 9.613200236019466e-05, "loss": 0.8328, "step": 39470 }, { "epoch": 0.25222646716839375, "grad_norm": 1.2391636371612549, "learning_rate": 9.613006699639446e-05, "loss": 1.1481, "step": 39480 }, { "epoch": 0.25229035431813246, "grad_norm": 0.6843194365501404, "learning_rate": 9.612813116802459e-05, "loss": 0.9104, "step": 39490 }, { "epoch": 0.25235424146787117, "grad_norm": 0.9102997779846191, "learning_rate": 9.612619487510452e-05, "loss": 0.9072, "step": 39500 }, { "epoch": 0.25241812861760987, "grad_norm": 1.0311905145645142, "learning_rate": 9.612425811765376e-05, "loss": 0.7641, "step": 39510 }, { "epoch": 0.2524820157673486, "grad_norm": 0.7546457648277283, "learning_rate": 9.612232089569183e-05, "loss": 0.7963, "step": 39520 }, { "epoch": 0.2525459029170873, "grad_norm": 0.6303521990776062, "learning_rate": 9.612038320923822e-05, "loss": 0.7462, "step": 39530 }, { "epoch": 0.25260979006682593, "grad_norm": 0.8027763366699219, "learning_rate": 9.611844505831245e-05, "loss": 1.104, "step": 39540 }, { "epoch": 0.25267367721656464, "grad_norm": 0.7128822803497314, "learning_rate": 9.611650644293404e-05, "loss": 0.8728, "step": 39550 }, { "epoch": 0.25273756436630335, "grad_norm": 0.6497736573219299, "learning_rate": 9.611456736312252e-05, "loss": 0.7607, "step": 39560 }, { "epoch": 0.25280145151604205, "grad_norm": 0.8743992447853088, "learning_rate": 9.61126278188974e-05, "loss": 0.7501, "step": 39570 }, { "epoch": 0.25286533866578076, "grad_norm": 0.9536669254302979, "learning_rate": 9.611068781027824e-05, "loss": 0.9285, "step": 39580 }, { "epoch": 0.25292922581551947, "grad_norm": 0.5790122747421265, "learning_rate": 9.610874733728455e-05, "loss": 0.9496, "step": 39590 }, { "epoch": 0.25299311296525817, "grad_norm": 0.9158995151519775, "learning_rate": 9.61068063999359e-05, "loss": 0.6402, "step": 39600 }, { "epoch": 0.2530570001149969, "grad_norm": 0.5689387321472168, "learning_rate": 9.61048649982518e-05, "loss": 0.8452, "step": 39610 }, { "epoch": 0.2531208872647356, "grad_norm": 0.7655090093612671, "learning_rate": 9.610292313225184e-05, "loss": 0.8777, "step": 39620 }, { "epoch": 0.2531847744144743, "grad_norm": 0.9562214612960815, "learning_rate": 9.610098080195555e-05, "loss": 0.8012, "step": 39630 }, { "epoch": 0.253248661564213, "grad_norm": 0.9904442429542542, "learning_rate": 9.609903800738251e-05, "loss": 0.7636, "step": 39640 }, { "epoch": 0.2533125487139517, "grad_norm": 1.2793811559677124, "learning_rate": 9.609709474855226e-05, "loss": 0.7783, "step": 39650 }, { "epoch": 0.25337643586369035, "grad_norm": 1.0388842821121216, "learning_rate": 9.60951510254844e-05, "loss": 0.9002, "step": 39660 }, { "epoch": 0.25344032301342906, "grad_norm": 0.5834752321243286, "learning_rate": 9.60932068381985e-05, "loss": 0.8864, "step": 39670 }, { "epoch": 0.25350421016316776, "grad_norm": 0.7528826594352722, "learning_rate": 9.609126218671411e-05, "loss": 0.921, "step": 39680 }, { "epoch": 0.25356809731290647, "grad_norm": 0.8979531526565552, "learning_rate": 9.608931707105085e-05, "loss": 0.7506, "step": 39690 }, { "epoch": 0.2536319844626452, "grad_norm": 0.8471142649650574, "learning_rate": 9.608737149122829e-05, "loss": 0.7908, "step": 39700 }, { "epoch": 0.2536958716123839, "grad_norm": 1.1389135122299194, "learning_rate": 9.608542544726603e-05, "loss": 0.6934, "step": 39710 }, { "epoch": 0.2537597587621226, "grad_norm": 0.7484509944915771, "learning_rate": 9.608347893918366e-05, "loss": 0.8213, "step": 39720 }, { "epoch": 0.2538236459118613, "grad_norm": 1.0670719146728516, "learning_rate": 9.608153196700078e-05, "loss": 0.8251, "step": 39730 }, { "epoch": 0.2538875330616, "grad_norm": 0.6782344579696655, "learning_rate": 9.607958453073702e-05, "loss": 1.1657, "step": 39740 }, { "epoch": 0.2539514202113387, "grad_norm": 1.3708243370056152, "learning_rate": 9.607763663041198e-05, "loss": 0.815, "step": 39750 }, { "epoch": 0.2540153073610774, "grad_norm": 0.6719133853912354, "learning_rate": 9.607568826604528e-05, "loss": 0.8005, "step": 39760 }, { "epoch": 0.2540791945108161, "grad_norm": 0.8270106315612793, "learning_rate": 9.607373943765652e-05, "loss": 0.9914, "step": 39770 }, { "epoch": 0.25414308166055477, "grad_norm": 0.9956563711166382, "learning_rate": 9.607179014526535e-05, "loss": 0.8496, "step": 39780 }, { "epoch": 0.2542069688102935, "grad_norm": 1.2813925743103027, "learning_rate": 9.60698403888914e-05, "loss": 0.8953, "step": 39790 }, { "epoch": 0.2542708559600322, "grad_norm": 0.9512122273445129, "learning_rate": 9.60678901685543e-05, "loss": 1.2809, "step": 39800 }, { "epoch": 0.2543347431097709, "grad_norm": 0.9539186954498291, "learning_rate": 9.60659394842737e-05, "loss": 1.0062, "step": 39810 }, { "epoch": 0.2543986302595096, "grad_norm": 0.963093101978302, "learning_rate": 9.606398833606923e-05, "loss": 0.9276, "step": 39820 }, { "epoch": 0.2544625174092483, "grad_norm": 0.8398544192314148, "learning_rate": 9.606203672396055e-05, "loss": 1.2115, "step": 39830 }, { "epoch": 0.254526404558987, "grad_norm": 0.7242417335510254, "learning_rate": 9.60600846479673e-05, "loss": 0.7489, "step": 39840 }, { "epoch": 0.2545902917087257, "grad_norm": 1.4944490194320679, "learning_rate": 9.605813210810917e-05, "loss": 1.3959, "step": 39850 }, { "epoch": 0.2546541788584644, "grad_norm": 0.8167847394943237, "learning_rate": 9.605617910440579e-05, "loss": 0.8212, "step": 39860 }, { "epoch": 0.2547180660082031, "grad_norm": 0.7867516279220581, "learning_rate": 9.605422563687684e-05, "loss": 1.1342, "step": 39870 }, { "epoch": 0.25478195315794183, "grad_norm": 1.030428171157837, "learning_rate": 9.605227170554201e-05, "loss": 1.0195, "step": 39880 }, { "epoch": 0.25484584030768054, "grad_norm": 0.6590962409973145, "learning_rate": 9.605031731042094e-05, "loss": 0.928, "step": 39890 }, { "epoch": 0.2549097274574192, "grad_norm": 1.0177749395370483, "learning_rate": 9.604836245153334e-05, "loss": 0.8701, "step": 39900 }, { "epoch": 0.2549736146071579, "grad_norm": 0.6760947108268738, "learning_rate": 9.604640712889891e-05, "loss": 0.75, "step": 39910 }, { "epoch": 0.2550375017568966, "grad_norm": 0.6182072758674622, "learning_rate": 9.604445134253731e-05, "loss": 0.7936, "step": 39920 }, { "epoch": 0.2551013889066353, "grad_norm": 2.0102100372314453, "learning_rate": 9.604249509246826e-05, "loss": 0.7663, "step": 39930 }, { "epoch": 0.255165276056374, "grad_norm": 0.7477763295173645, "learning_rate": 9.604053837871145e-05, "loss": 0.7996, "step": 39940 }, { "epoch": 0.2552291632061127, "grad_norm": 0.8741987943649292, "learning_rate": 9.603858120128658e-05, "loss": 0.6339, "step": 39950 }, { "epoch": 0.2552930503558514, "grad_norm": 0.9012942314147949, "learning_rate": 9.603662356021337e-05, "loss": 1.0624, "step": 39960 }, { "epoch": 0.25535693750559013, "grad_norm": 0.9270766973495483, "learning_rate": 9.603466545551155e-05, "loss": 0.8575, "step": 39970 }, { "epoch": 0.25542082465532884, "grad_norm": 1.3213611841201782, "learning_rate": 9.603270688720081e-05, "loss": 1.0033, "step": 39980 }, { "epoch": 0.25548471180506754, "grad_norm": 1.1009562015533447, "learning_rate": 9.603074785530088e-05, "loss": 1.2523, "step": 39990 }, { "epoch": 0.25554859895480625, "grad_norm": 0.6423203945159912, "learning_rate": 9.602878835983151e-05, "loss": 1.3048, "step": 40000 }, { "epoch": 0.25561248610454496, "grad_norm": 0.747044026851654, "learning_rate": 9.60268284008124e-05, "loss": 0.8828, "step": 40010 }, { "epoch": 0.2556763732542836, "grad_norm": 2.6469714641571045, "learning_rate": 9.602486797826333e-05, "loss": 1.0208, "step": 40020 }, { "epoch": 0.2557402604040223, "grad_norm": 1.1847596168518066, "learning_rate": 9.602290709220403e-05, "loss": 1.0072, "step": 40030 }, { "epoch": 0.255804147553761, "grad_norm": 2.8336246013641357, "learning_rate": 9.602094574265421e-05, "loss": 1.1539, "step": 40040 }, { "epoch": 0.2558680347034997, "grad_norm": 0.8645704388618469, "learning_rate": 9.601898392963368e-05, "loss": 0.9056, "step": 40050 }, { "epoch": 0.25593192185323843, "grad_norm": 1.049857258796692, "learning_rate": 9.601702165316216e-05, "loss": 0.9048, "step": 40060 }, { "epoch": 0.25599580900297714, "grad_norm": 0.4633677005767822, "learning_rate": 9.601505891325941e-05, "loss": 0.8304, "step": 40070 }, { "epoch": 0.25605969615271584, "grad_norm": 0.9349238872528076, "learning_rate": 9.601309570994522e-05, "loss": 0.8373, "step": 40080 }, { "epoch": 0.25612358330245455, "grad_norm": 0.8270478844642639, "learning_rate": 9.601113204323935e-05, "loss": 0.9072, "step": 40090 }, { "epoch": 0.25618747045219326, "grad_norm": 0.9788760542869568, "learning_rate": 9.600916791316157e-05, "loss": 0.9037, "step": 40100 }, { "epoch": 0.25625135760193196, "grad_norm": 0.9276544451713562, "learning_rate": 9.600720331973167e-05, "loss": 0.855, "step": 40110 }, { "epoch": 0.25631524475167067, "grad_norm": 0.6395992636680603, "learning_rate": 9.600523826296943e-05, "loss": 0.7903, "step": 40120 }, { "epoch": 0.2563791319014094, "grad_norm": 0.8007004261016846, "learning_rate": 9.600327274289464e-05, "loss": 0.8177, "step": 40130 }, { "epoch": 0.256443019051148, "grad_norm": 1.1678056716918945, "learning_rate": 9.60013067595271e-05, "loss": 0.8526, "step": 40140 }, { "epoch": 0.25650690620088673, "grad_norm": 0.8026590347290039, "learning_rate": 9.59993403128866e-05, "loss": 0.9358, "step": 40150 }, { "epoch": 0.25657079335062544, "grad_norm": 1.020652174949646, "learning_rate": 9.599737340299294e-05, "loss": 1.0027, "step": 40160 }, { "epoch": 0.25663468050036414, "grad_norm": 0.8998063206672668, "learning_rate": 9.599540602986594e-05, "loss": 0.9003, "step": 40170 }, { "epoch": 0.25669856765010285, "grad_norm": 1.1373684406280518, "learning_rate": 9.599343819352542e-05, "loss": 0.9378, "step": 40180 }, { "epoch": 0.25676245479984156, "grad_norm": 0.9886625409126282, "learning_rate": 9.599146989399117e-05, "loss": 0.7858, "step": 40190 }, { "epoch": 0.25682634194958026, "grad_norm": 0.899255633354187, "learning_rate": 9.598950113128304e-05, "loss": 0.829, "step": 40200 }, { "epoch": 0.25689022909931897, "grad_norm": 1.3470916748046875, "learning_rate": 9.598753190542086e-05, "loss": 1.1775, "step": 40210 }, { "epoch": 0.2569541162490577, "grad_norm": 0.6345730423927307, "learning_rate": 9.598556221642443e-05, "loss": 0.9636, "step": 40220 }, { "epoch": 0.2570180033987964, "grad_norm": 0.9716305136680603, "learning_rate": 9.598359206431362e-05, "loss": 0.7459, "step": 40230 }, { "epoch": 0.2570818905485351, "grad_norm": 0.5304000377655029, "learning_rate": 9.598162144910824e-05, "loss": 1.0725, "step": 40240 }, { "epoch": 0.2571457776982738, "grad_norm": 0.5957240462303162, "learning_rate": 9.597965037082817e-05, "loss": 0.6653, "step": 40250 }, { "epoch": 0.2572096648480125, "grad_norm": 0.4943158030509949, "learning_rate": 9.597767882949322e-05, "loss": 0.86, "step": 40260 }, { "epoch": 0.25727355199775115, "grad_norm": 0.5531061291694641, "learning_rate": 9.59757068251233e-05, "loss": 1.0038, "step": 40270 }, { "epoch": 0.25733743914748985, "grad_norm": 0.747992753982544, "learning_rate": 9.59737343577382e-05, "loss": 0.7299, "step": 40280 }, { "epoch": 0.25740132629722856, "grad_norm": 0.6985594034194946, "learning_rate": 9.597176142735784e-05, "loss": 0.7868, "step": 40290 }, { "epoch": 0.25746521344696727, "grad_norm": 0.7488262057304382, "learning_rate": 9.596978803400207e-05, "loss": 0.8295, "step": 40300 }, { "epoch": 0.257529100596706, "grad_norm": 1.0540517568588257, "learning_rate": 9.596781417769076e-05, "loss": 0.9902, "step": 40310 }, { "epoch": 0.2575929877464447, "grad_norm": 0.7886415123939514, "learning_rate": 9.596583985844381e-05, "loss": 1.1803, "step": 40320 }, { "epoch": 0.2576568748961834, "grad_norm": 0.8558555841445923, "learning_rate": 9.596386507628108e-05, "loss": 0.7431, "step": 40330 }, { "epoch": 0.2577207620459221, "grad_norm": 1.4910976886749268, "learning_rate": 9.596188983122246e-05, "loss": 0.804, "step": 40340 }, { "epoch": 0.2577846491956608, "grad_norm": 0.7117838859558105, "learning_rate": 9.595991412328784e-05, "loss": 1.1232, "step": 40350 }, { "epoch": 0.2578485363453995, "grad_norm": 1.4588252305984497, "learning_rate": 9.595793795249714e-05, "loss": 0.8111, "step": 40360 }, { "epoch": 0.2579124234951382, "grad_norm": 1.322583794593811, "learning_rate": 9.595596131887024e-05, "loss": 0.6423, "step": 40370 }, { "epoch": 0.2579763106448769, "grad_norm": 1.1012037992477417, "learning_rate": 9.595398422242702e-05, "loss": 1.0775, "step": 40380 }, { "epoch": 0.25804019779461557, "grad_norm": 0.7852954864501953, "learning_rate": 9.595200666318746e-05, "loss": 0.9674, "step": 40390 }, { "epoch": 0.2581040849443543, "grad_norm": 0.9846484661102295, "learning_rate": 9.595002864117144e-05, "loss": 1.0256, "step": 40400 }, { "epoch": 0.258167972094093, "grad_norm": 0.7954578399658203, "learning_rate": 9.594805015639887e-05, "loss": 0.8524, "step": 40410 }, { "epoch": 0.2582318592438317, "grad_norm": 1.4870191812515259, "learning_rate": 9.594607120888968e-05, "loss": 1.0345, "step": 40420 }, { "epoch": 0.2582957463935704, "grad_norm": 0.8756714463233948, "learning_rate": 9.594409179866382e-05, "loss": 0.7956, "step": 40430 }, { "epoch": 0.2583596335433091, "grad_norm": 0.9294307231903076, "learning_rate": 9.594211192574119e-05, "loss": 1.2266, "step": 40440 }, { "epoch": 0.2584235206930478, "grad_norm": 0.6904338598251343, "learning_rate": 9.594013159014174e-05, "loss": 1.1333, "step": 40450 }, { "epoch": 0.2584874078427865, "grad_norm": 0.9498498439788818, "learning_rate": 9.593815079188544e-05, "loss": 0.8015, "step": 40460 }, { "epoch": 0.2585512949925252, "grad_norm": 0.686410665512085, "learning_rate": 9.593616953099222e-05, "loss": 1.0482, "step": 40470 }, { "epoch": 0.2586151821422639, "grad_norm": 0.8870381116867065, "learning_rate": 9.593418780748203e-05, "loss": 0.9889, "step": 40480 }, { "epoch": 0.25867906929200263, "grad_norm": 0.6513268947601318, "learning_rate": 9.593220562137481e-05, "loss": 0.8747, "step": 40490 }, { "epoch": 0.25874295644174133, "grad_norm": 0.8651543855667114, "learning_rate": 9.593022297269056e-05, "loss": 0.8722, "step": 40500 }, { "epoch": 0.25880684359148, "grad_norm": 0.8488206267356873, "learning_rate": 9.592823986144923e-05, "loss": 0.8462, "step": 40510 }, { "epoch": 0.2588707307412187, "grad_norm": 1.4681724309921265, "learning_rate": 9.592625628767079e-05, "loss": 0.8677, "step": 40520 }, { "epoch": 0.2589346178909574, "grad_norm": 4.392560958862305, "learning_rate": 9.592427225137521e-05, "loss": 0.8277, "step": 40530 }, { "epoch": 0.2589985050406961, "grad_norm": 0.7526928782463074, "learning_rate": 9.59222877525825e-05, "loss": 0.9594, "step": 40540 }, { "epoch": 0.2590623921904348, "grad_norm": 0.8426737785339355, "learning_rate": 9.59203027913126e-05, "loss": 0.9697, "step": 40550 }, { "epoch": 0.2591262793401735, "grad_norm": 1.0020760297775269, "learning_rate": 9.591831736758553e-05, "loss": 1.0801, "step": 40560 }, { "epoch": 0.2591901664899122, "grad_norm": 0.746593177318573, "learning_rate": 9.591633148142129e-05, "loss": 0.9383, "step": 40570 }, { "epoch": 0.2592540536396509, "grad_norm": 0.7140881419181824, "learning_rate": 9.591434513283986e-05, "loss": 0.8904, "step": 40580 }, { "epoch": 0.25931794078938963, "grad_norm": 0.9984216690063477, "learning_rate": 9.591235832186125e-05, "loss": 0.8797, "step": 40590 }, { "epoch": 0.25938182793912834, "grad_norm": 0.9869062304496765, "learning_rate": 9.591037104850546e-05, "loss": 0.8081, "step": 40600 }, { "epoch": 0.25944571508886705, "grad_norm": 1.0706651210784912, "learning_rate": 9.590838331279255e-05, "loss": 1.1206, "step": 40610 }, { "epoch": 0.25950960223860575, "grad_norm": 1.028882384300232, "learning_rate": 9.590639511474248e-05, "loss": 0.9107, "step": 40620 }, { "epoch": 0.2595734893883444, "grad_norm": 1.1108314990997314, "learning_rate": 9.590440645437529e-05, "loss": 0.8844, "step": 40630 }, { "epoch": 0.2596373765380831, "grad_norm": 0.9321774840354919, "learning_rate": 9.590241733171104e-05, "loss": 0.8686, "step": 40640 }, { "epoch": 0.2597012636878218, "grad_norm": 1.029826283454895, "learning_rate": 9.59004277467697e-05, "loss": 1.1136, "step": 40650 }, { "epoch": 0.2597651508375605, "grad_norm": 0.908687949180603, "learning_rate": 9.589843769957138e-05, "loss": 0.7381, "step": 40660 }, { "epoch": 0.2598290379872992, "grad_norm": 0.565542459487915, "learning_rate": 9.589644719013607e-05, "loss": 0.9248, "step": 40670 }, { "epoch": 0.25989292513703793, "grad_norm": 0.7926294803619385, "learning_rate": 9.589445621848384e-05, "loss": 0.8401, "step": 40680 }, { "epoch": 0.25995681228677664, "grad_norm": 0.6880931854248047, "learning_rate": 9.589246478463471e-05, "loss": 1.0261, "step": 40690 }, { "epoch": 0.26002069943651535, "grad_norm": 1.0573099851608276, "learning_rate": 9.589047288860876e-05, "loss": 0.7308, "step": 40700 }, { "epoch": 0.26008458658625405, "grad_norm": 1.2168048620224, "learning_rate": 9.588848053042605e-05, "loss": 0.9018, "step": 40710 }, { "epoch": 0.26014847373599276, "grad_norm": 0.883126437664032, "learning_rate": 9.588648771010666e-05, "loss": 1.0485, "step": 40720 }, { "epoch": 0.26021236088573146, "grad_norm": 0.7025336623191833, "learning_rate": 9.588449442767062e-05, "loss": 0.8312, "step": 40730 }, { "epoch": 0.26027624803547017, "grad_norm": 1.2748278379440308, "learning_rate": 9.588250068313803e-05, "loss": 1.1546, "step": 40740 }, { "epoch": 0.2603401351852088, "grad_norm": 0.5718688368797302, "learning_rate": 9.588050647652898e-05, "loss": 1.0319, "step": 40750 }, { "epoch": 0.2604040223349475, "grad_norm": 0.8465229868888855, "learning_rate": 9.587851180786351e-05, "loss": 0.9557, "step": 40760 }, { "epoch": 0.26046790948468623, "grad_norm": 0.9384807348251343, "learning_rate": 9.587651667716175e-05, "loss": 0.9037, "step": 40770 }, { "epoch": 0.26053179663442494, "grad_norm": 0.8962541222572327, "learning_rate": 9.58745210844438e-05, "loss": 0.782, "step": 40780 }, { "epoch": 0.26059568378416365, "grad_norm": 1.1555324792861938, "learning_rate": 9.58725250297297e-05, "loss": 0.9214, "step": 40790 }, { "epoch": 0.26065957093390235, "grad_norm": 0.5348967909812927, "learning_rate": 9.587052851303961e-05, "loss": 0.9402, "step": 40800 }, { "epoch": 0.26072345808364106, "grad_norm": 0.9009259343147278, "learning_rate": 9.586853153439359e-05, "loss": 0.8024, "step": 40810 }, { "epoch": 0.26078734523337976, "grad_norm": 0.7219264507293701, "learning_rate": 9.58665340938118e-05, "loss": 0.6523, "step": 40820 }, { "epoch": 0.26085123238311847, "grad_norm": 0.5624069571495056, "learning_rate": 9.586453619131432e-05, "loss": 0.8817, "step": 40830 }, { "epoch": 0.2609151195328572, "grad_norm": 0.8910852074623108, "learning_rate": 9.586253782692129e-05, "loss": 0.9517, "step": 40840 }, { "epoch": 0.2609790066825959, "grad_norm": 0.9586001038551331, "learning_rate": 9.586053900065282e-05, "loss": 0.788, "step": 40850 }, { "epoch": 0.2610428938323346, "grad_norm": 0.696487307548523, "learning_rate": 9.585853971252905e-05, "loss": 0.8417, "step": 40860 }, { "epoch": 0.26110678098207324, "grad_norm": 0.5967971682548523, "learning_rate": 9.585653996257011e-05, "loss": 0.6904, "step": 40870 }, { "epoch": 0.26117066813181194, "grad_norm": 0.7355442643165588, "learning_rate": 9.585453975079615e-05, "loss": 0.8139, "step": 40880 }, { "epoch": 0.26123455528155065, "grad_norm": 1.0505903959274292, "learning_rate": 9.585253907722729e-05, "loss": 0.9211, "step": 40890 }, { "epoch": 0.26129844243128936, "grad_norm": 0.8949944376945496, "learning_rate": 9.58505379418837e-05, "loss": 0.7938, "step": 40900 }, { "epoch": 0.26136232958102806, "grad_norm": 0.9528142809867859, "learning_rate": 9.584853634478553e-05, "loss": 1.1102, "step": 40910 }, { "epoch": 0.26142621673076677, "grad_norm": 1.1576639413833618, "learning_rate": 9.584653428595294e-05, "loss": 1.1977, "step": 40920 }, { "epoch": 0.2614901038805055, "grad_norm": 0.895746648311615, "learning_rate": 9.584453176540607e-05, "loss": 0.9543, "step": 40930 }, { "epoch": 0.2615539910302442, "grad_norm": 0.5793939828872681, "learning_rate": 9.58425287831651e-05, "loss": 0.7111, "step": 40940 }, { "epoch": 0.2616178781799829, "grad_norm": 0.8070379495620728, "learning_rate": 9.584052533925023e-05, "loss": 0.9304, "step": 40950 }, { "epoch": 0.2616817653297216, "grad_norm": 0.7948583364486694, "learning_rate": 9.583852143368159e-05, "loss": 0.8819, "step": 40960 }, { "epoch": 0.2617456524794603, "grad_norm": 0.9934036731719971, "learning_rate": 9.58365170664794e-05, "loss": 0.9324, "step": 40970 }, { "epoch": 0.261809539629199, "grad_norm": 0.47576090693473816, "learning_rate": 9.583451223766382e-05, "loss": 0.686, "step": 40980 }, { "epoch": 0.26187342677893766, "grad_norm": 0.9381804466247559, "learning_rate": 9.583250694725505e-05, "loss": 1.1989, "step": 40990 }, { "epoch": 0.26193731392867636, "grad_norm": 0.6319658756256104, "learning_rate": 9.58305011952733e-05, "loss": 0.9446, "step": 41000 }, { "epoch": 0.26200120107841507, "grad_norm": 0.8187490701675415, "learning_rate": 9.582849498173873e-05, "loss": 1.0167, "step": 41010 }, { "epoch": 0.2620650882281538, "grad_norm": 0.682817816734314, "learning_rate": 9.582648830667157e-05, "loss": 0.8066, "step": 41020 }, { "epoch": 0.2621289753778925, "grad_norm": 1.3718403577804565, "learning_rate": 9.582448117009205e-05, "loss": 0.6587, "step": 41030 }, { "epoch": 0.2621928625276312, "grad_norm": 0.7690182328224182, "learning_rate": 9.582247357202035e-05, "loss": 0.8507, "step": 41040 }, { "epoch": 0.2622567496773699, "grad_norm": 1.0088491439819336, "learning_rate": 9.58204655124767e-05, "loss": 0.907, "step": 41050 }, { "epoch": 0.2623206368271086, "grad_norm": 0.912486732006073, "learning_rate": 9.581845699148132e-05, "loss": 1.0735, "step": 41060 }, { "epoch": 0.2623845239768473, "grad_norm": 0.9121546149253845, "learning_rate": 9.581644800905442e-05, "loss": 0.8556, "step": 41070 }, { "epoch": 0.262448411126586, "grad_norm": 0.8414210677146912, "learning_rate": 9.581443856521628e-05, "loss": 1.1905, "step": 41080 }, { "epoch": 0.2625122982763247, "grad_norm": 0.5232017040252686, "learning_rate": 9.58124286599871e-05, "loss": 0.8749, "step": 41090 }, { "epoch": 0.2625761854260634, "grad_norm": 1.9335732460021973, "learning_rate": 9.581041829338712e-05, "loss": 0.7256, "step": 41100 }, { "epoch": 0.26264007257580213, "grad_norm": 0.7388540506362915, "learning_rate": 9.58084074654366e-05, "loss": 0.8657, "step": 41110 }, { "epoch": 0.2627039597255408, "grad_norm": 1.9808521270751953, "learning_rate": 9.580639617615579e-05, "loss": 0.7139, "step": 41120 }, { "epoch": 0.2627678468752795, "grad_norm": 1.5845258235931396, "learning_rate": 9.580438442556494e-05, "loss": 0.9972, "step": 41130 }, { "epoch": 0.2628317340250182, "grad_norm": 0.8241519331932068, "learning_rate": 9.580237221368431e-05, "loss": 0.7273, "step": 41140 }, { "epoch": 0.2628956211747569, "grad_norm": 0.8821679353713989, "learning_rate": 9.580035954053418e-05, "loss": 0.8493, "step": 41150 }, { "epoch": 0.2629595083244956, "grad_norm": 0.7632741332054138, "learning_rate": 9.57983464061348e-05, "loss": 1.0987, "step": 41160 }, { "epoch": 0.2630233954742343, "grad_norm": 0.6200475692749023, "learning_rate": 9.579633281050644e-05, "loss": 0.7774, "step": 41170 }, { "epoch": 0.263087282623973, "grad_norm": 0.7186120748519897, "learning_rate": 9.57943187536694e-05, "loss": 1.0146, "step": 41180 }, { "epoch": 0.2631511697737117, "grad_norm": 1.4124023914337158, "learning_rate": 9.579230423564395e-05, "loss": 0.9421, "step": 41190 }, { "epoch": 0.26321505692345043, "grad_norm": 1.0203825235366821, "learning_rate": 9.579028925645038e-05, "loss": 0.7599, "step": 41200 }, { "epoch": 0.26327894407318914, "grad_norm": 0.9980260729789734, "learning_rate": 9.578827381610899e-05, "loss": 0.7085, "step": 41210 }, { "epoch": 0.26334283122292784, "grad_norm": 0.6271802186965942, "learning_rate": 9.578625791464006e-05, "loss": 0.7378, "step": 41220 }, { "epoch": 0.26340671837266655, "grad_norm": 0.8588720560073853, "learning_rate": 9.578424155206392e-05, "loss": 1.1045, "step": 41230 }, { "epoch": 0.2634706055224052, "grad_norm": 0.9197202920913696, "learning_rate": 9.578222472840083e-05, "loss": 0.7892, "step": 41240 }, { "epoch": 0.2635344926721439, "grad_norm": 1.5513139963150024, "learning_rate": 9.578020744367115e-05, "loss": 0.9384, "step": 41250 }, { "epoch": 0.2635983798218826, "grad_norm": 0.952202320098877, "learning_rate": 9.577818969789516e-05, "loss": 1.0154, "step": 41260 }, { "epoch": 0.2636622669716213, "grad_norm": 0.7039241790771484, "learning_rate": 9.577617149109322e-05, "loss": 0.8493, "step": 41270 }, { "epoch": 0.26372615412136, "grad_norm": 0.8046781420707703, "learning_rate": 9.577415282328561e-05, "loss": 0.8281, "step": 41280 }, { "epoch": 0.26379004127109873, "grad_norm": 1.6643345355987549, "learning_rate": 9.57721336944927e-05, "loss": 0.9271, "step": 41290 }, { "epoch": 0.26385392842083744, "grad_norm": 0.9182053208351135, "learning_rate": 9.577011410473477e-05, "loss": 0.9575, "step": 41300 }, { "epoch": 0.26391781557057614, "grad_norm": 1.2427566051483154, "learning_rate": 9.576809405403222e-05, "loss": 1.1546, "step": 41310 }, { "epoch": 0.26398170272031485, "grad_norm": 1.6159720420837402, "learning_rate": 9.576607354240536e-05, "loss": 0.8444, "step": 41320 }, { "epoch": 0.26404558987005355, "grad_norm": 1.222095012664795, "learning_rate": 9.576405256987456e-05, "loss": 1.0805, "step": 41330 }, { "epoch": 0.26410947701979226, "grad_norm": 0.5747536420822144, "learning_rate": 9.576203113646015e-05, "loss": 0.9668, "step": 41340 }, { "epoch": 0.26417336416953097, "grad_norm": 0.505827009677887, "learning_rate": 9.576000924218249e-05, "loss": 0.812, "step": 41350 }, { "epoch": 0.2642372513192696, "grad_norm": 0.9075201749801636, "learning_rate": 9.575798688706196e-05, "loss": 0.907, "step": 41360 }, { "epoch": 0.2643011384690083, "grad_norm": 0.6353416442871094, "learning_rate": 9.575596407111891e-05, "loss": 0.9169, "step": 41370 }, { "epoch": 0.26436502561874703, "grad_norm": 0.8017897009849548, "learning_rate": 9.575394079437372e-05, "loss": 0.8862, "step": 41380 }, { "epoch": 0.26442891276848574, "grad_norm": 0.6533048748970032, "learning_rate": 9.575191705684676e-05, "loss": 1.004, "step": 41390 }, { "epoch": 0.26449279991822444, "grad_norm": 0.6979532837867737, "learning_rate": 9.574989285855842e-05, "loss": 0.7255, "step": 41400 }, { "epoch": 0.26455668706796315, "grad_norm": 1.3084895610809326, "learning_rate": 9.574786819952908e-05, "loss": 0.9328, "step": 41410 }, { "epoch": 0.26462057421770185, "grad_norm": 0.8081639409065247, "learning_rate": 9.574584307977912e-05, "loss": 1.1026, "step": 41420 }, { "epoch": 0.26468446136744056, "grad_norm": 0.6545292735099792, "learning_rate": 9.574381749932894e-05, "loss": 0.8194, "step": 41430 }, { "epoch": 0.26474834851717927, "grad_norm": 1.0248223543167114, "learning_rate": 9.574179145819898e-05, "loss": 0.9576, "step": 41440 }, { "epoch": 0.26481223566691797, "grad_norm": 1.235369086265564, "learning_rate": 9.573976495640958e-05, "loss": 0.758, "step": 41450 }, { "epoch": 0.2648761228166567, "grad_norm": 0.8014651536941528, "learning_rate": 9.573773799398116e-05, "loss": 0.7959, "step": 41460 }, { "epoch": 0.2649400099663954, "grad_norm": 0.6294002532958984, "learning_rate": 9.573571057093418e-05, "loss": 0.9757, "step": 41470 }, { "epoch": 0.26500389711613404, "grad_norm": 0.9327560663223267, "learning_rate": 9.573368268728901e-05, "loss": 1.1375, "step": 41480 }, { "epoch": 0.26506778426587274, "grad_norm": 1.0414352416992188, "learning_rate": 9.57316543430661e-05, "loss": 1.0202, "step": 41490 }, { "epoch": 0.26513167141561145, "grad_norm": 1.695721983909607, "learning_rate": 9.572962553828586e-05, "loss": 0.8127, "step": 41500 }, { "epoch": 0.26519555856535015, "grad_norm": 0.8339122533798218, "learning_rate": 9.572759627296872e-05, "loss": 0.7414, "step": 41510 }, { "epoch": 0.26525944571508886, "grad_norm": 0.8159180283546448, "learning_rate": 9.572556654713514e-05, "loss": 0.811, "step": 41520 }, { "epoch": 0.26532333286482757, "grad_norm": 0.9597871899604797, "learning_rate": 9.572353636080555e-05, "loss": 1.089, "step": 41530 }, { "epoch": 0.26538722001456627, "grad_norm": 0.5867129564285278, "learning_rate": 9.572150571400038e-05, "loss": 0.9204, "step": 41540 }, { "epoch": 0.265451107164305, "grad_norm": 0.706292450428009, "learning_rate": 9.57194746067401e-05, "loss": 0.8317, "step": 41550 }, { "epoch": 0.2655149943140437, "grad_norm": 1.7897143363952637, "learning_rate": 9.571744303904515e-05, "loss": 0.9645, "step": 41560 }, { "epoch": 0.2655788814637824, "grad_norm": 0.9594964981079102, "learning_rate": 9.571541101093602e-05, "loss": 0.9729, "step": 41570 }, { "epoch": 0.2656427686135211, "grad_norm": 0.7055974006652832, "learning_rate": 9.571337852243313e-05, "loss": 0.8736, "step": 41580 }, { "epoch": 0.2657066557632598, "grad_norm": 0.8249185681343079, "learning_rate": 9.571134557355697e-05, "loss": 0.9665, "step": 41590 }, { "epoch": 0.26577054291299845, "grad_norm": 0.5963894128799438, "learning_rate": 9.570931216432801e-05, "loss": 0.8331, "step": 41600 }, { "epoch": 0.26583443006273716, "grad_norm": 0.5887112617492676, "learning_rate": 9.570727829476676e-05, "loss": 0.8705, "step": 41610 }, { "epoch": 0.26589831721247587, "grad_norm": 0.7210960388183594, "learning_rate": 9.570524396489365e-05, "loss": 0.7763, "step": 41620 }, { "epoch": 0.26596220436221457, "grad_norm": 0.8247012495994568, "learning_rate": 9.570320917472919e-05, "loss": 0.8148, "step": 41630 }, { "epoch": 0.2660260915119533, "grad_norm": 0.8511599898338318, "learning_rate": 9.57011739242939e-05, "loss": 0.9971, "step": 41640 }, { "epoch": 0.266089978661692, "grad_norm": 0.6672869324684143, "learning_rate": 9.569913821360824e-05, "loss": 0.9768, "step": 41650 }, { "epoch": 0.2661538658114307, "grad_norm": 1.1974848508834839, "learning_rate": 9.569710204269271e-05, "loss": 0.991, "step": 41660 }, { "epoch": 0.2662177529611694, "grad_norm": 0.8013436198234558, "learning_rate": 9.569506541156784e-05, "loss": 0.8786, "step": 41670 }, { "epoch": 0.2662816401109081, "grad_norm": 0.7496063113212585, "learning_rate": 9.569302832025413e-05, "loss": 0.858, "step": 41680 }, { "epoch": 0.2663455272606468, "grad_norm": 0.8424622416496277, "learning_rate": 9.569099076877208e-05, "loss": 0.6938, "step": 41690 }, { "epoch": 0.2664094144103855, "grad_norm": 0.8976988196372986, "learning_rate": 9.568895275714225e-05, "loss": 0.9233, "step": 41700 }, { "epoch": 0.2664733015601242, "grad_norm": 1.4451935291290283, "learning_rate": 9.568691428538512e-05, "loss": 1.0334, "step": 41710 }, { "epoch": 0.26653718870986287, "grad_norm": 1.2237658500671387, "learning_rate": 9.568487535352124e-05, "loss": 0.8801, "step": 41720 }, { "epoch": 0.2666010758596016, "grad_norm": 2.1000301837921143, "learning_rate": 9.568283596157115e-05, "loss": 1.0871, "step": 41730 }, { "epoch": 0.2666649630093403, "grad_norm": 0.8955364227294922, "learning_rate": 9.568079610955539e-05, "loss": 0.8636, "step": 41740 }, { "epoch": 0.266728850159079, "grad_norm": 0.9859808087348938, "learning_rate": 9.567875579749447e-05, "loss": 0.9954, "step": 41750 }, { "epoch": 0.2667927373088177, "grad_norm": 1.0908890962600708, "learning_rate": 9.567671502540897e-05, "loss": 0.8674, "step": 41760 }, { "epoch": 0.2668566244585564, "grad_norm": 0.48835402727127075, "learning_rate": 9.567467379331943e-05, "loss": 1.2828, "step": 41770 }, { "epoch": 0.2669205116082951, "grad_norm": 0.7387278079986572, "learning_rate": 9.567263210124641e-05, "loss": 0.9535, "step": 41780 }, { "epoch": 0.2669843987580338, "grad_norm": 0.813470184803009, "learning_rate": 9.567058994921049e-05, "loss": 0.9851, "step": 41790 }, { "epoch": 0.2670482859077725, "grad_norm": 0.8729559779167175, "learning_rate": 9.566854733723221e-05, "loss": 0.8694, "step": 41800 }, { "epoch": 0.2671121730575112, "grad_norm": 0.9543429613113403, "learning_rate": 9.566650426533214e-05, "loss": 0.8436, "step": 41810 }, { "epoch": 0.26717606020724993, "grad_norm": 1.1418914794921875, "learning_rate": 9.566446073353089e-05, "loss": 0.9252, "step": 41820 }, { "epoch": 0.26723994735698864, "grad_norm": 0.9289169907569885, "learning_rate": 9.566241674184898e-05, "loss": 0.9306, "step": 41830 }, { "epoch": 0.2673038345067273, "grad_norm": 1.3721492290496826, "learning_rate": 9.566037229030704e-05, "loss": 1.0788, "step": 41840 }, { "epoch": 0.267367721656466, "grad_norm": 0.7210074067115784, "learning_rate": 9.565832737892566e-05, "loss": 0.7705, "step": 41850 }, { "epoch": 0.2674316088062047, "grad_norm": 0.6854256391525269, "learning_rate": 9.565628200772542e-05, "loss": 0.8524, "step": 41860 }, { "epoch": 0.2674954959559434, "grad_norm": 1.1195999383926392, "learning_rate": 9.565423617672691e-05, "loss": 1.0449, "step": 41870 }, { "epoch": 0.2675593831056821, "grad_norm": 1.1100611686706543, "learning_rate": 9.565218988595077e-05, "loss": 0.9949, "step": 41880 }, { "epoch": 0.2676232702554208, "grad_norm": 0.6192795038223267, "learning_rate": 9.565014313541756e-05, "loss": 1.0524, "step": 41890 }, { "epoch": 0.2676871574051595, "grad_norm": 1.138809084892273, "learning_rate": 9.564809592514793e-05, "loss": 1.0164, "step": 41900 }, { "epoch": 0.26775104455489823, "grad_norm": 0.6620566844940186, "learning_rate": 9.564604825516248e-05, "loss": 0.9091, "step": 41910 }, { "epoch": 0.26781493170463694, "grad_norm": 0.8734396696090698, "learning_rate": 9.564400012548183e-05, "loss": 1.1338, "step": 41920 }, { "epoch": 0.26787881885437564, "grad_norm": 0.5580737590789795, "learning_rate": 9.56419515361266e-05, "loss": 0.8139, "step": 41930 }, { "epoch": 0.26794270600411435, "grad_norm": 0.8338034152984619, "learning_rate": 9.563990248711745e-05, "loss": 0.8791, "step": 41940 }, { "epoch": 0.26800659315385306, "grad_norm": 0.557685136795044, "learning_rate": 9.563785297847501e-05, "loss": 0.7298, "step": 41950 }, { "epoch": 0.26807048030359176, "grad_norm": 0.8459478616714478, "learning_rate": 9.563580301021988e-05, "loss": 0.9614, "step": 41960 }, { "epoch": 0.2681343674533304, "grad_norm": 0.918144166469574, "learning_rate": 9.563375258237275e-05, "loss": 0.9374, "step": 41970 }, { "epoch": 0.2681982546030691, "grad_norm": 0.9142857193946838, "learning_rate": 9.563170169495424e-05, "loss": 0.8053, "step": 41980 }, { "epoch": 0.2682621417528078, "grad_norm": 0.9095722436904907, "learning_rate": 9.562965034798502e-05, "loss": 0.9772, "step": 41990 }, { "epoch": 0.26832602890254653, "grad_norm": 1.0715844631195068, "learning_rate": 9.562759854148575e-05, "loss": 0.8129, "step": 42000 }, { "epoch": 0.26838991605228524, "grad_norm": 0.8338362574577332, "learning_rate": 9.562554627547709e-05, "loss": 0.9305, "step": 42010 }, { "epoch": 0.26845380320202394, "grad_norm": 0.709132969379425, "learning_rate": 9.562349354997971e-05, "loss": 0.7656, "step": 42020 }, { "epoch": 0.26851769035176265, "grad_norm": 1.1715177297592163, "learning_rate": 9.562144036501428e-05, "loss": 0.9993, "step": 42030 }, { "epoch": 0.26858157750150136, "grad_norm": 1.6266652345657349, "learning_rate": 9.561938672060147e-05, "loss": 0.8534, "step": 42040 }, { "epoch": 0.26864546465124006, "grad_norm": 0.8818448781967163, "learning_rate": 9.561733261676196e-05, "loss": 0.8328, "step": 42050 }, { "epoch": 0.26870935180097877, "grad_norm": 1.15716552734375, "learning_rate": 9.561527805351646e-05, "loss": 0.7874, "step": 42060 }, { "epoch": 0.2687732389507175, "grad_norm": 0.913690984249115, "learning_rate": 9.561322303088565e-05, "loss": 0.7483, "step": 42070 }, { "epoch": 0.2688371261004562, "grad_norm": 0.644812822341919, "learning_rate": 9.561116754889022e-05, "loss": 1.011, "step": 42080 }, { "epoch": 0.26890101325019483, "grad_norm": 0.7774586081504822, "learning_rate": 9.560911160755088e-05, "loss": 1.0542, "step": 42090 }, { "epoch": 0.26896490039993354, "grad_norm": 0.4670238494873047, "learning_rate": 9.56070552068883e-05, "loss": 0.9087, "step": 42100 }, { "epoch": 0.26902878754967224, "grad_norm": 1.1221692562103271, "learning_rate": 9.560499834692325e-05, "loss": 0.8704, "step": 42110 }, { "epoch": 0.26909267469941095, "grad_norm": 1.4995726346969604, "learning_rate": 9.56029410276764e-05, "loss": 0.9453, "step": 42120 }, { "epoch": 0.26915656184914966, "grad_norm": 0.8239421844482422, "learning_rate": 9.56008832491685e-05, "loss": 0.8527, "step": 42130 }, { "epoch": 0.26922044899888836, "grad_norm": 0.7687192559242249, "learning_rate": 9.559882501142024e-05, "loss": 1.0542, "step": 42140 }, { "epoch": 0.26928433614862707, "grad_norm": 0.8161906003952026, "learning_rate": 9.559676631445236e-05, "loss": 0.8796, "step": 42150 }, { "epoch": 0.2693482232983658, "grad_norm": 0.7608240842819214, "learning_rate": 9.559470715828559e-05, "loss": 0.8504, "step": 42160 }, { "epoch": 0.2694121104481045, "grad_norm": 1.2632617950439453, "learning_rate": 9.559264754294068e-05, "loss": 0.6786, "step": 42170 }, { "epoch": 0.2694759975978432, "grad_norm": 1.0786528587341309, "learning_rate": 9.55907934965501e-05, "loss": 1.0747, "step": 42180 }, { "epoch": 0.2695398847475819, "grad_norm": 0.9386286735534668, "learning_rate": 9.558873300882385e-05, "loss": 0.6627, "step": 42190 }, { "epoch": 0.2696037718973206, "grad_norm": 0.9134590029716492, "learning_rate": 9.558667206197964e-05, "loss": 0.7985, "step": 42200 }, { "epoch": 0.26966765904705925, "grad_norm": 0.6581794619560242, "learning_rate": 9.55846106560382e-05, "loss": 0.7476, "step": 42210 }, { "epoch": 0.26973154619679796, "grad_norm": 0.7346853017807007, "learning_rate": 9.558254879102028e-05, "loss": 1.1158, "step": 42220 }, { "epoch": 0.26979543334653666, "grad_norm": 0.7923113107681274, "learning_rate": 9.558048646694668e-05, "loss": 1.0275, "step": 42230 }, { "epoch": 0.26985932049627537, "grad_norm": 0.9055522680282593, "learning_rate": 9.557842368383813e-05, "loss": 0.7192, "step": 42240 }, { "epoch": 0.2699232076460141, "grad_norm": 0.5908991694450378, "learning_rate": 9.557636044171542e-05, "loss": 0.9693, "step": 42250 }, { "epoch": 0.2699870947957528, "grad_norm": 0.636661946773529, "learning_rate": 9.557429674059935e-05, "loss": 1.0553, "step": 42260 }, { "epoch": 0.2700509819454915, "grad_norm": 0.9865610599517822, "learning_rate": 9.557223258051069e-05, "loss": 1.0789, "step": 42270 }, { "epoch": 0.2701148690952302, "grad_norm": 0.9444893598556519, "learning_rate": 9.557016796147021e-05, "loss": 0.8252, "step": 42280 }, { "epoch": 0.2701787562449689, "grad_norm": 0.7374017238616943, "learning_rate": 9.556810288349871e-05, "loss": 0.9914, "step": 42290 }, { "epoch": 0.2702426433947076, "grad_norm": 0.772415041923523, "learning_rate": 9.5566037346617e-05, "loss": 0.7827, "step": 42300 }, { "epoch": 0.2703065305444463, "grad_norm": 1.0962374210357666, "learning_rate": 9.556397135084587e-05, "loss": 0.995, "step": 42310 }, { "epoch": 0.270370417694185, "grad_norm": 0.7097411751747131, "learning_rate": 9.556190489620612e-05, "loss": 0.8302, "step": 42320 }, { "epoch": 0.27043430484392367, "grad_norm": 0.7932478785514832, "learning_rate": 9.555983798271859e-05, "loss": 0.9678, "step": 42330 }, { "epoch": 0.2704981919936624, "grad_norm": 0.6816592812538147, "learning_rate": 9.555777061040407e-05, "loss": 1.0183, "step": 42340 }, { "epoch": 0.2705620791434011, "grad_norm": 0.6527500152587891, "learning_rate": 9.555570277928338e-05, "loss": 0.8971, "step": 42350 }, { "epoch": 0.2706259662931398, "grad_norm": 0.6478419899940491, "learning_rate": 9.555363448937735e-05, "loss": 0.8146, "step": 42360 }, { "epoch": 0.2706898534428785, "grad_norm": 0.5460163354873657, "learning_rate": 9.555156574070681e-05, "loss": 0.7972, "step": 42370 }, { "epoch": 0.2707537405926172, "grad_norm": 0.5501124262809753, "learning_rate": 9.554949653329262e-05, "loss": 0.6, "step": 42380 }, { "epoch": 0.2708176277423559, "grad_norm": 0.9783973693847656, "learning_rate": 9.554742686715557e-05, "loss": 0.9689, "step": 42390 }, { "epoch": 0.2708815148920946, "grad_norm": 1.4098743200302124, "learning_rate": 9.554535674231652e-05, "loss": 0.8839, "step": 42400 }, { "epoch": 0.2709454020418333, "grad_norm": 0.9374015927314758, "learning_rate": 9.554328615879636e-05, "loss": 1.1531, "step": 42410 }, { "epoch": 0.271009289191572, "grad_norm": 2.437901258468628, "learning_rate": 9.554121511661587e-05, "loss": 0.8329, "step": 42420 }, { "epoch": 0.27107317634131073, "grad_norm": 0.5805662870407104, "learning_rate": 9.553914361579597e-05, "loss": 0.8364, "step": 42430 }, { "epoch": 0.27113706349104943, "grad_norm": 0.8254538178443909, "learning_rate": 9.553707165635747e-05, "loss": 0.5683, "step": 42440 }, { "epoch": 0.2712009506407881, "grad_norm": 1.0397802591323853, "learning_rate": 9.55349992383213e-05, "loss": 0.9475, "step": 42450 }, { "epoch": 0.2712648377905268, "grad_norm": 1.111701488494873, "learning_rate": 9.553292636170827e-05, "loss": 0.9378, "step": 42460 }, { "epoch": 0.2713287249402655, "grad_norm": 1.4257961511611938, "learning_rate": 9.553085302653929e-05, "loss": 0.828, "step": 42470 }, { "epoch": 0.2713926120900042, "grad_norm": 1.0189239978790283, "learning_rate": 9.552877923283522e-05, "loss": 1.1691, "step": 42480 }, { "epoch": 0.2714564992397429, "grad_norm": 1.3065085411071777, "learning_rate": 9.552670498061697e-05, "loss": 1.0535, "step": 42490 }, { "epoch": 0.2715203863894816, "grad_norm": 0.6838773488998413, "learning_rate": 9.55246302699054e-05, "loss": 0.938, "step": 42500 }, { "epoch": 0.2715842735392203, "grad_norm": 0.9010002613067627, "learning_rate": 9.552255510072142e-05, "loss": 0.7285, "step": 42510 }, { "epoch": 0.27164816068895903, "grad_norm": 0.7863100171089172, "learning_rate": 9.552047947308593e-05, "loss": 0.8349, "step": 42520 }, { "epoch": 0.27171204783869773, "grad_norm": 1.3251279592514038, "learning_rate": 9.551840338701983e-05, "loss": 0.8618, "step": 42530 }, { "epoch": 0.27177593498843644, "grad_norm": 1.1294409036636353, "learning_rate": 9.551632684254405e-05, "loss": 0.9233, "step": 42540 }, { "epoch": 0.27183982213817515, "grad_norm": 1.4269248247146606, "learning_rate": 9.551424983967946e-05, "loss": 0.8823, "step": 42550 }, { "epoch": 0.27190370928791385, "grad_norm": 0.7050525546073914, "learning_rate": 9.551217237844701e-05, "loss": 0.8103, "step": 42560 }, { "epoch": 0.2719675964376525, "grad_norm": 1.4217504262924194, "learning_rate": 9.551009445886759e-05, "loss": 0.7929, "step": 42570 }, { "epoch": 0.2720314835873912, "grad_norm": 0.6999850869178772, "learning_rate": 9.550801608096216e-05, "loss": 1.3094, "step": 42580 }, { "epoch": 0.2720953707371299, "grad_norm": 0.5494612455368042, "learning_rate": 9.550593724475163e-05, "loss": 0.9256, "step": 42590 }, { "epoch": 0.2721592578868686, "grad_norm": 0.5456877946853638, "learning_rate": 9.550385795025696e-05, "loss": 0.8309, "step": 42600 }, { "epoch": 0.27222314503660733, "grad_norm": 0.6689077615737915, "learning_rate": 9.550177819749905e-05, "loss": 1.158, "step": 42610 }, { "epoch": 0.27228703218634603, "grad_norm": 0.811871349811554, "learning_rate": 9.54996979864989e-05, "loss": 0.6157, "step": 42620 }, { "epoch": 0.27235091933608474, "grad_norm": 0.5832274556159973, "learning_rate": 9.549761731727741e-05, "loss": 0.9875, "step": 42630 }, { "epoch": 0.27241480648582345, "grad_norm": 0.874345064163208, "learning_rate": 9.549553618985556e-05, "loss": 0.8906, "step": 42640 }, { "epoch": 0.27247869363556215, "grad_norm": 2.064990282058716, "learning_rate": 9.54934546042543e-05, "loss": 0.8612, "step": 42650 }, { "epoch": 0.27254258078530086, "grad_norm": 0.5960216522216797, "learning_rate": 9.549137256049459e-05, "loss": 0.8631, "step": 42660 }, { "epoch": 0.27260646793503956, "grad_norm": 1.0062336921691895, "learning_rate": 9.548929005859739e-05, "loss": 0.786, "step": 42670 }, { "epoch": 0.27267035508477827, "grad_norm": 0.9856522679328918, "learning_rate": 9.548720709858371e-05, "loss": 0.8347, "step": 42680 }, { "epoch": 0.272734242234517, "grad_norm": 1.2544548511505127, "learning_rate": 9.548512368047448e-05, "loss": 1.0405, "step": 42690 }, { "epoch": 0.2727981293842556, "grad_norm": 0.45234474539756775, "learning_rate": 9.548303980429072e-05, "loss": 0.8274, "step": 42700 }, { "epoch": 0.27286201653399433, "grad_norm": 0.7926174402236938, "learning_rate": 9.54809554700534e-05, "loss": 0.9903, "step": 42710 }, { "epoch": 0.27292590368373304, "grad_norm": 0.9858782291412354, "learning_rate": 9.547887067778352e-05, "loss": 0.8354, "step": 42720 }, { "epoch": 0.27298979083347175, "grad_norm": 0.5488384962081909, "learning_rate": 9.547678542750204e-05, "loss": 0.9663, "step": 42730 }, { "epoch": 0.27305367798321045, "grad_norm": 0.7685027122497559, "learning_rate": 9.547469971923001e-05, "loss": 0.8943, "step": 42740 }, { "epoch": 0.27311756513294916, "grad_norm": 0.910285472869873, "learning_rate": 9.54726135529884e-05, "loss": 0.7713, "step": 42750 }, { "epoch": 0.27318145228268786, "grad_norm": 1.956381916999817, "learning_rate": 9.547052692879825e-05, "loss": 0.7784, "step": 42760 }, { "epoch": 0.27324533943242657, "grad_norm": 0.7288005352020264, "learning_rate": 9.546843984668055e-05, "loss": 0.8306, "step": 42770 }, { "epoch": 0.2733092265821653, "grad_norm": 0.8818903565406799, "learning_rate": 9.54663523066563e-05, "loss": 0.8758, "step": 42780 }, { "epoch": 0.273373113731904, "grad_norm": 1.380008339881897, "learning_rate": 9.546426430874658e-05, "loss": 0.8951, "step": 42790 }, { "epoch": 0.2734370008816427, "grad_norm": 1.8795280456542969, "learning_rate": 9.546217585297236e-05, "loss": 0.8414, "step": 42800 }, { "epoch": 0.2735008880313814, "grad_norm": 1.4341343641281128, "learning_rate": 9.546008693935473e-05, "loss": 0.7366, "step": 42810 }, { "epoch": 0.27356477518112005, "grad_norm": 1.095348596572876, "learning_rate": 9.545799756791467e-05, "loss": 0.8262, "step": 42820 }, { "epoch": 0.27362866233085875, "grad_norm": 0.8619642853736877, "learning_rate": 9.545590773867325e-05, "loss": 0.9742, "step": 42830 }, { "epoch": 0.27369254948059746, "grad_norm": 0.8517597317695618, "learning_rate": 9.545381745165154e-05, "loss": 0.9214, "step": 42840 }, { "epoch": 0.27375643663033616, "grad_norm": 0.5588153600692749, "learning_rate": 9.545172670687053e-05, "loss": 0.7431, "step": 42850 }, { "epoch": 0.27382032378007487, "grad_norm": 0.8848083019256592, "learning_rate": 9.544963550435133e-05, "loss": 0.7999, "step": 42860 }, { "epoch": 0.2738842109298136, "grad_norm": 0.7159634232521057, "learning_rate": 9.544754384411499e-05, "loss": 0.9701, "step": 42870 }, { "epoch": 0.2739480980795523, "grad_norm": 0.8267273306846619, "learning_rate": 9.544545172618255e-05, "loss": 0.8219, "step": 42880 }, { "epoch": 0.274011985229291, "grad_norm": 1.47396981716156, "learning_rate": 9.54433591505751e-05, "loss": 1.1159, "step": 42890 }, { "epoch": 0.2740758723790297, "grad_norm": 0.6588977575302124, "learning_rate": 9.54412661173137e-05, "loss": 0.9557, "step": 42900 }, { "epoch": 0.2741397595287684, "grad_norm": 0.9745913743972778, "learning_rate": 9.543917262641944e-05, "loss": 0.9112, "step": 42910 }, { "epoch": 0.2742036466785071, "grad_norm": 0.8897466063499451, "learning_rate": 9.543707867791342e-05, "loss": 0.9167, "step": 42920 }, { "epoch": 0.2742675338282458, "grad_norm": 0.7316814064979553, "learning_rate": 9.543498427181669e-05, "loss": 0.8055, "step": 42930 }, { "epoch": 0.27433142097798446, "grad_norm": 1.263555645942688, "learning_rate": 9.543288940815036e-05, "loss": 0.9486, "step": 42940 }, { "epoch": 0.27439530812772317, "grad_norm": 0.6425541639328003, "learning_rate": 9.543079408693554e-05, "loss": 0.7572, "step": 42950 }, { "epoch": 0.2744591952774619, "grad_norm": 0.5203328728675842, "learning_rate": 9.542869830819332e-05, "loss": 0.7523, "step": 42960 }, { "epoch": 0.2745230824272006, "grad_norm": 0.5347851514816284, "learning_rate": 9.542660207194481e-05, "loss": 0.8578, "step": 42970 }, { "epoch": 0.2745869695769393, "grad_norm": 0.863381564617157, "learning_rate": 9.542450537821111e-05, "loss": 0.87, "step": 42980 }, { "epoch": 0.274650856726678, "grad_norm": 0.608485996723175, "learning_rate": 9.542240822701333e-05, "loss": 1.1265, "step": 42990 }, { "epoch": 0.2747147438764167, "grad_norm": 0.8107671737670898, "learning_rate": 9.542031061837262e-05, "loss": 1.0411, "step": 43000 }, { "epoch": 0.2747786310261554, "grad_norm": 0.8316226601600647, "learning_rate": 9.541821255231009e-05, "loss": 0.7935, "step": 43010 }, { "epoch": 0.2748425181758941, "grad_norm": 0.741036593914032, "learning_rate": 9.541611402884685e-05, "loss": 0.9842, "step": 43020 }, { "epoch": 0.2749064053256328, "grad_norm": 1.501214861869812, "learning_rate": 9.541401504800407e-05, "loss": 1.4551, "step": 43030 }, { "epoch": 0.2749702924753715, "grad_norm": 0.6897192001342773, "learning_rate": 9.541191560980287e-05, "loss": 0.7029, "step": 43040 }, { "epoch": 0.27503417962511023, "grad_norm": 0.555748701095581, "learning_rate": 9.540981571426437e-05, "loss": 0.8156, "step": 43050 }, { "epoch": 0.2750980667748489, "grad_norm": 0.5355345010757446, "learning_rate": 9.540771536140976e-05, "loss": 1.1097, "step": 43060 }, { "epoch": 0.2751619539245876, "grad_norm": 0.839185357093811, "learning_rate": 9.540561455126018e-05, "loss": 0.7823, "step": 43070 }, { "epoch": 0.2752258410743263, "grad_norm": 1.0240132808685303, "learning_rate": 9.540351328383676e-05, "loss": 0.825, "step": 43080 }, { "epoch": 0.275289728224065, "grad_norm": 0.6717436909675598, "learning_rate": 9.54014115591607e-05, "loss": 0.8841, "step": 43090 }, { "epoch": 0.2753536153738037, "grad_norm": 0.7767571806907654, "learning_rate": 9.539930937725313e-05, "loss": 0.8338, "step": 43100 }, { "epoch": 0.2754175025235424, "grad_norm": 0.9243952035903931, "learning_rate": 9.539720673813526e-05, "loss": 0.8565, "step": 43110 }, { "epoch": 0.2754813896732811, "grad_norm": 0.7475656270980835, "learning_rate": 9.539510364182822e-05, "loss": 0.8373, "step": 43120 }, { "epoch": 0.2755452768230198, "grad_norm": 0.7929537296295166, "learning_rate": 9.539300008835323e-05, "loss": 1.1913, "step": 43130 }, { "epoch": 0.27560916397275853, "grad_norm": 0.9647955298423767, "learning_rate": 9.539089607773145e-05, "loss": 0.8969, "step": 43140 }, { "epoch": 0.27567305112249724, "grad_norm": 0.7929497957229614, "learning_rate": 9.538879160998408e-05, "loss": 0.8129, "step": 43150 }, { "epoch": 0.27573693827223594, "grad_norm": 0.7888079881668091, "learning_rate": 9.538668668513232e-05, "loss": 1.0804, "step": 43160 }, { "epoch": 0.27580082542197465, "grad_norm": 1.1037369966506958, "learning_rate": 9.538458130319736e-05, "loss": 0.7396, "step": 43170 }, { "epoch": 0.2758647125717133, "grad_norm": 1.0101234912872314, "learning_rate": 9.538247546420038e-05, "loss": 0.899, "step": 43180 }, { "epoch": 0.275928599721452, "grad_norm": 1.06256902217865, "learning_rate": 9.538036916816264e-05, "loss": 0.8693, "step": 43190 }, { "epoch": 0.2759924868711907, "grad_norm": 1.1790313720703125, "learning_rate": 9.53782624151053e-05, "loss": 0.8821, "step": 43200 }, { "epoch": 0.2760563740209294, "grad_norm": 1.0005087852478027, "learning_rate": 9.537615520504961e-05, "loss": 0.8263, "step": 43210 }, { "epoch": 0.2761202611706681, "grad_norm": 0.965392529964447, "learning_rate": 9.537404753801679e-05, "loss": 0.7032, "step": 43220 }, { "epoch": 0.27618414832040683, "grad_norm": 1.0671719312667847, "learning_rate": 9.537193941402805e-05, "loss": 0.6795, "step": 43230 }, { "epoch": 0.27624803547014554, "grad_norm": 0.8210242390632629, "learning_rate": 9.536983083310463e-05, "loss": 0.8035, "step": 43240 }, { "epoch": 0.27631192261988424, "grad_norm": 1.3356382846832275, "learning_rate": 9.536772179526774e-05, "loss": 0.8635, "step": 43250 }, { "epoch": 0.27637580976962295, "grad_norm": 0.6056402325630188, "learning_rate": 9.536561230053866e-05, "loss": 1.1843, "step": 43260 }, { "epoch": 0.27643969691936165, "grad_norm": 0.674022912979126, "learning_rate": 9.536350234893863e-05, "loss": 1.2542, "step": 43270 }, { "epoch": 0.27650358406910036, "grad_norm": 1.186140537261963, "learning_rate": 9.536139194048888e-05, "loss": 0.884, "step": 43280 }, { "epoch": 0.27656747121883907, "grad_norm": 0.8582965731620789, "learning_rate": 9.535928107521067e-05, "loss": 0.7672, "step": 43290 }, { "epoch": 0.2766313583685777, "grad_norm": 0.8430611491203308, "learning_rate": 9.535716975312524e-05, "loss": 0.8817, "step": 43300 }, { "epoch": 0.2766952455183164, "grad_norm": 0.9493967294692993, "learning_rate": 9.535505797425388e-05, "loss": 0.9554, "step": 43310 }, { "epoch": 0.27675913266805513, "grad_norm": 0.9920264482498169, "learning_rate": 9.535294573861786e-05, "loss": 0.9554, "step": 43320 }, { "epoch": 0.27682301981779384, "grad_norm": 0.8129369616508484, "learning_rate": 9.535083304623844e-05, "loss": 0.839, "step": 43330 }, { "epoch": 0.27688690696753254, "grad_norm": 1.0524061918258667, "learning_rate": 9.534871989713688e-05, "loss": 0.8456, "step": 43340 }, { "epoch": 0.27695079411727125, "grad_norm": 0.5314822793006897, "learning_rate": 9.53466062913345e-05, "loss": 0.8378, "step": 43350 }, { "epoch": 0.27701468126700995, "grad_norm": 0.574448823928833, "learning_rate": 9.534449222885254e-05, "loss": 0.8479, "step": 43360 }, { "epoch": 0.27707856841674866, "grad_norm": 0.622386634349823, "learning_rate": 9.534237770971233e-05, "loss": 1.2532, "step": 43370 }, { "epoch": 0.27714245556648737, "grad_norm": 0.7160522937774658, "learning_rate": 9.534026273393515e-05, "loss": 0.8913, "step": 43380 }, { "epoch": 0.2772063427162261, "grad_norm": 0.8195508122444153, "learning_rate": 9.533814730154229e-05, "loss": 0.8407, "step": 43390 }, { "epoch": 0.2772702298659648, "grad_norm": 2.375558614730835, "learning_rate": 9.533603141255508e-05, "loss": 0.8774, "step": 43400 }, { "epoch": 0.2773341170157035, "grad_norm": 1.008970022201538, "learning_rate": 9.533391506699481e-05, "loss": 0.8729, "step": 43410 }, { "epoch": 0.27739800416544214, "grad_norm": 1.062031865119934, "learning_rate": 9.533179826488278e-05, "loss": 0.7358, "step": 43420 }, { "epoch": 0.27746189131518084, "grad_norm": 1.110970139503479, "learning_rate": 9.532968100624034e-05, "loss": 0.9176, "step": 43430 }, { "epoch": 0.27752577846491955, "grad_norm": 0.7017518877983093, "learning_rate": 9.532756329108879e-05, "loss": 0.7531, "step": 43440 }, { "epoch": 0.27758966561465825, "grad_norm": 0.7108795046806335, "learning_rate": 9.532544511944945e-05, "loss": 1.2516, "step": 43450 }, { "epoch": 0.27765355276439696, "grad_norm": 1.1942083835601807, "learning_rate": 9.532332649134368e-05, "loss": 0.8122, "step": 43460 }, { "epoch": 0.27771743991413567, "grad_norm": 1.6900832653045654, "learning_rate": 9.53212074067928e-05, "loss": 0.9352, "step": 43470 }, { "epoch": 0.2777813270638744, "grad_norm": 0.6773011088371277, "learning_rate": 9.531908786581816e-05, "loss": 0.7606, "step": 43480 }, { "epoch": 0.2778452142136131, "grad_norm": 0.9404903054237366, "learning_rate": 9.53169678684411e-05, "loss": 0.8783, "step": 43490 }, { "epoch": 0.2779091013633518, "grad_norm": 0.9946535229682922, "learning_rate": 9.531484741468296e-05, "loss": 0.8917, "step": 43500 }, { "epoch": 0.2779729885130905, "grad_norm": 0.9458416104316711, "learning_rate": 9.531272650456508e-05, "loss": 1.1044, "step": 43510 }, { "epoch": 0.2780368756628292, "grad_norm": 0.5456779599189758, "learning_rate": 9.531060513810887e-05, "loss": 0.7003, "step": 43520 }, { "epoch": 0.2781007628125679, "grad_norm": 0.7244671583175659, "learning_rate": 9.530848331533569e-05, "loss": 0.8803, "step": 43530 }, { "epoch": 0.2781646499623066, "grad_norm": 0.7101406455039978, "learning_rate": 9.530636103626684e-05, "loss": 0.9651, "step": 43540 }, { "epoch": 0.27822853711204526, "grad_norm": 0.8346617221832275, "learning_rate": 9.530423830092376e-05, "loss": 1.1236, "step": 43550 }, { "epoch": 0.27829242426178397, "grad_norm": 1.0931973457336426, "learning_rate": 9.530211510932781e-05, "loss": 1.1875, "step": 43560 }, { "epoch": 0.27835631141152267, "grad_norm": 0.7076020240783691, "learning_rate": 9.529999146150037e-05, "loss": 0.9529, "step": 43570 }, { "epoch": 0.2784201985612614, "grad_norm": 0.8255470991134644, "learning_rate": 9.529786735746281e-05, "loss": 0.9156, "step": 43580 }, { "epoch": 0.2784840857110001, "grad_norm": 1.8320345878601074, "learning_rate": 9.529574279723655e-05, "loss": 1.1068, "step": 43590 }, { "epoch": 0.2785479728607388, "grad_norm": 0.8600050806999207, "learning_rate": 9.529361778084297e-05, "loss": 0.7782, "step": 43600 }, { "epoch": 0.2786118600104775, "grad_norm": 0.5279654860496521, "learning_rate": 9.529149230830348e-05, "loss": 0.6292, "step": 43610 }, { "epoch": 0.2786757471602162, "grad_norm": 1.0602446794509888, "learning_rate": 9.528936637963948e-05, "loss": 0.8949, "step": 43620 }, { "epoch": 0.2787396343099549, "grad_norm": 0.687105655670166, "learning_rate": 9.528723999487236e-05, "loss": 0.9358, "step": 43630 }, { "epoch": 0.2788035214596936, "grad_norm": 0.6915990114212036, "learning_rate": 9.528511315402358e-05, "loss": 1.011, "step": 43640 }, { "epoch": 0.2788674086094323, "grad_norm": 1.5329688787460327, "learning_rate": 9.528298585711453e-05, "loss": 0.8036, "step": 43650 }, { "epoch": 0.278931295759171, "grad_norm": 0.8263123035430908, "learning_rate": 9.528085810416663e-05, "loss": 0.8864, "step": 43660 }, { "epoch": 0.2789951829089097, "grad_norm": 1.231269121170044, "learning_rate": 9.52787298952013e-05, "loss": 0.8268, "step": 43670 }, { "epoch": 0.2790590700586484, "grad_norm": 0.7919381260871887, "learning_rate": 9.527660123024e-05, "loss": 1.0224, "step": 43680 }, { "epoch": 0.2791229572083871, "grad_norm": 0.850471556186676, "learning_rate": 9.527447210930417e-05, "loss": 0.7467, "step": 43690 }, { "epoch": 0.2791868443581258, "grad_norm": 1.2980278730392456, "learning_rate": 9.527234253241522e-05, "loss": 0.7765, "step": 43700 }, { "epoch": 0.2792507315078645, "grad_norm": 1.044519066810608, "learning_rate": 9.527021249959462e-05, "loss": 1.0149, "step": 43710 }, { "epoch": 0.2793146186576032, "grad_norm": 0.9248097538948059, "learning_rate": 9.526808201086382e-05, "loss": 1.0461, "step": 43720 }, { "epoch": 0.2793785058073419, "grad_norm": 0.7465457320213318, "learning_rate": 9.526595106624428e-05, "loss": 0.8779, "step": 43730 }, { "epoch": 0.2794423929570806, "grad_norm": 1.1304700374603271, "learning_rate": 9.526381966575744e-05, "loss": 0.9451, "step": 43740 }, { "epoch": 0.2795062801068193, "grad_norm": 0.8045159578323364, "learning_rate": 9.526168780942477e-05, "loss": 0.851, "step": 43750 }, { "epoch": 0.27957016725655803, "grad_norm": 0.8568775057792664, "learning_rate": 9.525955549726776e-05, "loss": 1.2149, "step": 43760 }, { "epoch": 0.27963405440629674, "grad_norm": 0.8600241541862488, "learning_rate": 9.525742272930787e-05, "loss": 1.2311, "step": 43770 }, { "epoch": 0.27969794155603545, "grad_norm": 0.9878765940666199, "learning_rate": 9.525528950556657e-05, "loss": 0.7023, "step": 43780 }, { "epoch": 0.2797618287057741, "grad_norm": 1.1209038496017456, "learning_rate": 9.525315582606537e-05, "loss": 0.7451, "step": 43790 }, { "epoch": 0.2798257158555128, "grad_norm": 0.8326137065887451, "learning_rate": 9.525102169082573e-05, "loss": 1.139, "step": 43800 }, { "epoch": 0.2798896030052515, "grad_norm": 0.7747043371200562, "learning_rate": 9.524888709986914e-05, "loss": 1.1918, "step": 43810 }, { "epoch": 0.2799534901549902, "grad_norm": 0.7023658752441406, "learning_rate": 9.524675205321713e-05, "loss": 1.1406, "step": 43820 }, { "epoch": 0.2800173773047289, "grad_norm": 0.9086745977401733, "learning_rate": 9.524461655089119e-05, "loss": 1.0247, "step": 43830 }, { "epoch": 0.2800812644544676, "grad_norm": 0.850036084651947, "learning_rate": 9.52424805929128e-05, "loss": 0.9891, "step": 43840 }, { "epoch": 0.28014515160420633, "grad_norm": 0.7269537448883057, "learning_rate": 9.52403441793035e-05, "loss": 0.917, "step": 43850 }, { "epoch": 0.28020903875394504, "grad_norm": 0.6691316962242126, "learning_rate": 9.523820731008479e-05, "loss": 0.8787, "step": 43860 }, { "epoch": 0.28027292590368375, "grad_norm": 0.9154207110404968, "learning_rate": 9.52360699852782e-05, "loss": 1.1603, "step": 43870 }, { "epoch": 0.28033681305342245, "grad_norm": 0.8188737034797668, "learning_rate": 9.523393220490526e-05, "loss": 1.0814, "step": 43880 }, { "epoch": 0.28040070020316116, "grad_norm": 0.873671293258667, "learning_rate": 9.523179396898748e-05, "loss": 0.8897, "step": 43890 }, { "epoch": 0.28046458735289986, "grad_norm": 1.1229972839355469, "learning_rate": 9.52296552775464e-05, "loss": 0.9826, "step": 43900 }, { "epoch": 0.2805284745026385, "grad_norm": 0.664357602596283, "learning_rate": 9.522751613060356e-05, "loss": 0.8484, "step": 43910 }, { "epoch": 0.2805923616523772, "grad_norm": 0.6779937148094177, "learning_rate": 9.522537652818051e-05, "loss": 0.8464, "step": 43920 }, { "epoch": 0.2806562488021159, "grad_norm": 0.7764692306518555, "learning_rate": 9.522323647029879e-05, "loss": 1.005, "step": 43930 }, { "epoch": 0.28072013595185463, "grad_norm": 0.6373327970504761, "learning_rate": 9.522109595697997e-05, "loss": 0.7112, "step": 43940 }, { "epoch": 0.28078402310159334, "grad_norm": 1.3876614570617676, "learning_rate": 9.521895498824558e-05, "loss": 0.7226, "step": 43950 }, { "epoch": 0.28084791025133204, "grad_norm": 1.1054093837738037, "learning_rate": 9.521681356411718e-05, "loss": 0.8765, "step": 43960 }, { "epoch": 0.28091179740107075, "grad_norm": 0.7157889008522034, "learning_rate": 9.521467168461637e-05, "loss": 0.9107, "step": 43970 }, { "epoch": 0.28097568455080946, "grad_norm": 0.7243021726608276, "learning_rate": 9.521252934976469e-05, "loss": 0.6519, "step": 43980 }, { "epoch": 0.28103957170054816, "grad_norm": 1.0799028873443604, "learning_rate": 9.521038655958373e-05, "loss": 0.9358, "step": 43990 }, { "epoch": 0.28110345885028687, "grad_norm": 0.8918923139572144, "learning_rate": 9.520824331409506e-05, "loss": 1.0831, "step": 44000 }, { "epoch": 0.2811673460000256, "grad_norm": 0.6121041774749756, "learning_rate": 9.520609961332027e-05, "loss": 0.9407, "step": 44010 }, { "epoch": 0.2812312331497643, "grad_norm": 0.8242114186286926, "learning_rate": 9.520395545728096e-05, "loss": 0.6712, "step": 44020 }, { "epoch": 0.28129512029950293, "grad_norm": 0.8655091524124146, "learning_rate": 9.52018108459987e-05, "loss": 0.8633, "step": 44030 }, { "epoch": 0.28135900744924164, "grad_norm": 0.7352428436279297, "learning_rate": 9.51996657794951e-05, "loss": 1.0033, "step": 44040 }, { "epoch": 0.28142289459898034, "grad_norm": 1.2268953323364258, "learning_rate": 9.519752025779177e-05, "loss": 1.0435, "step": 44050 }, { "epoch": 0.28148678174871905, "grad_norm": 0.766212522983551, "learning_rate": 9.51953742809103e-05, "loss": 0.8992, "step": 44060 }, { "epoch": 0.28155066889845776, "grad_norm": 0.9193620681762695, "learning_rate": 9.51932278488723e-05, "loss": 0.9638, "step": 44070 }, { "epoch": 0.28161455604819646, "grad_norm": 0.7695852518081665, "learning_rate": 9.519108096169943e-05, "loss": 0.875, "step": 44080 }, { "epoch": 0.28167844319793517, "grad_norm": 0.7172717452049255, "learning_rate": 9.518893361941326e-05, "loss": 1.106, "step": 44090 }, { "epoch": 0.2817423303476739, "grad_norm": 0.7750954031944275, "learning_rate": 9.518678582203542e-05, "loss": 1.0095, "step": 44100 }, { "epoch": 0.2818062174974126, "grad_norm": 1.0420329570770264, "learning_rate": 9.518463756958758e-05, "loss": 0.6998, "step": 44110 }, { "epoch": 0.2818701046471513, "grad_norm": 1.6733392477035522, "learning_rate": 9.518248886209134e-05, "loss": 0.733, "step": 44120 }, { "epoch": 0.28193399179689, "grad_norm": 0.9372019171714783, "learning_rate": 9.518033969956834e-05, "loss": 0.8278, "step": 44130 }, { "epoch": 0.2819978789466287, "grad_norm": 0.6449085474014282, "learning_rate": 9.517819008204025e-05, "loss": 0.8307, "step": 44140 }, { "epoch": 0.28206176609636735, "grad_norm": 1.0656044483184814, "learning_rate": 9.517604000952869e-05, "loss": 0.7596, "step": 44150 }, { "epoch": 0.28212565324610606, "grad_norm": 1.2960087060928345, "learning_rate": 9.517388948205532e-05, "loss": 0.7277, "step": 44160 }, { "epoch": 0.28218954039584476, "grad_norm": 0.7332021594047546, "learning_rate": 9.517173849964181e-05, "loss": 0.9438, "step": 44170 }, { "epoch": 0.28225342754558347, "grad_norm": 1.0289632081985474, "learning_rate": 9.516958706230981e-05, "loss": 1.1855, "step": 44180 }, { "epoch": 0.2823173146953222, "grad_norm": 0.628814697265625, "learning_rate": 9.516743517008099e-05, "loss": 1.0023, "step": 44190 }, { "epoch": 0.2823812018450609, "grad_norm": 0.8182355165481567, "learning_rate": 9.516528282297703e-05, "loss": 0.924, "step": 44200 }, { "epoch": 0.2824450889947996, "grad_norm": 0.7950728535652161, "learning_rate": 9.51631300210196e-05, "loss": 0.6805, "step": 44210 }, { "epoch": 0.2825089761445383, "grad_norm": 0.902574360370636, "learning_rate": 9.516097676423037e-05, "loss": 0.9775, "step": 44220 }, { "epoch": 0.282572863294277, "grad_norm": 0.8031740784645081, "learning_rate": 9.515882305263104e-05, "loss": 1.0566, "step": 44230 }, { "epoch": 0.2826367504440157, "grad_norm": 0.8170803189277649, "learning_rate": 9.515666888624329e-05, "loss": 0.9942, "step": 44240 }, { "epoch": 0.2827006375937544, "grad_norm": 0.6290708184242249, "learning_rate": 9.515451426508882e-05, "loss": 0.7682, "step": 44250 }, { "epoch": 0.2827645247434931, "grad_norm": 0.9795184135437012, "learning_rate": 9.515235918918932e-05, "loss": 0.7583, "step": 44260 }, { "epoch": 0.28282841189323177, "grad_norm": 0.7491911053657532, "learning_rate": 9.515020365856651e-05, "loss": 0.9229, "step": 44270 }, { "epoch": 0.2828922990429705, "grad_norm": 1.1259350776672363, "learning_rate": 9.51480476732421e-05, "loss": 1.0074, "step": 44280 }, { "epoch": 0.2829561861927092, "grad_norm": 1.4809681177139282, "learning_rate": 9.514589123323777e-05, "loss": 0.7639, "step": 44290 }, { "epoch": 0.2830200733424479, "grad_norm": 1.034775733947754, "learning_rate": 9.514373433857527e-05, "loss": 0.9323, "step": 44300 }, { "epoch": 0.2830839604921866, "grad_norm": 0.6214499473571777, "learning_rate": 9.51415769892763e-05, "loss": 0.7838, "step": 44310 }, { "epoch": 0.2831478476419253, "grad_norm": 3.2163054943084717, "learning_rate": 9.51394191853626e-05, "loss": 1.0207, "step": 44320 }, { "epoch": 0.283211734791664, "grad_norm": 0.6460834741592407, "learning_rate": 9.513726092685591e-05, "loss": 1.1491, "step": 44330 }, { "epoch": 0.2832756219414027, "grad_norm": 0.7531580924987793, "learning_rate": 9.513510221377793e-05, "loss": 0.7412, "step": 44340 }, { "epoch": 0.2833395090911414, "grad_norm": 1.0614440441131592, "learning_rate": 9.513294304615044e-05, "loss": 0.8966, "step": 44350 }, { "epoch": 0.2834033962408801, "grad_norm": 0.6449925303459167, "learning_rate": 9.513078342399517e-05, "loss": 0.9988, "step": 44360 }, { "epoch": 0.28346728339061883, "grad_norm": 0.7040312886238098, "learning_rate": 9.512862334733386e-05, "loss": 0.8929, "step": 44370 }, { "epoch": 0.28353117054035754, "grad_norm": 1.03850257396698, "learning_rate": 9.512646281618828e-05, "loss": 0.5284, "step": 44380 }, { "epoch": 0.28359505769009624, "grad_norm": 1.0077382326126099, "learning_rate": 9.512430183058016e-05, "loss": 0.8976, "step": 44390 }, { "epoch": 0.2836589448398349, "grad_norm": 1.8333910703659058, "learning_rate": 9.512214039053131e-05, "loss": 0.9276, "step": 44400 }, { "epoch": 0.2837228319895736, "grad_norm": 1.7474950551986694, "learning_rate": 9.511997849606344e-05, "loss": 0.9906, "step": 44410 }, { "epoch": 0.2837867191393123, "grad_norm": 0.6216913461685181, "learning_rate": 9.511781614719838e-05, "loss": 0.8697, "step": 44420 }, { "epoch": 0.283850606289051, "grad_norm": 0.8137566447257996, "learning_rate": 9.511565334395786e-05, "loss": 1.1427, "step": 44430 }, { "epoch": 0.2839144934387897, "grad_norm": 0.8825230598449707, "learning_rate": 9.51134900863637e-05, "loss": 0.8255, "step": 44440 }, { "epoch": 0.2839783805885284, "grad_norm": 1.51393461227417, "learning_rate": 9.511132637443765e-05, "loss": 0.9725, "step": 44450 }, { "epoch": 0.28404226773826713, "grad_norm": 1.1853009462356567, "learning_rate": 9.510916220820152e-05, "loss": 0.9037, "step": 44460 }, { "epoch": 0.28410615488800584, "grad_norm": 1.493323802947998, "learning_rate": 9.510699758767709e-05, "loss": 0.9787, "step": 44470 }, { "epoch": 0.28417004203774454, "grad_norm": 0.8198840022087097, "learning_rate": 9.510483251288619e-05, "loss": 0.8874, "step": 44480 }, { "epoch": 0.28423392918748325, "grad_norm": 0.7507383227348328, "learning_rate": 9.51026669838506e-05, "loss": 1.0607, "step": 44490 }, { "epoch": 0.28429781633722195, "grad_norm": 0.698621928691864, "learning_rate": 9.510050100059214e-05, "loss": 0.9481, "step": 44500 }, { "epoch": 0.28436170348696066, "grad_norm": 1.1146284341812134, "learning_rate": 9.50983345631326e-05, "loss": 1.0293, "step": 44510 }, { "epoch": 0.2844255906366993, "grad_norm": 0.7054140567779541, "learning_rate": 9.509616767149383e-05, "loss": 0.8634, "step": 44520 }, { "epoch": 0.284489477786438, "grad_norm": 0.7089869976043701, "learning_rate": 9.509400032569763e-05, "loss": 1.0544, "step": 44530 }, { "epoch": 0.2845533649361767, "grad_norm": 0.817032516002655, "learning_rate": 9.509183252576583e-05, "loss": 0.8768, "step": 44540 }, { "epoch": 0.28461725208591543, "grad_norm": 0.9883630275726318, "learning_rate": 9.508966427172028e-05, "loss": 0.8952, "step": 44550 }, { "epoch": 0.28468113923565413, "grad_norm": 0.8523919582366943, "learning_rate": 9.50874955635828e-05, "loss": 0.9583, "step": 44560 }, { "epoch": 0.28474502638539284, "grad_norm": 0.5503199696540833, "learning_rate": 9.508532640137522e-05, "loss": 0.986, "step": 44570 }, { "epoch": 0.28480891353513155, "grad_norm": 0.6643738150596619, "learning_rate": 9.50831567851194e-05, "loss": 0.9774, "step": 44580 }, { "epoch": 0.28487280068487025, "grad_norm": 0.8332018256187439, "learning_rate": 9.50809867148372e-05, "loss": 0.8159, "step": 44590 }, { "epoch": 0.28493668783460896, "grad_norm": 0.8186637759208679, "learning_rate": 9.507881619055046e-05, "loss": 0.8421, "step": 44600 }, { "epoch": 0.28500057498434767, "grad_norm": 0.5218867659568787, "learning_rate": 9.507664521228106e-05, "loss": 0.6236, "step": 44610 }, { "epoch": 0.28506446213408637, "grad_norm": 0.8833245038986206, "learning_rate": 9.507447378005083e-05, "loss": 0.7893, "step": 44620 }, { "epoch": 0.2851283492838251, "grad_norm": 0.9100516438484192, "learning_rate": 9.507230189388164e-05, "loss": 0.8338, "step": 44630 }, { "epoch": 0.28519223643356373, "grad_norm": 1.299561858177185, "learning_rate": 9.50701295537954e-05, "loss": 0.7506, "step": 44640 }, { "epoch": 0.28525612358330243, "grad_norm": 0.8499222993850708, "learning_rate": 9.506795675981394e-05, "loss": 0.8838, "step": 44650 }, { "epoch": 0.28532001073304114, "grad_norm": 0.7215855121612549, "learning_rate": 9.506578351195918e-05, "loss": 1.1284, "step": 44660 }, { "epoch": 0.28538389788277985, "grad_norm": 0.9105709791183472, "learning_rate": 9.5063609810253e-05, "loss": 0.7879, "step": 44670 }, { "epoch": 0.28544778503251855, "grad_norm": 1.2917571067810059, "learning_rate": 9.506165309069255e-05, "loss": 1.0344, "step": 44680 }, { "epoch": 0.28551167218225726, "grad_norm": 0.7351425886154175, "learning_rate": 9.505947852672896e-05, "loss": 0.9027, "step": 44690 }, { "epoch": 0.28557555933199597, "grad_norm": 1.4496943950653076, "learning_rate": 9.505730350897745e-05, "loss": 0.7425, "step": 44700 }, { "epoch": 0.28563944648173467, "grad_norm": 0.9381955862045288, "learning_rate": 9.505512803745991e-05, "loss": 1.0261, "step": 44710 }, { "epoch": 0.2857033336314734, "grad_norm": 1.0335243940353394, "learning_rate": 9.505295211219824e-05, "loss": 1.0252, "step": 44720 }, { "epoch": 0.2857672207812121, "grad_norm": 0.9310586452484131, "learning_rate": 9.505077573321438e-05, "loss": 0.947, "step": 44730 }, { "epoch": 0.2858311079309508, "grad_norm": 1.818731665611267, "learning_rate": 9.504859890053023e-05, "loss": 0.9851, "step": 44740 }, { "epoch": 0.2858949950806895, "grad_norm": 2.0556581020355225, "learning_rate": 9.504642161416773e-05, "loss": 0.7945, "step": 44750 }, { "epoch": 0.28595888223042815, "grad_norm": 0.9168753027915955, "learning_rate": 9.504424387414876e-05, "loss": 0.7114, "step": 44760 }, { "epoch": 0.28602276938016685, "grad_norm": 0.7781484723091125, "learning_rate": 9.504206568049532e-05, "loss": 0.8891, "step": 44770 }, { "epoch": 0.28608665652990556, "grad_norm": 2.3256382942199707, "learning_rate": 9.503988703322928e-05, "loss": 0.8655, "step": 44780 }, { "epoch": 0.28615054367964426, "grad_norm": 0.826259970664978, "learning_rate": 9.503770793237263e-05, "loss": 0.9931, "step": 44790 }, { "epoch": 0.28621443082938297, "grad_norm": 1.0867620706558228, "learning_rate": 9.50355283779473e-05, "loss": 0.9931, "step": 44800 }, { "epoch": 0.2862783179791217, "grad_norm": 0.6833561658859253, "learning_rate": 9.503334836997524e-05, "loss": 0.7724, "step": 44810 }, { "epoch": 0.2863422051288604, "grad_norm": 0.8544519543647766, "learning_rate": 9.503116790847839e-05, "loss": 0.8207, "step": 44820 }, { "epoch": 0.2864060922785991, "grad_norm": 0.5061067342758179, "learning_rate": 9.502898699347873e-05, "loss": 0.8357, "step": 44830 }, { "epoch": 0.2864699794283378, "grad_norm": 0.91792231798172, "learning_rate": 9.502680562499821e-05, "loss": 1.0274, "step": 44840 }, { "epoch": 0.2865338665780765, "grad_norm": 0.8766928911209106, "learning_rate": 9.502462380305881e-05, "loss": 0.8878, "step": 44850 }, { "epoch": 0.2865977537278152, "grad_norm": 0.5164894461631775, "learning_rate": 9.50224415276825e-05, "loss": 0.961, "step": 44860 }, { "epoch": 0.2866616408775539, "grad_norm": 0.7407921552658081, "learning_rate": 9.502025879889125e-05, "loss": 0.8303, "step": 44870 }, { "epoch": 0.28672552802729256, "grad_norm": 0.8378937244415283, "learning_rate": 9.501807561670703e-05, "loss": 0.831, "step": 44880 }, { "epoch": 0.28678941517703127, "grad_norm": 2.5379931926727295, "learning_rate": 9.501589198115186e-05, "loss": 0.8583, "step": 44890 }, { "epoch": 0.28685330232677, "grad_norm": 0.8692481517791748, "learning_rate": 9.501370789224772e-05, "loss": 0.8469, "step": 44900 }, { "epoch": 0.2869171894765087, "grad_norm": 0.7590452432632446, "learning_rate": 9.501152335001658e-05, "loss": 0.7567, "step": 44910 }, { "epoch": 0.2869810766262474, "grad_norm": 0.6347168684005737, "learning_rate": 9.500933835448047e-05, "loss": 0.898, "step": 44920 }, { "epoch": 0.2870449637759861, "grad_norm": 0.975532054901123, "learning_rate": 9.500715290566138e-05, "loss": 1.1958, "step": 44930 }, { "epoch": 0.2871088509257248, "grad_norm": 0.670184850692749, "learning_rate": 9.500496700358132e-05, "loss": 0.9185, "step": 44940 }, { "epoch": 0.2871727380754635, "grad_norm": 0.5493016839027405, "learning_rate": 9.500278064826232e-05, "loss": 0.9177, "step": 44950 }, { "epoch": 0.2872366252252022, "grad_norm": 1.360520839691162, "learning_rate": 9.500059383972638e-05, "loss": 0.9026, "step": 44960 }, { "epoch": 0.2873005123749409, "grad_norm": 0.6873490214347839, "learning_rate": 9.499840657799553e-05, "loss": 0.7064, "step": 44970 }, { "epoch": 0.2873643995246796, "grad_norm": 0.814471423625946, "learning_rate": 9.49962188630918e-05, "loss": 0.8996, "step": 44980 }, { "epoch": 0.28742828667441833, "grad_norm": 0.7156900763511658, "learning_rate": 9.49940306950372e-05, "loss": 0.841, "step": 44990 }, { "epoch": 0.287492173824157, "grad_norm": 0.6915486454963684, "learning_rate": 9.499184207385381e-05, "loss": 0.8996, "step": 45000 }, { "epoch": 0.2875560609738957, "grad_norm": 1.0259060859680176, "learning_rate": 9.498965299956364e-05, "loss": 0.9954, "step": 45010 }, { "epoch": 0.2876199481236344, "grad_norm": 0.5235810875892639, "learning_rate": 9.498746347218873e-05, "loss": 1.1643, "step": 45020 }, { "epoch": 0.2876838352733731, "grad_norm": 0.7001626491546631, "learning_rate": 9.498527349175115e-05, "loss": 1.0269, "step": 45030 }, { "epoch": 0.2877477224231118, "grad_norm": 1.0902423858642578, "learning_rate": 9.498308305827294e-05, "loss": 0.9768, "step": 45040 }, { "epoch": 0.2878116095728505, "grad_norm": 0.6482483744621277, "learning_rate": 9.49808921717762e-05, "loss": 1.0191, "step": 45050 }, { "epoch": 0.2878754967225892, "grad_norm": 1.0290073156356812, "learning_rate": 9.497870083228292e-05, "loss": 1.0096, "step": 45060 }, { "epoch": 0.2879393838723279, "grad_norm": 0.8370404243469238, "learning_rate": 9.497650903981524e-05, "loss": 1.0161, "step": 45070 }, { "epoch": 0.28800327102206663, "grad_norm": 0.8315509557723999, "learning_rate": 9.497431679439519e-05, "loss": 0.7909, "step": 45080 }, { "epoch": 0.28806715817180534, "grad_norm": 0.7571452856063843, "learning_rate": 9.497212409604487e-05, "loss": 0.9372, "step": 45090 }, { "epoch": 0.28813104532154404, "grad_norm": 0.9375543594360352, "learning_rate": 9.496993094478634e-05, "loss": 0.6588, "step": 45100 }, { "epoch": 0.28819493247128275, "grad_norm": 0.8192710876464844, "learning_rate": 9.496773734064171e-05, "loss": 0.9545, "step": 45110 }, { "epoch": 0.2882588196210214, "grad_norm": 0.8890470862388611, "learning_rate": 9.496554328363307e-05, "loss": 0.9824, "step": 45120 }, { "epoch": 0.2883227067707601, "grad_norm": 0.8460478186607361, "learning_rate": 9.49633487737825e-05, "loss": 0.66, "step": 45130 }, { "epoch": 0.2883865939204988, "grad_norm": 1.1381182670593262, "learning_rate": 9.496115381111211e-05, "loss": 0.788, "step": 45140 }, { "epoch": 0.2884504810702375, "grad_norm": 0.7069154977798462, "learning_rate": 9.495895839564401e-05, "loss": 0.7456, "step": 45150 }, { "epoch": 0.2885143682199762, "grad_norm": 0.9826921820640564, "learning_rate": 9.495676252740029e-05, "loss": 1.0517, "step": 45160 }, { "epoch": 0.28857825536971493, "grad_norm": 0.9633061289787292, "learning_rate": 9.495456620640308e-05, "loss": 1.0595, "step": 45170 }, { "epoch": 0.28864214251945364, "grad_norm": 0.8172567486763, "learning_rate": 9.495236943267451e-05, "loss": 0.676, "step": 45180 }, { "epoch": 0.28870602966919234, "grad_norm": 0.9763637185096741, "learning_rate": 9.495017220623669e-05, "loss": 0.7928, "step": 45190 }, { "epoch": 0.28876991681893105, "grad_norm": 1.778890609741211, "learning_rate": 9.494797452711174e-05, "loss": 0.699, "step": 45200 }, { "epoch": 0.28883380396866976, "grad_norm": 0.7678098678588867, "learning_rate": 9.49457763953218e-05, "loss": 0.8654, "step": 45210 }, { "epoch": 0.28889769111840846, "grad_norm": 0.7432067394256592, "learning_rate": 9.494357781088901e-05, "loss": 0.7378, "step": 45220 }, { "epoch": 0.28896157826814717, "grad_norm": 0.9834187030792236, "learning_rate": 9.494137877383551e-05, "loss": 0.9317, "step": 45230 }, { "epoch": 0.2890254654178859, "grad_norm": 0.6653081178665161, "learning_rate": 9.493917928418345e-05, "loss": 0.7968, "step": 45240 }, { "epoch": 0.2890893525676245, "grad_norm": 0.7635434865951538, "learning_rate": 9.493697934195499e-05, "loss": 0.8611, "step": 45250 }, { "epoch": 0.28915323971736323, "grad_norm": 0.9517902135848999, "learning_rate": 9.493477894717224e-05, "loss": 0.7536, "step": 45260 }, { "epoch": 0.28921712686710194, "grad_norm": 0.7886881232261658, "learning_rate": 9.49325780998574e-05, "loss": 0.9113, "step": 45270 }, { "epoch": 0.28928101401684064, "grad_norm": 0.7776336669921875, "learning_rate": 9.493037680003264e-05, "loss": 0.8193, "step": 45280 }, { "epoch": 0.28934490116657935, "grad_norm": 0.8685764670372009, "learning_rate": 9.492817504772012e-05, "loss": 0.9521, "step": 45290 }, { "epoch": 0.28940878831631806, "grad_norm": 1.247955322265625, "learning_rate": 9.492597284294198e-05, "loss": 0.9216, "step": 45300 }, { "epoch": 0.28947267546605676, "grad_norm": 0.9125822186470032, "learning_rate": 9.492377018572046e-05, "loss": 1.003, "step": 45310 }, { "epoch": 0.28953656261579547, "grad_norm": 0.7521454095840454, "learning_rate": 9.492156707607769e-05, "loss": 0.806, "step": 45320 }, { "epoch": 0.2896004497655342, "grad_norm": 0.8048921823501587, "learning_rate": 9.491936351403588e-05, "loss": 1.0567, "step": 45330 }, { "epoch": 0.2896643369152729, "grad_norm": 0.6293105483055115, "learning_rate": 9.491715949961721e-05, "loss": 0.6753, "step": 45340 }, { "epoch": 0.2897282240650116, "grad_norm": 0.7665662169456482, "learning_rate": 9.491495503284391e-05, "loss": 1.1162, "step": 45350 }, { "epoch": 0.2897921112147503, "grad_norm": 1.389918327331543, "learning_rate": 9.491275011373813e-05, "loss": 0.8464, "step": 45360 }, { "epoch": 0.28985599836448894, "grad_norm": 0.5303570628166199, "learning_rate": 9.491054474232212e-05, "loss": 0.8697, "step": 45370 }, { "epoch": 0.28991988551422765, "grad_norm": 0.7896818518638611, "learning_rate": 9.490833891861806e-05, "loss": 0.8274, "step": 45380 }, { "epoch": 0.28998377266396635, "grad_norm": 1.085740566253662, "learning_rate": 9.490613264264818e-05, "loss": 1.2138, "step": 45390 }, { "epoch": 0.29004765981370506, "grad_norm": 0.9836480617523193, "learning_rate": 9.490392591443469e-05, "loss": 1.0133, "step": 45400 }, { "epoch": 0.29011154696344377, "grad_norm": 1.2857035398483276, "learning_rate": 9.490171873399982e-05, "loss": 0.7627, "step": 45410 }, { "epoch": 0.2901754341131825, "grad_norm": 0.7839180827140808, "learning_rate": 9.489951110136581e-05, "loss": 0.8626, "step": 45420 }, { "epoch": 0.2902393212629212, "grad_norm": 0.6946144104003906, "learning_rate": 9.489730301655486e-05, "loss": 0.8252, "step": 45430 }, { "epoch": 0.2903032084126599, "grad_norm": 0.7816669344902039, "learning_rate": 9.489509447958924e-05, "loss": 0.8103, "step": 45440 }, { "epoch": 0.2903670955623986, "grad_norm": 1.0374782085418701, "learning_rate": 9.489288549049118e-05, "loss": 0.9284, "step": 45450 }, { "epoch": 0.2904309827121373, "grad_norm": 1.0329042673110962, "learning_rate": 9.489067604928293e-05, "loss": 1.0252, "step": 45460 }, { "epoch": 0.290494869861876, "grad_norm": 1.062635898590088, "learning_rate": 9.488846615598671e-05, "loss": 0.8388, "step": 45470 }, { "epoch": 0.2905587570116147, "grad_norm": 1.2873570919036865, "learning_rate": 9.488625581062483e-05, "loss": 0.8721, "step": 45480 }, { "epoch": 0.29062264416135336, "grad_norm": 1.4806243181228638, "learning_rate": 9.48840450132195e-05, "loss": 1.1295, "step": 45490 }, { "epoch": 0.29068653131109207, "grad_norm": 0.9083405137062073, "learning_rate": 9.488183376379302e-05, "loss": 0.782, "step": 45500 }, { "epoch": 0.2907504184608308, "grad_norm": 0.43349987268447876, "learning_rate": 9.487962206236765e-05, "loss": 0.9368, "step": 45510 }, { "epoch": 0.2908143056105695, "grad_norm": 0.6599463224411011, "learning_rate": 9.487740990896564e-05, "loss": 0.7841, "step": 45520 }, { "epoch": 0.2908781927603082, "grad_norm": 0.6311991810798645, "learning_rate": 9.48751973036093e-05, "loss": 0.9138, "step": 45530 }, { "epoch": 0.2909420799100469, "grad_norm": 0.5348168015480042, "learning_rate": 9.487298424632089e-05, "loss": 0.7043, "step": 45540 }, { "epoch": 0.2910059670597856, "grad_norm": 0.8502787947654724, "learning_rate": 9.487077073712273e-05, "loss": 1.0872, "step": 45550 }, { "epoch": 0.2910698542095243, "grad_norm": 0.8174751400947571, "learning_rate": 9.486855677603707e-05, "loss": 0.9294, "step": 45560 }, { "epoch": 0.291133741359263, "grad_norm": 0.7692357897758484, "learning_rate": 9.486634236308624e-05, "loss": 0.9752, "step": 45570 }, { "epoch": 0.2911976285090017, "grad_norm": 0.43835484981536865, "learning_rate": 9.486412749829251e-05, "loss": 0.8376, "step": 45580 }, { "epoch": 0.2912615156587404, "grad_norm": 1.0766656398773193, "learning_rate": 9.486191218167823e-05, "loss": 0.95, "step": 45590 }, { "epoch": 0.29132540280847913, "grad_norm": 0.7746816277503967, "learning_rate": 9.485969641326566e-05, "loss": 1.114, "step": 45600 }, { "epoch": 0.2913892899582178, "grad_norm": 0.8561303019523621, "learning_rate": 9.485748019307716e-05, "loss": 1.125, "step": 45610 }, { "epoch": 0.2914531771079565, "grad_norm": 0.7320988774299622, "learning_rate": 9.4855263521135e-05, "loss": 0.8814, "step": 45620 }, { "epoch": 0.2915170642576952, "grad_norm": 0.9033998847007751, "learning_rate": 9.485304639746155e-05, "loss": 0.8253, "step": 45630 }, { "epoch": 0.2915809514074339, "grad_norm": 1.71013605594635, "learning_rate": 9.485082882207911e-05, "loss": 0.632, "step": 45640 }, { "epoch": 0.2916448385571726, "grad_norm": 0.5615208148956299, "learning_rate": 9.484861079501003e-05, "loss": 0.7828, "step": 45650 }, { "epoch": 0.2917087257069113, "grad_norm": 1.236778974533081, "learning_rate": 9.484639231627664e-05, "loss": 0.9695, "step": 45660 }, { "epoch": 0.29177261285665, "grad_norm": 0.9685949683189392, "learning_rate": 9.484417338590127e-05, "loss": 0.6323, "step": 45670 }, { "epoch": 0.2918365000063887, "grad_norm": 1.1921647787094116, "learning_rate": 9.484195400390629e-05, "loss": 1.1795, "step": 45680 }, { "epoch": 0.2919003871561274, "grad_norm": 1.2076773643493652, "learning_rate": 9.483973417031404e-05, "loss": 0.9146, "step": 45690 }, { "epoch": 0.29196427430586613, "grad_norm": 1.2398484945297241, "learning_rate": 9.483751388514685e-05, "loss": 0.9077, "step": 45700 }, { "epoch": 0.29202816145560484, "grad_norm": 0.9221826195716858, "learning_rate": 9.483529314842715e-05, "loss": 0.6419, "step": 45710 }, { "epoch": 0.29209204860534355, "grad_norm": 0.9597351551055908, "learning_rate": 9.483307196017722e-05, "loss": 0.8304, "step": 45720 }, { "epoch": 0.2921559357550822, "grad_norm": 0.8210084438323975, "learning_rate": 9.483085032041949e-05, "loss": 0.5964, "step": 45730 }, { "epoch": 0.2922198229048209, "grad_norm": 0.8063592910766602, "learning_rate": 9.48286282291763e-05, "loss": 1.1034, "step": 45740 }, { "epoch": 0.2922837100545596, "grad_norm": 1.1515041589736938, "learning_rate": 9.482640568647006e-05, "loss": 1.1599, "step": 45750 }, { "epoch": 0.2923475972042983, "grad_norm": 0.5701981782913208, "learning_rate": 9.482418269232311e-05, "loss": 0.8986, "step": 45760 }, { "epoch": 0.292411484354037, "grad_norm": 1.5599360466003418, "learning_rate": 9.482195924675789e-05, "loss": 1.0177, "step": 45770 }, { "epoch": 0.2924753715037757, "grad_norm": 0.7682929635047913, "learning_rate": 9.481973534979674e-05, "loss": 1.104, "step": 45780 }, { "epoch": 0.29253925865351443, "grad_norm": 0.5851641297340393, "learning_rate": 9.481751100146209e-05, "loss": 1.0031, "step": 45790 }, { "epoch": 0.29260314580325314, "grad_norm": 0.8387541174888611, "learning_rate": 9.481528620177633e-05, "loss": 0.868, "step": 45800 }, { "epoch": 0.29266703295299185, "grad_norm": 0.9967763423919678, "learning_rate": 9.481306095076188e-05, "loss": 0.7826, "step": 45810 }, { "epoch": 0.29273092010273055, "grad_norm": 0.5963412523269653, "learning_rate": 9.481083524844113e-05, "loss": 0.8424, "step": 45820 }, { "epoch": 0.29279480725246926, "grad_norm": 0.5736973285675049, "learning_rate": 9.480860909483649e-05, "loss": 0.8342, "step": 45830 }, { "epoch": 0.29285869440220796, "grad_norm": 1.3335403203964233, "learning_rate": 9.480638248997039e-05, "loss": 1.086, "step": 45840 }, { "epoch": 0.2929225815519466, "grad_norm": 0.5996566414833069, "learning_rate": 9.480415543386528e-05, "loss": 0.8788, "step": 45850 }, { "epoch": 0.2929864687016853, "grad_norm": 0.8202914595603943, "learning_rate": 9.480192792654355e-05, "loss": 0.9448, "step": 45860 }, { "epoch": 0.293050355851424, "grad_norm": 1.0648213624954224, "learning_rate": 9.479969996802763e-05, "loss": 1.1049, "step": 45870 }, { "epoch": 0.29311424300116273, "grad_norm": 0.8735106587409973, "learning_rate": 9.479747155833999e-05, "loss": 0.7924, "step": 45880 }, { "epoch": 0.29317813015090144, "grad_norm": 0.7611486315727234, "learning_rate": 9.479524269750306e-05, "loss": 0.7604, "step": 45890 }, { "epoch": 0.29324201730064015, "grad_norm": 1.0947805643081665, "learning_rate": 9.479301338553927e-05, "loss": 0.8645, "step": 45900 }, { "epoch": 0.29330590445037885, "grad_norm": 1.0103657245635986, "learning_rate": 9.479078362247109e-05, "loss": 1.0796, "step": 45910 }, { "epoch": 0.29336979160011756, "grad_norm": 0.4850558042526245, "learning_rate": 9.478855340832097e-05, "loss": 0.9143, "step": 45920 }, { "epoch": 0.29343367874985626, "grad_norm": 1.094140648841858, "learning_rate": 9.478632274311137e-05, "loss": 0.8126, "step": 45930 }, { "epoch": 0.29349756589959497, "grad_norm": 0.43484973907470703, "learning_rate": 9.478409162686475e-05, "loss": 0.8784, "step": 45940 }, { "epoch": 0.2935614530493337, "grad_norm": 0.7616863250732422, "learning_rate": 9.478186005960359e-05, "loss": 0.9753, "step": 45950 }, { "epoch": 0.2936253401990724, "grad_norm": 0.625148594379425, "learning_rate": 9.477962804135037e-05, "loss": 0.7565, "step": 45960 }, { "epoch": 0.29368922734881103, "grad_norm": 0.5480995178222656, "learning_rate": 9.477739557212753e-05, "loss": 0.9026, "step": 45970 }, { "epoch": 0.29375311449854974, "grad_norm": 2.2931747436523438, "learning_rate": 9.477516265195759e-05, "loss": 0.7589, "step": 45980 }, { "epoch": 0.29381700164828845, "grad_norm": 0.734191358089447, "learning_rate": 9.477292928086303e-05, "loss": 0.9262, "step": 45990 }, { "epoch": 0.29388088879802715, "grad_norm": 0.8451043367385864, "learning_rate": 9.477069545886633e-05, "loss": 0.9716, "step": 46000 }, { "epoch": 0.29394477594776586, "grad_norm": 0.7809146046638489, "learning_rate": 9.476846118599e-05, "loss": 1.0449, "step": 46010 }, { "epoch": 0.29400866309750456, "grad_norm": 0.930077314376831, "learning_rate": 9.476622646225653e-05, "loss": 0.7448, "step": 46020 }, { "epoch": 0.29407255024724327, "grad_norm": 0.7774382829666138, "learning_rate": 9.476399128768845e-05, "loss": 0.9692, "step": 46030 }, { "epoch": 0.294136437396982, "grad_norm": 0.678877592086792, "learning_rate": 9.476175566230822e-05, "loss": 0.8851, "step": 46040 }, { "epoch": 0.2942003245467207, "grad_norm": 0.7753483653068542, "learning_rate": 9.475951958613842e-05, "loss": 0.7935, "step": 46050 }, { "epoch": 0.2942642116964594, "grad_norm": 1.3318039178848267, "learning_rate": 9.475728305920151e-05, "loss": 0.9516, "step": 46060 }, { "epoch": 0.2943280988461981, "grad_norm": 3.3444128036499023, "learning_rate": 9.475504608152005e-05, "loss": 0.9883, "step": 46070 }, { "epoch": 0.2943919859959368, "grad_norm": 1.5764853954315186, "learning_rate": 9.475280865311656e-05, "loss": 0.8666, "step": 46080 }, { "epoch": 0.2944558731456755, "grad_norm": 1.4029241800308228, "learning_rate": 9.475057077401356e-05, "loss": 0.9805, "step": 46090 }, { "epoch": 0.29451976029541416, "grad_norm": 0.5334384441375732, "learning_rate": 9.47483324442336e-05, "loss": 0.8014, "step": 46100 }, { "epoch": 0.29458364744515286, "grad_norm": 0.8421732783317566, "learning_rate": 9.474609366379923e-05, "loss": 0.8272, "step": 46110 }, { "epoch": 0.29464753459489157, "grad_norm": 0.5735695362091064, "learning_rate": 9.474385443273296e-05, "loss": 0.9271, "step": 46120 }, { "epoch": 0.2947114217446303, "grad_norm": 0.8526939749717712, "learning_rate": 9.47416147510574e-05, "loss": 0.9938, "step": 46130 }, { "epoch": 0.294775308894369, "grad_norm": 0.6834962964057922, "learning_rate": 9.473937461879505e-05, "loss": 1.1168, "step": 46140 }, { "epoch": 0.2948391960441077, "grad_norm": 1.1148639917373657, "learning_rate": 9.47371340359685e-05, "loss": 0.9541, "step": 46150 }, { "epoch": 0.2949030831938464, "grad_norm": 0.8598116040229797, "learning_rate": 9.47348930026003e-05, "loss": 0.9374, "step": 46160 }, { "epoch": 0.2949669703435851, "grad_norm": 0.6423646211624146, "learning_rate": 9.473265151871304e-05, "loss": 0.8231, "step": 46170 }, { "epoch": 0.2950308574933238, "grad_norm": 2.0000832080841064, "learning_rate": 9.473040958432927e-05, "loss": 0.8936, "step": 46180 }, { "epoch": 0.2950947446430625, "grad_norm": 1.143376350402832, "learning_rate": 9.472816719947159e-05, "loss": 0.6661, "step": 46190 }, { "epoch": 0.2951586317928012, "grad_norm": 0.7327792048454285, "learning_rate": 9.472592436416255e-05, "loss": 0.8819, "step": 46200 }, { "epoch": 0.2952225189425399, "grad_norm": 0.8125030994415283, "learning_rate": 9.472368107842477e-05, "loss": 0.9795, "step": 46210 }, { "epoch": 0.2952864060922786, "grad_norm": 0.8501039743423462, "learning_rate": 9.472143734228083e-05, "loss": 1.1246, "step": 46220 }, { "epoch": 0.2953502932420173, "grad_norm": 0.4900776743888855, "learning_rate": 9.471919315575333e-05, "loss": 0.6896, "step": 46230 }, { "epoch": 0.295414180391756, "grad_norm": 1.3538086414337158, "learning_rate": 9.471694851886487e-05, "loss": 0.961, "step": 46240 }, { "epoch": 0.2954780675414947, "grad_norm": 0.9380719661712646, "learning_rate": 9.471470343163804e-05, "loss": 1.1836, "step": 46250 }, { "epoch": 0.2955419546912334, "grad_norm": 0.9986345767974854, "learning_rate": 9.471245789409548e-05, "loss": 0.8949, "step": 46260 }, { "epoch": 0.2956058418409721, "grad_norm": 0.35391414165496826, "learning_rate": 9.471021190625977e-05, "loss": 0.8161, "step": 46270 }, { "epoch": 0.2956697289907108, "grad_norm": 0.7981874942779541, "learning_rate": 9.470796546815354e-05, "loss": 0.9282, "step": 46280 }, { "epoch": 0.2957336161404495, "grad_norm": 0.6027029752731323, "learning_rate": 9.470571857979945e-05, "loss": 0.9214, "step": 46290 }, { "epoch": 0.2957975032901882, "grad_norm": 0.973746657371521, "learning_rate": 9.470347124122008e-05, "loss": 0.8735, "step": 46300 }, { "epoch": 0.29586139043992693, "grad_norm": 0.944004476070404, "learning_rate": 9.470122345243809e-05, "loss": 0.9898, "step": 46310 }, { "epoch": 0.29592527758966564, "grad_norm": 0.9042976498603821, "learning_rate": 9.469897521347609e-05, "loss": 1.0455, "step": 46320 }, { "epoch": 0.29598916473940434, "grad_norm": 0.7813184857368469, "learning_rate": 9.469672652435675e-05, "loss": 0.879, "step": 46330 }, { "epoch": 0.296053051889143, "grad_norm": 1.1560348272323608, "learning_rate": 9.469447738510269e-05, "loss": 0.8168, "step": 46340 }, { "epoch": 0.2961169390388817, "grad_norm": 0.8251795768737793, "learning_rate": 9.46922277957366e-05, "loss": 0.9762, "step": 46350 }, { "epoch": 0.2961808261886204, "grad_norm": 1.086754560470581, "learning_rate": 9.46899777562811e-05, "loss": 0.9877, "step": 46360 }, { "epoch": 0.2962447133383591, "grad_norm": 1.2580642700195312, "learning_rate": 9.468772726675887e-05, "loss": 0.808, "step": 46370 }, { "epoch": 0.2963086004880978, "grad_norm": 0.946445107460022, "learning_rate": 9.468547632719255e-05, "loss": 0.7862, "step": 46380 }, { "epoch": 0.2963724876378365, "grad_norm": 1.1934231519699097, "learning_rate": 9.468322493760484e-05, "loss": 1.1795, "step": 46390 }, { "epoch": 0.29643637478757523, "grad_norm": 0.9049299955368042, "learning_rate": 9.46809730980184e-05, "loss": 0.8541, "step": 46400 }, { "epoch": 0.29650026193731394, "grad_norm": 1.3336893320083618, "learning_rate": 9.46787208084559e-05, "loss": 1.0398, "step": 46410 }, { "epoch": 0.29656414908705264, "grad_norm": 0.9916601181030273, "learning_rate": 9.467646806894001e-05, "loss": 0.8907, "step": 46420 }, { "epoch": 0.29662803623679135, "grad_norm": 0.9866839051246643, "learning_rate": 9.467421487949347e-05, "loss": 1.1556, "step": 46430 }, { "epoch": 0.29669192338653005, "grad_norm": 0.7323521971702576, "learning_rate": 9.467196124013893e-05, "loss": 0.9656, "step": 46440 }, { "epoch": 0.29675581053626876, "grad_norm": 1.1069689989089966, "learning_rate": 9.466970715089907e-05, "loss": 0.6297, "step": 46450 }, { "epoch": 0.2968196976860074, "grad_norm": 0.5628019571304321, "learning_rate": 9.466745261179664e-05, "loss": 0.806, "step": 46460 }, { "epoch": 0.2968835848357461, "grad_norm": 1.0032429695129395, "learning_rate": 9.466519762285431e-05, "loss": 0.9214, "step": 46470 }, { "epoch": 0.2969474719854848, "grad_norm": 0.7182255983352661, "learning_rate": 9.466294218409479e-05, "loss": 0.9303, "step": 46480 }, { "epoch": 0.29701135913522353, "grad_norm": 1.8324652910232544, "learning_rate": 9.466068629554082e-05, "loss": 1.1856, "step": 46490 }, { "epoch": 0.29707524628496224, "grad_norm": 0.7303147315979004, "learning_rate": 9.46584299572151e-05, "loss": 0.7481, "step": 46500 }, { "epoch": 0.29713913343470094, "grad_norm": 2.1872732639312744, "learning_rate": 9.465617316914033e-05, "loss": 0.7029, "step": 46510 }, { "epoch": 0.29720302058443965, "grad_norm": 1.0133579969406128, "learning_rate": 9.46539159313393e-05, "loss": 0.8906, "step": 46520 }, { "epoch": 0.29726690773417835, "grad_norm": 0.7005990147590637, "learning_rate": 9.465165824383468e-05, "loss": 0.725, "step": 46530 }, { "epoch": 0.29733079488391706, "grad_norm": 1.0312696695327759, "learning_rate": 9.464940010664925e-05, "loss": 0.9634, "step": 46540 }, { "epoch": 0.29739468203365577, "grad_norm": 0.36605343222618103, "learning_rate": 9.464714151980571e-05, "loss": 0.7082, "step": 46550 }, { "epoch": 0.2974585691833945, "grad_norm": 1.1739599704742432, "learning_rate": 9.464488248332685e-05, "loss": 1.4234, "step": 46560 }, { "epoch": 0.2975224563331332, "grad_norm": 0.8871263265609741, "learning_rate": 9.464262299723539e-05, "loss": 0.7826, "step": 46570 }, { "epoch": 0.29758634348287183, "grad_norm": 0.652490496635437, "learning_rate": 9.46403630615541e-05, "loss": 0.9345, "step": 46580 }, { "epoch": 0.29765023063261054, "grad_norm": 0.8714577555656433, "learning_rate": 9.463810267630573e-05, "loss": 1.096, "step": 46590 }, { "epoch": 0.29771411778234924, "grad_norm": 0.48764970898628235, "learning_rate": 9.463584184151305e-05, "loss": 0.9833, "step": 46600 }, { "epoch": 0.29777800493208795, "grad_norm": 0.5805774331092834, "learning_rate": 9.463358055719883e-05, "loss": 0.6249, "step": 46610 }, { "epoch": 0.29784189208182665, "grad_norm": 1.5289901494979858, "learning_rate": 9.463131882338583e-05, "loss": 1.0411, "step": 46620 }, { "epoch": 0.29790577923156536, "grad_norm": 0.9983165264129639, "learning_rate": 9.462905664009685e-05, "loss": 0.9297, "step": 46630 }, { "epoch": 0.29796966638130407, "grad_norm": 0.5943264961242676, "learning_rate": 9.462679400735466e-05, "loss": 1.0207, "step": 46640 }, { "epoch": 0.29803355353104277, "grad_norm": 0.9419231414794922, "learning_rate": 9.462453092518204e-05, "loss": 0.8072, "step": 46650 }, { "epoch": 0.2980974406807815, "grad_norm": 0.6155195832252502, "learning_rate": 9.46222673936018e-05, "loss": 0.7672, "step": 46660 }, { "epoch": 0.2981613278305202, "grad_norm": 0.5580214858055115, "learning_rate": 9.462000341263671e-05, "loss": 0.9204, "step": 46670 }, { "epoch": 0.2982252149802589, "grad_norm": 1.5612927675247192, "learning_rate": 9.46177389823096e-05, "loss": 1.2065, "step": 46680 }, { "epoch": 0.2982891021299976, "grad_norm": 1.112136960029602, "learning_rate": 9.461547410264324e-05, "loss": 0.7437, "step": 46690 }, { "epoch": 0.29835298927973625, "grad_norm": 0.646933913230896, "learning_rate": 9.461320877366047e-05, "loss": 0.8512, "step": 46700 }, { "epoch": 0.29841687642947495, "grad_norm": 0.7455711960792542, "learning_rate": 9.461094299538408e-05, "loss": 1.0632, "step": 46710 }, { "epoch": 0.29848076357921366, "grad_norm": 1.7939437627792358, "learning_rate": 9.460867676783691e-05, "loss": 0.9103, "step": 46720 }, { "epoch": 0.29854465072895237, "grad_norm": 0.8458738923072815, "learning_rate": 9.460641009104177e-05, "loss": 0.8318, "step": 46730 }, { "epoch": 0.29860853787869107, "grad_norm": 1.1365669965744019, "learning_rate": 9.460414296502149e-05, "loss": 0.94, "step": 46740 }, { "epoch": 0.2986724250284298, "grad_norm": 0.8920236229896545, "learning_rate": 9.46018753897989e-05, "loss": 0.8648, "step": 46750 }, { "epoch": 0.2987363121781685, "grad_norm": 1.0041251182556152, "learning_rate": 9.459960736539683e-05, "loss": 0.6963, "step": 46760 }, { "epoch": 0.2988001993279072, "grad_norm": 0.6039364337921143, "learning_rate": 9.459733889183815e-05, "loss": 0.8719, "step": 46770 }, { "epoch": 0.2988640864776459, "grad_norm": 1.0676556825637817, "learning_rate": 9.459506996914568e-05, "loss": 0.7705, "step": 46780 }, { "epoch": 0.2989279736273846, "grad_norm": 1.1080639362335205, "learning_rate": 9.459280059734226e-05, "loss": 0.9965, "step": 46790 }, { "epoch": 0.2989918607771233, "grad_norm": 1.0551854372024536, "learning_rate": 9.459053077645077e-05, "loss": 0.8556, "step": 46800 }, { "epoch": 0.299055747926862, "grad_norm": 0.8783060908317566, "learning_rate": 9.458826050649407e-05, "loss": 1.329, "step": 46810 }, { "epoch": 0.29911963507660067, "grad_norm": 0.8185521960258484, "learning_rate": 9.4585989787495e-05, "loss": 1.0131, "step": 46820 }, { "epoch": 0.29918352222633937, "grad_norm": 0.5932868719100952, "learning_rate": 9.458371861947645e-05, "loss": 1.0617, "step": 46830 }, { "epoch": 0.2992474093760781, "grad_norm": 1.5922162532806396, "learning_rate": 9.458144700246127e-05, "loss": 0.8565, "step": 46840 }, { "epoch": 0.2993112965258168, "grad_norm": 0.603920042514801, "learning_rate": 9.457917493647235e-05, "loss": 0.7725, "step": 46850 }, { "epoch": 0.2993751836755555, "grad_norm": 0.9906972646713257, "learning_rate": 9.457690242153258e-05, "loss": 1.0442, "step": 46860 }, { "epoch": 0.2994390708252942, "grad_norm": 0.756675124168396, "learning_rate": 9.457462945766484e-05, "loss": 0.8007, "step": 46870 }, { "epoch": 0.2995029579750329, "grad_norm": 0.6027681827545166, "learning_rate": 9.4572356044892e-05, "loss": 1.1144, "step": 46880 }, { "epoch": 0.2995668451247716, "grad_norm": 0.542198896408081, "learning_rate": 9.457008218323699e-05, "loss": 0.9496, "step": 46890 }, { "epoch": 0.2996307322745103, "grad_norm": 1.004642367362976, "learning_rate": 9.45678078727227e-05, "loss": 0.8916, "step": 46900 }, { "epoch": 0.299694619424249, "grad_norm": 0.5443822741508484, "learning_rate": 9.456553311337202e-05, "loss": 1.0492, "step": 46910 }, { "epoch": 0.2997585065739877, "grad_norm": 0.562498927116394, "learning_rate": 9.456325790520789e-05, "loss": 1.0578, "step": 46920 }, { "epoch": 0.29982239372372643, "grad_norm": 0.7859065532684326, "learning_rate": 9.456098224825316e-05, "loss": 1.046, "step": 46930 }, { "epoch": 0.29988628087346514, "grad_norm": 0.7627017498016357, "learning_rate": 9.455870614253081e-05, "loss": 0.8198, "step": 46940 }, { "epoch": 0.2999501680232038, "grad_norm": 0.6485910415649414, "learning_rate": 9.455642958806374e-05, "loss": 0.9887, "step": 46950 }, { "epoch": 0.3000140551729425, "grad_norm": 1.4447276592254639, "learning_rate": 9.455415258487487e-05, "loss": 0.7989, "step": 46960 }, { "epoch": 0.3000779423226812, "grad_norm": 0.9059609770774841, "learning_rate": 9.455187513298714e-05, "loss": 0.9545, "step": 46970 }, { "epoch": 0.3001418294724199, "grad_norm": 1.1355173587799072, "learning_rate": 9.454959723242349e-05, "loss": 0.825, "step": 46980 }, { "epoch": 0.3002057166221586, "grad_norm": 1.0973711013793945, "learning_rate": 9.454731888320684e-05, "loss": 0.9209, "step": 46990 }, { "epoch": 0.3002696037718973, "grad_norm": 0.9574286341667175, "learning_rate": 9.454504008536017e-05, "loss": 0.8564, "step": 47000 }, { "epoch": 0.300333490921636, "grad_norm": 1.527851939201355, "learning_rate": 9.454276083890641e-05, "loss": 1.3292, "step": 47010 }, { "epoch": 0.30039737807137473, "grad_norm": 0.8139092326164246, "learning_rate": 9.454048114386848e-05, "loss": 0.8496, "step": 47020 }, { "epoch": 0.30046126522111344, "grad_norm": 1.5546993017196655, "learning_rate": 9.453820100026942e-05, "loss": 1.1378, "step": 47030 }, { "epoch": 0.30052515237085214, "grad_norm": 0.7080782055854797, "learning_rate": 9.45359204081321e-05, "loss": 0.7672, "step": 47040 }, { "epoch": 0.30058903952059085, "grad_norm": 0.6548307538032532, "learning_rate": 9.453363936747957e-05, "loss": 1.0312, "step": 47050 }, { "epoch": 0.30065292667032956, "grad_norm": 0.640304684638977, "learning_rate": 9.453135787833473e-05, "loss": 0.9947, "step": 47060 }, { "epoch": 0.3007168138200682, "grad_norm": 0.9930755496025085, "learning_rate": 9.452907594072062e-05, "loss": 0.7912, "step": 47070 }, { "epoch": 0.3007807009698069, "grad_norm": 0.8189347386360168, "learning_rate": 9.452679355466018e-05, "loss": 1.1204, "step": 47080 }, { "epoch": 0.3008445881195456, "grad_norm": 0.8146405220031738, "learning_rate": 9.45245107201764e-05, "loss": 1.0947, "step": 47090 }, { "epoch": 0.3009084752692843, "grad_norm": 0.9201721549034119, "learning_rate": 9.45222274372923e-05, "loss": 0.7746, "step": 47100 }, { "epoch": 0.30097236241902303, "grad_norm": 0.7247973680496216, "learning_rate": 9.451994370603084e-05, "loss": 0.8073, "step": 47110 }, { "epoch": 0.30103624956876174, "grad_norm": 3.4702842235565186, "learning_rate": 9.451765952641502e-05, "loss": 0.8341, "step": 47120 }, { "epoch": 0.30110013671850044, "grad_norm": 2.2234277725219727, "learning_rate": 9.451537489846787e-05, "loss": 0.8992, "step": 47130 }, { "epoch": 0.30116402386823915, "grad_norm": 0.8516297340393066, "learning_rate": 9.451308982221238e-05, "loss": 1.1016, "step": 47140 }, { "epoch": 0.30122791101797786, "grad_norm": 0.6456612348556519, "learning_rate": 9.451080429767157e-05, "loss": 0.878, "step": 47150 }, { "epoch": 0.30129179816771656, "grad_norm": 1.235134482383728, "learning_rate": 9.450851832486844e-05, "loss": 0.8274, "step": 47160 }, { "epoch": 0.30135568531745527, "grad_norm": 1.2965903282165527, "learning_rate": 9.450623190382604e-05, "loss": 1.0011, "step": 47170 }, { "epoch": 0.301419572467194, "grad_norm": 0.6325692534446716, "learning_rate": 9.450394503456739e-05, "loss": 0.8392, "step": 47180 }, { "epoch": 0.3014834596169326, "grad_norm": 0.9124320149421692, "learning_rate": 9.45016577171155e-05, "loss": 1.0633, "step": 47190 }, { "epoch": 0.30154734676667133, "grad_norm": 0.5959859490394592, "learning_rate": 9.44993699514934e-05, "loss": 0.993, "step": 47200 }, { "epoch": 0.30161123391641004, "grad_norm": 0.5984769463539124, "learning_rate": 9.449708173772417e-05, "loss": 0.8204, "step": 47210 }, { "epoch": 0.30167512106614874, "grad_norm": 1.2055346965789795, "learning_rate": 9.449479307583082e-05, "loss": 1.1527, "step": 47220 }, { "epoch": 0.30173900821588745, "grad_norm": 0.8771409392356873, "learning_rate": 9.449250396583642e-05, "loss": 0.7836, "step": 47230 }, { "epoch": 0.30180289536562616, "grad_norm": 1.1012285947799683, "learning_rate": 9.4490214407764e-05, "loss": 0.8842, "step": 47240 }, { "epoch": 0.30186678251536486, "grad_norm": 2.031371831893921, "learning_rate": 9.448792440163664e-05, "loss": 0.8747, "step": 47250 }, { "epoch": 0.30193066966510357, "grad_norm": 1.0956002473831177, "learning_rate": 9.44856339474774e-05, "loss": 0.9346, "step": 47260 }, { "epoch": 0.3019945568148423, "grad_norm": 1.060286521911621, "learning_rate": 9.448334304530932e-05, "loss": 0.9462, "step": 47270 }, { "epoch": 0.302058443964581, "grad_norm": 0.9044703841209412, "learning_rate": 9.448105169515551e-05, "loss": 0.8297, "step": 47280 }, { "epoch": 0.3021223311143197, "grad_norm": 0.7445279955863953, "learning_rate": 9.447875989703902e-05, "loss": 0.9671, "step": 47290 }, { "epoch": 0.3021862182640584, "grad_norm": 0.9023739099502563, "learning_rate": 9.447646765098294e-05, "loss": 0.9307, "step": 47300 }, { "epoch": 0.30225010541379704, "grad_norm": 1.161218523979187, "learning_rate": 9.447417495701036e-05, "loss": 0.8708, "step": 47310 }, { "epoch": 0.30231399256353575, "grad_norm": 0.9403544068336487, "learning_rate": 9.447188181514437e-05, "loss": 1.0087, "step": 47320 }, { "epoch": 0.30237787971327446, "grad_norm": 1.1664180755615234, "learning_rate": 9.446958822540803e-05, "loss": 0.9059, "step": 47330 }, { "epoch": 0.30244176686301316, "grad_norm": 1.02223539352417, "learning_rate": 9.446729418782448e-05, "loss": 1.0916, "step": 47340 }, { "epoch": 0.30250565401275187, "grad_norm": 0.7959775924682617, "learning_rate": 9.446499970241682e-05, "loss": 0.9342, "step": 47350 }, { "epoch": 0.3025695411624906, "grad_norm": 0.938345730304718, "learning_rate": 9.446270476920813e-05, "loss": 1.0355, "step": 47360 }, { "epoch": 0.3026334283122293, "grad_norm": 0.8003197908401489, "learning_rate": 9.446040938822154e-05, "loss": 0.6568, "step": 47370 }, { "epoch": 0.302697315461968, "grad_norm": 1.0039125680923462, "learning_rate": 9.445811355948016e-05, "loss": 0.7738, "step": 47380 }, { "epoch": 0.3027612026117067, "grad_norm": 0.7357406616210938, "learning_rate": 9.44558172830071e-05, "loss": 0.9009, "step": 47390 }, { "epoch": 0.3028250897614454, "grad_norm": 0.9000012874603271, "learning_rate": 9.445352055882552e-05, "loss": 0.6797, "step": 47400 }, { "epoch": 0.3028889769111841, "grad_norm": 0.7020642161369324, "learning_rate": 9.445122338695853e-05, "loss": 1.0587, "step": 47410 }, { "epoch": 0.3029528640609228, "grad_norm": 0.9596700668334961, "learning_rate": 9.444892576742927e-05, "loss": 0.8238, "step": 47420 }, { "epoch": 0.30301675121066146, "grad_norm": 0.9670388698577881, "learning_rate": 9.444662770026087e-05, "loss": 1.0262, "step": 47430 }, { "epoch": 0.30308063836040017, "grad_norm": 0.6952800154685974, "learning_rate": 9.444432918547648e-05, "loss": 0.6862, "step": 47440 }, { "epoch": 0.3031445255101389, "grad_norm": 1.1551501750946045, "learning_rate": 9.444203022309923e-05, "loss": 0.8036, "step": 47450 }, { "epoch": 0.3032084126598776, "grad_norm": 0.7595165371894836, "learning_rate": 9.44397308131523e-05, "loss": 0.7412, "step": 47460 }, { "epoch": 0.3032722998096163, "grad_norm": 1.8203938007354736, "learning_rate": 9.443743095565882e-05, "loss": 0.8287, "step": 47470 }, { "epoch": 0.303336186959355, "grad_norm": 0.9286074042320251, "learning_rate": 9.443513065064198e-05, "loss": 0.7826, "step": 47480 }, { "epoch": 0.3034000741090937, "grad_norm": 0.6284856200218201, "learning_rate": 9.443282989812495e-05, "loss": 0.8863, "step": 47490 }, { "epoch": 0.3034639612588324, "grad_norm": 0.5591778755187988, "learning_rate": 9.443052869813085e-05, "loss": 0.8473, "step": 47500 }, { "epoch": 0.3035278484085711, "grad_norm": 0.854895293712616, "learning_rate": 9.44282270506829e-05, "loss": 0.9199, "step": 47510 }, { "epoch": 0.3035917355583098, "grad_norm": 0.6344938278198242, "learning_rate": 9.442592495580427e-05, "loss": 0.8679, "step": 47520 }, { "epoch": 0.3036556227080485, "grad_norm": 0.9397995471954346, "learning_rate": 9.442362241351815e-05, "loss": 0.8879, "step": 47530 }, { "epoch": 0.30371950985778723, "grad_norm": 0.9806991219520569, "learning_rate": 9.442131942384769e-05, "loss": 1.0208, "step": 47540 }, { "epoch": 0.3037833970075259, "grad_norm": 0.7757532000541687, "learning_rate": 9.441901598681615e-05, "loss": 1.0113, "step": 47550 }, { "epoch": 0.3038472841572646, "grad_norm": 1.0031111240386963, "learning_rate": 9.441671210244667e-05, "loss": 0.8251, "step": 47560 }, { "epoch": 0.3039111713070033, "grad_norm": 0.7999134659767151, "learning_rate": 9.441440777076248e-05, "loss": 0.836, "step": 47570 }, { "epoch": 0.303975058456742, "grad_norm": 1.0625855922698975, "learning_rate": 9.441210299178677e-05, "loss": 0.87, "step": 47580 }, { "epoch": 0.3040389456064807, "grad_norm": 1.3165303468704224, "learning_rate": 9.440979776554278e-05, "loss": 0.7388, "step": 47590 }, { "epoch": 0.3041028327562194, "grad_norm": 1.166496992111206, "learning_rate": 9.44074920920537e-05, "loss": 0.7745, "step": 47600 }, { "epoch": 0.3041667199059581, "grad_norm": 0.9159298539161682, "learning_rate": 9.440518597134275e-05, "loss": 0.932, "step": 47610 }, { "epoch": 0.3042306070556968, "grad_norm": 1.0953913927078247, "learning_rate": 9.440287940343317e-05, "loss": 0.9399, "step": 47620 }, { "epoch": 0.30429449420543553, "grad_norm": 0.7524245977401733, "learning_rate": 9.440057238834816e-05, "loss": 0.7907, "step": 47630 }, { "epoch": 0.30435838135517423, "grad_norm": 0.6022171974182129, "learning_rate": 9.4398264926111e-05, "loss": 0.9259, "step": 47640 }, { "epoch": 0.30442226850491294, "grad_norm": 0.8347755074501038, "learning_rate": 9.439595701674488e-05, "loss": 1.0379, "step": 47650 }, { "epoch": 0.30448615565465165, "grad_norm": 0.7987663149833679, "learning_rate": 9.43936486602731e-05, "loss": 0.926, "step": 47660 }, { "epoch": 0.3045500428043903, "grad_norm": 0.762823760509491, "learning_rate": 9.439133985671884e-05, "loss": 1.031, "step": 47670 }, { "epoch": 0.304613929954129, "grad_norm": 5.046191692352295, "learning_rate": 9.438903060610539e-05, "loss": 0.8839, "step": 47680 }, { "epoch": 0.3046778171038677, "grad_norm": 0.8193703293800354, "learning_rate": 9.438672090845599e-05, "loss": 0.9656, "step": 47690 }, { "epoch": 0.3047417042536064, "grad_norm": 1.1891075372695923, "learning_rate": 9.438441076379395e-05, "loss": 0.8131, "step": 47700 }, { "epoch": 0.3048055914033451, "grad_norm": 0.6901410222053528, "learning_rate": 9.438210017214245e-05, "loss": 0.9052, "step": 47710 }, { "epoch": 0.30486947855308383, "grad_norm": 0.8975858092308044, "learning_rate": 9.437978913352483e-05, "loss": 0.9107, "step": 47720 }, { "epoch": 0.30493336570282253, "grad_norm": 0.71076500415802, "learning_rate": 9.437747764796432e-05, "loss": 0.9106, "step": 47730 }, { "epoch": 0.30499725285256124, "grad_norm": 0.6818621158599854, "learning_rate": 9.437516571548424e-05, "loss": 1.0038, "step": 47740 }, { "epoch": 0.30506114000229995, "grad_norm": 1.3536254167556763, "learning_rate": 9.437285333610784e-05, "loss": 1.0431, "step": 47750 }, { "epoch": 0.30512502715203865, "grad_norm": 0.7278540730476379, "learning_rate": 9.437054050985842e-05, "loss": 0.67, "step": 47760 }, { "epoch": 0.30518891430177736, "grad_norm": 0.8322495222091675, "learning_rate": 9.436822723675926e-05, "loss": 0.8593, "step": 47770 }, { "epoch": 0.30525280145151606, "grad_norm": 0.8993383646011353, "learning_rate": 9.436591351683368e-05, "loss": 0.9672, "step": 47780 }, { "epoch": 0.30531668860125477, "grad_norm": 0.9851189851760864, "learning_rate": 9.436359935010498e-05, "loss": 0.775, "step": 47790 }, { "epoch": 0.3053805757509934, "grad_norm": 0.7736380696296692, "learning_rate": 9.436128473659644e-05, "loss": 0.838, "step": 47800 }, { "epoch": 0.3054444629007321, "grad_norm": 0.7815120816230774, "learning_rate": 9.43589696763314e-05, "loss": 0.6829, "step": 47810 }, { "epoch": 0.30550835005047083, "grad_norm": 0.9725918769836426, "learning_rate": 9.435665416933315e-05, "loss": 0.7912, "step": 47820 }, { "epoch": 0.30557223720020954, "grad_norm": 0.9616202712059021, "learning_rate": 9.4354338215625e-05, "loss": 0.8527, "step": 47830 }, { "epoch": 0.30563612434994825, "grad_norm": 0.7812166810035706, "learning_rate": 9.435202181523031e-05, "loss": 0.5296, "step": 47840 }, { "epoch": 0.30570001149968695, "grad_norm": 0.5943650603294373, "learning_rate": 9.43497049681724e-05, "loss": 0.982, "step": 47850 }, { "epoch": 0.30576389864942566, "grad_norm": 1.0416995286941528, "learning_rate": 9.434738767447458e-05, "loss": 1.0534, "step": 47860 }, { "epoch": 0.30582778579916436, "grad_norm": 0.5415847301483154, "learning_rate": 9.43450699341602e-05, "loss": 0.939, "step": 47870 }, { "epoch": 0.30589167294890307, "grad_norm": 1.0292586088180542, "learning_rate": 9.43427517472526e-05, "loss": 1.0046, "step": 47880 }, { "epoch": 0.3059555600986418, "grad_norm": 0.9215097427368164, "learning_rate": 9.434043311377513e-05, "loss": 0.977, "step": 47890 }, { "epoch": 0.3060194472483805, "grad_norm": 0.45443835854530334, "learning_rate": 9.433811403375114e-05, "loss": 0.8409, "step": 47900 }, { "epoch": 0.3060833343981192, "grad_norm": 1.0631471872329712, "learning_rate": 9.433579450720398e-05, "loss": 0.9141, "step": 47910 }, { "epoch": 0.30614722154785784, "grad_norm": 1.0696526765823364, "learning_rate": 9.433347453415702e-05, "loss": 1.0058, "step": 47920 }, { "epoch": 0.30621110869759655, "grad_norm": 1.055720329284668, "learning_rate": 9.433115411463361e-05, "loss": 0.7988, "step": 47930 }, { "epoch": 0.30627499584733525, "grad_norm": 0.7248536944389343, "learning_rate": 9.432883324865713e-05, "loss": 0.9809, "step": 47940 }, { "epoch": 0.30633888299707396, "grad_norm": 0.7505012154579163, "learning_rate": 9.432651193625095e-05, "loss": 1.0935, "step": 47950 }, { "epoch": 0.30640277014681266, "grad_norm": 1.8567789793014526, "learning_rate": 9.432419017743845e-05, "loss": 0.8664, "step": 47960 }, { "epoch": 0.30646665729655137, "grad_norm": 0.6086595058441162, "learning_rate": 9.432186797224301e-05, "loss": 1.1033, "step": 47970 }, { "epoch": 0.3065305444462901, "grad_norm": 1.0013806819915771, "learning_rate": 9.431954532068801e-05, "loss": 0.8595, "step": 47980 }, { "epoch": 0.3065944315960288, "grad_norm": 0.9920240640640259, "learning_rate": 9.431722222279684e-05, "loss": 0.9719, "step": 47990 }, { "epoch": 0.3066583187457675, "grad_norm": 1.207126498222351, "learning_rate": 9.43148986785929e-05, "loss": 0.8834, "step": 48000 }, { "epoch": 0.3067222058955062, "grad_norm": 0.6420753598213196, "learning_rate": 9.431257468809961e-05, "loss": 1.1191, "step": 48010 }, { "epoch": 0.3067860930452449, "grad_norm": 0.9169009327888489, "learning_rate": 9.431025025134036e-05, "loss": 1.0277, "step": 48020 }, { "epoch": 0.3068499801949836, "grad_norm": 1.1910425424575806, "learning_rate": 9.430792536833855e-05, "loss": 0.7912, "step": 48030 }, { "epoch": 0.30691386734472226, "grad_norm": 1.0869101285934448, "learning_rate": 9.43056000391176e-05, "loss": 1.1964, "step": 48040 }, { "epoch": 0.30697775449446096, "grad_norm": 0.8634042739868164, "learning_rate": 9.430327426370091e-05, "loss": 0.7144, "step": 48050 }, { "epoch": 0.30704164164419967, "grad_norm": 1.0796973705291748, "learning_rate": 9.430094804211195e-05, "loss": 0.8565, "step": 48060 }, { "epoch": 0.3071055287939384, "grad_norm": 0.871731698513031, "learning_rate": 9.42986213743741e-05, "loss": 0.9328, "step": 48070 }, { "epoch": 0.3071694159436771, "grad_norm": 1.139355182647705, "learning_rate": 9.429629426051081e-05, "loss": 0.8468, "step": 48080 }, { "epoch": 0.3072333030934158, "grad_norm": 0.9230227470397949, "learning_rate": 9.429396670054551e-05, "loss": 1.0648, "step": 48090 }, { "epoch": 0.3072971902431545, "grad_norm": 0.9664996862411499, "learning_rate": 9.429163869450166e-05, "loss": 0.9543, "step": 48100 }, { "epoch": 0.3073610773928932, "grad_norm": 0.7057569026947021, "learning_rate": 9.428931024240267e-05, "loss": 0.978, "step": 48110 }, { "epoch": 0.3074249645426319, "grad_norm": 0.6868560314178467, "learning_rate": 9.428698134427202e-05, "loss": 0.8156, "step": 48120 }, { "epoch": 0.3074888516923706, "grad_norm": 1.3215396404266357, "learning_rate": 9.428465200013317e-05, "loss": 1.176, "step": 48130 }, { "epoch": 0.3075527388421093, "grad_norm": 0.767733633518219, "learning_rate": 9.428232221000954e-05, "loss": 1.0589, "step": 48140 }, { "epoch": 0.307616625991848, "grad_norm": 0.9023085832595825, "learning_rate": 9.427999197392463e-05, "loss": 0.861, "step": 48150 }, { "epoch": 0.3076805131415867, "grad_norm": 0.7275156378746033, "learning_rate": 9.427766129190189e-05, "loss": 0.9598, "step": 48160 }, { "epoch": 0.3077444002913254, "grad_norm": 1.1125576496124268, "learning_rate": 9.427533016396479e-05, "loss": 0.7333, "step": 48170 }, { "epoch": 0.3078082874410641, "grad_norm": 2.008270025253296, "learning_rate": 9.427299859013682e-05, "loss": 1.134, "step": 48180 }, { "epoch": 0.3078721745908028, "grad_norm": 0.5112677216529846, "learning_rate": 9.427066657044144e-05, "loss": 0.7755, "step": 48190 }, { "epoch": 0.3079360617405415, "grad_norm": 0.8975897431373596, "learning_rate": 9.426833410490215e-05, "loss": 0.791, "step": 48200 }, { "epoch": 0.3079999488902802, "grad_norm": 0.7356785535812378, "learning_rate": 9.426600119354245e-05, "loss": 0.8472, "step": 48210 }, { "epoch": 0.3080638360400189, "grad_norm": 0.8324338793754578, "learning_rate": 9.426366783638582e-05, "loss": 0.9064, "step": 48220 }, { "epoch": 0.3081277231897576, "grad_norm": 0.9921901226043701, "learning_rate": 9.426133403345576e-05, "loss": 0.9901, "step": 48230 }, { "epoch": 0.3081916103394963, "grad_norm": 1.4877877235412598, "learning_rate": 9.425899978477577e-05, "loss": 0.7397, "step": 48240 }, { "epoch": 0.30825549748923503, "grad_norm": 1.276802897453308, "learning_rate": 9.425666509036936e-05, "loss": 0.9455, "step": 48250 }, { "epoch": 0.30831938463897374, "grad_norm": 1.7324568033218384, "learning_rate": 9.425432995026005e-05, "loss": 0.8014, "step": 48260 }, { "epoch": 0.30838327178871244, "grad_norm": 1.0053337812423706, "learning_rate": 9.425199436447135e-05, "loss": 1.1581, "step": 48270 }, { "epoch": 0.3084471589384511, "grad_norm": 1.1063930988311768, "learning_rate": 9.424965833302679e-05, "loss": 1.1048, "step": 48280 }, { "epoch": 0.3085110460881898, "grad_norm": 1.1341273784637451, "learning_rate": 9.424732185594989e-05, "loss": 1.0374, "step": 48290 }, { "epoch": 0.3085749332379285, "grad_norm": 0.6320347785949707, "learning_rate": 9.424498493326417e-05, "loss": 0.9549, "step": 48300 }, { "epoch": 0.3086388203876672, "grad_norm": 0.48150819540023804, "learning_rate": 9.424264756499317e-05, "loss": 0.8902, "step": 48310 }, { "epoch": 0.3087027075374059, "grad_norm": 0.6222485899925232, "learning_rate": 9.424030975116045e-05, "loss": 0.8407, "step": 48320 }, { "epoch": 0.3087665946871446, "grad_norm": 0.6241324543952942, "learning_rate": 9.423797149178952e-05, "loss": 0.8781, "step": 48330 }, { "epoch": 0.30883048183688333, "grad_norm": 0.8804172277450562, "learning_rate": 9.423563278690397e-05, "loss": 1.0311, "step": 48340 }, { "epoch": 0.30889436898662204, "grad_norm": 1.6444405317306519, "learning_rate": 9.423329363652731e-05, "loss": 0.7865, "step": 48350 }, { "epoch": 0.30895825613636074, "grad_norm": 0.8753210306167603, "learning_rate": 9.423095404068312e-05, "loss": 1.0464, "step": 48360 }, { "epoch": 0.30902214328609945, "grad_norm": 0.8216173052787781, "learning_rate": 9.422861399939495e-05, "loss": 0.8443, "step": 48370 }, { "epoch": 0.30908603043583815, "grad_norm": 1.1134603023529053, "learning_rate": 9.422627351268638e-05, "loss": 1.1639, "step": 48380 }, { "epoch": 0.30914991758557686, "grad_norm": 0.8974233269691467, "learning_rate": 9.422393258058098e-05, "loss": 0.8378, "step": 48390 }, { "epoch": 0.3092138047353155, "grad_norm": 0.8469827175140381, "learning_rate": 9.422159120310232e-05, "loss": 0.8669, "step": 48400 }, { "epoch": 0.3092776918850542, "grad_norm": 0.701692521572113, "learning_rate": 9.421924938027397e-05, "loss": 0.9278, "step": 48410 }, { "epoch": 0.3093415790347929, "grad_norm": 0.7484457492828369, "learning_rate": 9.421690711211952e-05, "loss": 0.9045, "step": 48420 }, { "epoch": 0.30940546618453163, "grad_norm": 0.7951037883758545, "learning_rate": 9.421456439866257e-05, "loss": 0.7149, "step": 48430 }, { "epoch": 0.30946935333427034, "grad_norm": 0.6220124363899231, "learning_rate": 9.421222123992671e-05, "loss": 0.8828, "step": 48440 }, { "epoch": 0.30953324048400904, "grad_norm": 0.8480835556983948, "learning_rate": 9.420987763593554e-05, "loss": 0.8722, "step": 48450 }, { "epoch": 0.30959712763374775, "grad_norm": 0.8057517409324646, "learning_rate": 9.420753358671264e-05, "loss": 1.0229, "step": 48460 }, { "epoch": 0.30966101478348645, "grad_norm": 0.7954405546188354, "learning_rate": 9.420518909228164e-05, "loss": 0.7711, "step": 48470 }, { "epoch": 0.30972490193322516, "grad_norm": 1.0141898393630981, "learning_rate": 9.420284415266613e-05, "loss": 0.7272, "step": 48480 }, { "epoch": 0.30978878908296387, "grad_norm": 0.8430118560791016, "learning_rate": 9.420049876788974e-05, "loss": 0.9584, "step": 48490 }, { "epoch": 0.3098526762327026, "grad_norm": 0.703395426273346, "learning_rate": 9.419815293797611e-05, "loss": 0.7518, "step": 48500 }, { "epoch": 0.3099165633824413, "grad_norm": 0.6851431727409363, "learning_rate": 9.419580666294883e-05, "loss": 0.6678, "step": 48510 }, { "epoch": 0.30998045053217993, "grad_norm": 0.6793634295463562, "learning_rate": 9.419345994283153e-05, "loss": 0.902, "step": 48520 }, { "epoch": 0.31004433768191864, "grad_norm": 0.7479285597801208, "learning_rate": 9.419111277764788e-05, "loss": 0.8441, "step": 48530 }, { "epoch": 0.31010822483165734, "grad_norm": 1.3613170385360718, "learning_rate": 9.418876516742148e-05, "loss": 1.0158, "step": 48540 }, { "epoch": 0.31017211198139605, "grad_norm": 1.1104499101638794, "learning_rate": 9.418665193772571e-05, "loss": 1.0577, "step": 48550 }, { "epoch": 0.31023599913113475, "grad_norm": 1.6109135150909424, "learning_rate": 9.418430348198326e-05, "loss": 0.8694, "step": 48560 }, { "epoch": 0.31029988628087346, "grad_norm": 1.1885582208633423, "learning_rate": 9.418195458126664e-05, "loss": 0.8233, "step": 48570 }, { "epoch": 0.31036377343061217, "grad_norm": 1.3938848972320557, "learning_rate": 9.41796052355995e-05, "loss": 0.824, "step": 48580 }, { "epoch": 0.3104276605803509, "grad_norm": 0.72528076171875, "learning_rate": 9.417725544500552e-05, "loss": 0.912, "step": 48590 }, { "epoch": 0.3104915477300896, "grad_norm": 0.7403073310852051, "learning_rate": 9.417490520950838e-05, "loss": 0.8565, "step": 48600 }, { "epoch": 0.3105554348798283, "grad_norm": 1.263451337814331, "learning_rate": 9.417255452913171e-05, "loss": 0.9227, "step": 48610 }, { "epoch": 0.310619322029567, "grad_norm": 1.1956753730773926, "learning_rate": 9.417020340389922e-05, "loss": 0.9026, "step": 48620 }, { "epoch": 0.3106832091793057, "grad_norm": 0.985579252243042, "learning_rate": 9.416785183383454e-05, "loss": 0.9717, "step": 48630 }, { "epoch": 0.3107470963290444, "grad_norm": 1.1130268573760986, "learning_rate": 9.416549981896141e-05, "loss": 0.6567, "step": 48640 }, { "epoch": 0.31081098347878305, "grad_norm": 0.6500244736671448, "learning_rate": 9.416314735930347e-05, "loss": 1.0039, "step": 48650 }, { "epoch": 0.31087487062852176, "grad_norm": 1.0111130475997925, "learning_rate": 9.416079445488444e-05, "loss": 1.0116, "step": 48660 }, { "epoch": 0.31093875777826047, "grad_norm": 0.8683274984359741, "learning_rate": 9.4158441105728e-05, "loss": 0.9201, "step": 48670 }, { "epoch": 0.3110026449279992, "grad_norm": 0.9501914381980896, "learning_rate": 9.415608731185786e-05, "loss": 1.0651, "step": 48680 }, { "epoch": 0.3110665320777379, "grad_norm": 1.045398473739624, "learning_rate": 9.415373307329771e-05, "loss": 1.1339, "step": 48690 }, { "epoch": 0.3111304192274766, "grad_norm": 0.6909173727035522, "learning_rate": 9.415137839007127e-05, "loss": 0.8049, "step": 48700 }, { "epoch": 0.3111943063772153, "grad_norm": 0.929655909538269, "learning_rate": 9.414902326220225e-05, "loss": 0.8028, "step": 48710 }, { "epoch": 0.311258193526954, "grad_norm": 0.9743746519088745, "learning_rate": 9.414666768971438e-05, "loss": 0.8393, "step": 48720 }, { "epoch": 0.3113220806766927, "grad_norm": 0.9275550246238708, "learning_rate": 9.414431167263139e-05, "loss": 0.9987, "step": 48730 }, { "epoch": 0.3113859678264314, "grad_norm": 0.9921037554740906, "learning_rate": 9.414195521097697e-05, "loss": 0.87, "step": 48740 }, { "epoch": 0.3114498549761701, "grad_norm": 0.6033504605293274, "learning_rate": 9.413959830477488e-05, "loss": 1.0159, "step": 48750 }, { "epoch": 0.3115137421259088, "grad_norm": 0.709073543548584, "learning_rate": 9.413724095404884e-05, "loss": 0.8106, "step": 48760 }, { "epoch": 0.31157762927564747, "grad_norm": 0.8493859171867371, "learning_rate": 9.413488315882261e-05, "loss": 0.7047, "step": 48770 }, { "epoch": 0.3116415164253862, "grad_norm": 1.3952586650848389, "learning_rate": 9.413252491911993e-05, "loss": 0.8132, "step": 48780 }, { "epoch": 0.3117054035751249, "grad_norm": 1.9814708232879639, "learning_rate": 9.413016623496452e-05, "loss": 0.709, "step": 48790 }, { "epoch": 0.3117692907248636, "grad_norm": 0.6453489661216736, "learning_rate": 9.412780710638017e-05, "loss": 0.6557, "step": 48800 }, { "epoch": 0.3118331778746023, "grad_norm": 0.954566478729248, "learning_rate": 9.412544753339063e-05, "loss": 0.9015, "step": 48810 }, { "epoch": 0.311897065024341, "grad_norm": 1.347273588180542, "learning_rate": 9.412308751601967e-05, "loss": 0.7975, "step": 48820 }, { "epoch": 0.3119609521740797, "grad_norm": 1.4250802993774414, "learning_rate": 9.412072705429103e-05, "loss": 0.9073, "step": 48830 }, { "epoch": 0.3120248393238184, "grad_norm": 0.8338466286659241, "learning_rate": 9.41183661482285e-05, "loss": 1.0889, "step": 48840 }, { "epoch": 0.3120887264735571, "grad_norm": 0.8401825428009033, "learning_rate": 9.411600479785586e-05, "loss": 0.9772, "step": 48850 }, { "epoch": 0.3121526136232958, "grad_norm": 0.7773457169532776, "learning_rate": 9.411364300319688e-05, "loss": 0.6319, "step": 48860 }, { "epoch": 0.31221650077303453, "grad_norm": 0.6651679873466492, "learning_rate": 9.411128076427536e-05, "loss": 0.7987, "step": 48870 }, { "epoch": 0.31228038792277324, "grad_norm": 1.0372742414474487, "learning_rate": 9.410891808111508e-05, "loss": 0.8707, "step": 48880 }, { "epoch": 0.3123442750725119, "grad_norm": 1.1569018363952637, "learning_rate": 9.410655495373983e-05, "loss": 0.875, "step": 48890 }, { "epoch": 0.3124081622222506, "grad_norm": 0.7065810561180115, "learning_rate": 9.41041913821734e-05, "loss": 1.1122, "step": 48900 }, { "epoch": 0.3124720493719893, "grad_norm": 0.7592551112174988, "learning_rate": 9.410182736643964e-05, "loss": 0.9027, "step": 48910 }, { "epoch": 0.312535936521728, "grad_norm": 0.6045570969581604, "learning_rate": 9.40994629065623e-05, "loss": 1.0845, "step": 48920 }, { "epoch": 0.3125998236714667, "grad_norm": 1.0101828575134277, "learning_rate": 9.409709800256523e-05, "loss": 0.7681, "step": 48930 }, { "epoch": 0.3126637108212054, "grad_norm": 0.5947315692901611, "learning_rate": 9.409473265447224e-05, "loss": 0.8502, "step": 48940 }, { "epoch": 0.3127275979709441, "grad_norm": 0.9828523397445679, "learning_rate": 9.409236686230713e-05, "loss": 0.8704, "step": 48950 }, { "epoch": 0.31279148512068283, "grad_norm": 0.731641948223114, "learning_rate": 9.409000062609374e-05, "loss": 1.0281, "step": 48960 }, { "epoch": 0.31285537227042154, "grad_norm": 0.9199566841125488, "learning_rate": 9.408763394585592e-05, "loss": 0.95, "step": 48970 }, { "epoch": 0.31291925942016025, "grad_norm": 1.1518223285675049, "learning_rate": 9.408526682161746e-05, "loss": 0.8526, "step": 48980 }, { "epoch": 0.31298314656989895, "grad_norm": 0.8787213563919067, "learning_rate": 9.408289925340224e-05, "loss": 0.9255, "step": 48990 }, { "epoch": 0.31304703371963766, "grad_norm": 0.9983569979667664, "learning_rate": 9.408053124123408e-05, "loss": 1.1062, "step": 49000 }, { "epoch": 0.3131109208693763, "grad_norm": 1.392701268196106, "learning_rate": 9.407816278513683e-05, "loss": 0.6401, "step": 49010 }, { "epoch": 0.313174808019115, "grad_norm": 0.5991133451461792, "learning_rate": 9.407579388513434e-05, "loss": 0.8792, "step": 49020 }, { "epoch": 0.3132386951688537, "grad_norm": 0.7458668947219849, "learning_rate": 9.40734245412505e-05, "loss": 1.1298, "step": 49030 }, { "epoch": 0.3133025823185924, "grad_norm": 1.072922706604004, "learning_rate": 9.407105475350914e-05, "loss": 0.8286, "step": 49040 }, { "epoch": 0.31336646946833113, "grad_norm": 2.292825698852539, "learning_rate": 9.406868452193411e-05, "loss": 1.0391, "step": 49050 }, { "epoch": 0.31343035661806984, "grad_norm": 0.8459042906761169, "learning_rate": 9.406631384654934e-05, "loss": 0.8123, "step": 49060 }, { "epoch": 0.31349424376780854, "grad_norm": 1.0259894132614136, "learning_rate": 9.406394272737863e-05, "loss": 0.8378, "step": 49070 }, { "epoch": 0.31355813091754725, "grad_norm": 0.6724763512611389, "learning_rate": 9.406157116444592e-05, "loss": 0.7612, "step": 49080 }, { "epoch": 0.31362201806728596, "grad_norm": 0.6208413243293762, "learning_rate": 9.405919915777506e-05, "loss": 0.8256, "step": 49090 }, { "epoch": 0.31368590521702466, "grad_norm": 0.5845530033111572, "learning_rate": 9.405682670738995e-05, "loss": 0.8746, "step": 49100 }, { "epoch": 0.31374979236676337, "grad_norm": 1.0131624937057495, "learning_rate": 9.405445381331449e-05, "loss": 0.7601, "step": 49110 }, { "epoch": 0.3138136795165021, "grad_norm": 0.8902072310447693, "learning_rate": 9.405208047557255e-05, "loss": 1.0531, "step": 49120 }, { "epoch": 0.3138775666662407, "grad_norm": 1.196207880973816, "learning_rate": 9.404970669418804e-05, "loss": 1.023, "step": 49130 }, { "epoch": 0.31394145381597943, "grad_norm": 0.9805600643157959, "learning_rate": 9.404733246918489e-05, "loss": 0.7877, "step": 49140 }, { "epoch": 0.31400534096571814, "grad_norm": 0.7303978204727173, "learning_rate": 9.404495780058701e-05, "loss": 0.8739, "step": 49150 }, { "epoch": 0.31406922811545684, "grad_norm": 0.754725456237793, "learning_rate": 9.404258268841827e-05, "loss": 0.8963, "step": 49160 }, { "epoch": 0.31413311526519555, "grad_norm": 0.8432728052139282, "learning_rate": 9.404020713270265e-05, "loss": 0.8278, "step": 49170 }, { "epoch": 0.31419700241493426, "grad_norm": 1.7309197187423706, "learning_rate": 9.403783113346402e-05, "loss": 0.7055, "step": 49180 }, { "epoch": 0.31426088956467296, "grad_norm": 1.0485713481903076, "learning_rate": 9.403545469072636e-05, "loss": 1.0848, "step": 49190 }, { "epoch": 0.31432477671441167, "grad_norm": 1.0555837154388428, "learning_rate": 9.403307780451356e-05, "loss": 0.9272, "step": 49200 }, { "epoch": 0.3143886638641504, "grad_norm": 0.642327070236206, "learning_rate": 9.403070047484957e-05, "loss": 1.0311, "step": 49210 }, { "epoch": 0.3144525510138891, "grad_norm": 1.0337790250778198, "learning_rate": 9.402832270175833e-05, "loss": 0.6988, "step": 49220 }, { "epoch": 0.3145164381636278, "grad_norm": 0.6804295182228088, "learning_rate": 9.40259444852638e-05, "loss": 0.9084, "step": 49230 }, { "epoch": 0.3145803253133665, "grad_norm": 0.9639153480529785, "learning_rate": 9.402356582538991e-05, "loss": 0.9531, "step": 49240 }, { "epoch": 0.31464421246310514, "grad_norm": 1.2512791156768799, "learning_rate": 9.402118672216064e-05, "loss": 0.8388, "step": 49250 }, { "epoch": 0.31470809961284385, "grad_norm": 1.065996527671814, "learning_rate": 9.401880717559993e-05, "loss": 1.0074, "step": 49260 }, { "epoch": 0.31477198676258256, "grad_norm": 0.7025091052055359, "learning_rate": 9.401642718573175e-05, "loss": 0.9072, "step": 49270 }, { "epoch": 0.31483587391232126, "grad_norm": 0.6397770047187805, "learning_rate": 9.401404675258006e-05, "loss": 0.9634, "step": 49280 }, { "epoch": 0.31489976106205997, "grad_norm": 0.938177764415741, "learning_rate": 9.401166587616885e-05, "loss": 1.015, "step": 49290 }, { "epoch": 0.3149636482117987, "grad_norm": 0.6089216470718384, "learning_rate": 9.400928455652209e-05, "loss": 0.8248, "step": 49300 }, { "epoch": 0.3150275353615374, "grad_norm": 0.5597997307777405, "learning_rate": 9.400690279366377e-05, "loss": 0.7582, "step": 49310 }, { "epoch": 0.3150914225112761, "grad_norm": 1.5323448181152344, "learning_rate": 9.400452058761784e-05, "loss": 0.8959, "step": 49320 }, { "epoch": 0.3151553096610148, "grad_norm": 0.6819522976875305, "learning_rate": 9.400213793840835e-05, "loss": 0.8379, "step": 49330 }, { "epoch": 0.3152191968107535, "grad_norm": 1.2610814571380615, "learning_rate": 9.399975484605925e-05, "loss": 1.0078, "step": 49340 }, { "epoch": 0.3152830839604922, "grad_norm": 0.9983810782432556, "learning_rate": 9.399737131059454e-05, "loss": 0.9125, "step": 49350 }, { "epoch": 0.3153469711102309, "grad_norm": 0.8259413838386536, "learning_rate": 9.399498733203827e-05, "loss": 0.921, "step": 49360 }, { "epoch": 0.31541085825996956, "grad_norm": 1.0133284330368042, "learning_rate": 9.399260291041439e-05, "loss": 0.9468, "step": 49370 }, { "epoch": 0.31547474540970827, "grad_norm": 0.9398267269134521, "learning_rate": 9.399021804574694e-05, "loss": 0.8747, "step": 49380 }, { "epoch": 0.315538632559447, "grad_norm": 0.7082047462463379, "learning_rate": 9.398783273805995e-05, "loss": 0.8867, "step": 49390 }, { "epoch": 0.3156025197091857, "grad_norm": 0.8589842915534973, "learning_rate": 9.398544698737743e-05, "loss": 0.713, "step": 49400 }, { "epoch": 0.3156664068589244, "grad_norm": 1.1471587419509888, "learning_rate": 9.398306079372339e-05, "loss": 0.9788, "step": 49410 }, { "epoch": 0.3157302940086631, "grad_norm": 0.49813616275787354, "learning_rate": 9.398067415712188e-05, "loss": 0.7366, "step": 49420 }, { "epoch": 0.3157941811584018, "grad_norm": 1.774720311164856, "learning_rate": 9.397828707759695e-05, "loss": 0.685, "step": 49430 }, { "epoch": 0.3158580683081405, "grad_norm": 1.121861219406128, "learning_rate": 9.397589955517261e-05, "loss": 0.9251, "step": 49440 }, { "epoch": 0.3159219554578792, "grad_norm": 0.8228227496147156, "learning_rate": 9.397351158987293e-05, "loss": 0.9838, "step": 49450 }, { "epoch": 0.3159858426076179, "grad_norm": 0.9918887615203857, "learning_rate": 9.397112318172192e-05, "loss": 0.8684, "step": 49460 }, { "epoch": 0.3160497297573566, "grad_norm": 0.8907731175422668, "learning_rate": 9.396873433074367e-05, "loss": 0.8603, "step": 49470 }, { "epoch": 0.31611361690709533, "grad_norm": 0.9690269827842712, "learning_rate": 9.396634503696225e-05, "loss": 0.931, "step": 49480 }, { "epoch": 0.31617750405683404, "grad_norm": 0.7284924983978271, "learning_rate": 9.396395530040167e-05, "loss": 0.8224, "step": 49490 }, { "epoch": 0.3162413912065727, "grad_norm": 0.8061665296554565, "learning_rate": 9.396156512108603e-05, "loss": 0.8418, "step": 49500 }, { "epoch": 0.3163052783563114, "grad_norm": 0.7680371999740601, "learning_rate": 9.39591744990394e-05, "loss": 0.5664, "step": 49510 }, { "epoch": 0.3163691655060501, "grad_norm": 0.8830310702323914, "learning_rate": 9.395678343428586e-05, "loss": 0.7123, "step": 49520 }, { "epoch": 0.3164330526557888, "grad_norm": 0.9915767908096313, "learning_rate": 9.395439192684947e-05, "loss": 0.9406, "step": 49530 }, { "epoch": 0.3164969398055275, "grad_norm": 1.3470020294189453, "learning_rate": 9.395199997675435e-05, "loss": 0.8661, "step": 49540 }, { "epoch": 0.3165608269552662, "grad_norm": 0.934509813785553, "learning_rate": 9.394960758402455e-05, "loss": 1.0339, "step": 49550 }, { "epoch": 0.3166247141050049, "grad_norm": 0.5762020349502563, "learning_rate": 9.394721474868418e-05, "loss": 0.9045, "step": 49560 }, { "epoch": 0.31668860125474363, "grad_norm": 0.8978357315063477, "learning_rate": 9.394482147075734e-05, "loss": 0.8767, "step": 49570 }, { "epoch": 0.31675248840448234, "grad_norm": 0.7672891020774841, "learning_rate": 9.394242775026812e-05, "loss": 0.9131, "step": 49580 }, { "epoch": 0.31681637555422104, "grad_norm": 0.9509188532829285, "learning_rate": 9.394003358724067e-05, "loss": 0.9731, "step": 49590 }, { "epoch": 0.31688026270395975, "grad_norm": 0.6911554336547852, "learning_rate": 9.393763898169903e-05, "loss": 0.7586, "step": 49600 }, { "epoch": 0.31694414985369845, "grad_norm": 0.9574739933013916, "learning_rate": 9.393524393366737e-05, "loss": 0.9106, "step": 49610 }, { "epoch": 0.3170080370034371, "grad_norm": 0.7374638915061951, "learning_rate": 9.393284844316979e-05, "loss": 0.7652, "step": 49620 }, { "epoch": 0.3170719241531758, "grad_norm": 0.971748948097229, "learning_rate": 9.393045251023042e-05, "loss": 0.9203, "step": 49630 }, { "epoch": 0.3171358113029145, "grad_norm": 0.8712042570114136, "learning_rate": 9.392805613487339e-05, "loss": 0.6788, "step": 49640 }, { "epoch": 0.3171996984526532, "grad_norm": 0.782733142375946, "learning_rate": 9.392565931712282e-05, "loss": 0.936, "step": 49650 }, { "epoch": 0.31726358560239193, "grad_norm": 0.6605043411254883, "learning_rate": 9.392326205700288e-05, "loss": 0.8659, "step": 49660 }, { "epoch": 0.31732747275213063, "grad_norm": 0.831889808177948, "learning_rate": 9.392086435453769e-05, "loss": 0.8596, "step": 49670 }, { "epoch": 0.31739135990186934, "grad_norm": 0.6261256337165833, "learning_rate": 9.391846620975139e-05, "loss": 0.9768, "step": 49680 }, { "epoch": 0.31745524705160805, "grad_norm": 0.8194597959518433, "learning_rate": 9.391606762266814e-05, "loss": 0.7694, "step": 49690 }, { "epoch": 0.31751913420134675, "grad_norm": 0.9790688157081604, "learning_rate": 9.39136685933121e-05, "loss": 1.0057, "step": 49700 }, { "epoch": 0.31758302135108546, "grad_norm": 0.8990379571914673, "learning_rate": 9.391126912170742e-05, "loss": 0.9249, "step": 49710 }, { "epoch": 0.31764690850082417, "grad_norm": 0.9338327050209045, "learning_rate": 9.390886920787828e-05, "loss": 0.99, "step": 49720 }, { "epoch": 0.31771079565056287, "grad_norm": 0.7837555408477783, "learning_rate": 9.390646885184884e-05, "loss": 0.966, "step": 49730 }, { "epoch": 0.3177746828003015, "grad_norm": 0.7973876595497131, "learning_rate": 9.390406805364327e-05, "loss": 0.9153, "step": 49740 }, { "epoch": 0.31783856995004023, "grad_norm": 0.8525822758674622, "learning_rate": 9.390166681328575e-05, "loss": 0.8138, "step": 49750 }, { "epoch": 0.31790245709977893, "grad_norm": 1.3615291118621826, "learning_rate": 9.389926513080047e-05, "loss": 0.887, "step": 49760 }, { "epoch": 0.31796634424951764, "grad_norm": 1.1276623010635376, "learning_rate": 9.389686300621162e-05, "loss": 0.9642, "step": 49770 }, { "epoch": 0.31803023139925635, "grad_norm": 0.7759473323822021, "learning_rate": 9.389446043954336e-05, "loss": 0.7954, "step": 49780 }, { "epoch": 0.31809411854899505, "grad_norm": 0.9552074074745178, "learning_rate": 9.389205743081992e-05, "loss": 0.8348, "step": 49790 }, { "epoch": 0.31815800569873376, "grad_norm": 0.6165658831596375, "learning_rate": 9.388965398006549e-05, "loss": 0.9181, "step": 49800 }, { "epoch": 0.31822189284847247, "grad_norm": 0.7126082181930542, "learning_rate": 9.388725008730428e-05, "loss": 1.1795, "step": 49810 }, { "epoch": 0.31828577999821117, "grad_norm": 0.8872388005256653, "learning_rate": 9.388484575256049e-05, "loss": 0.9206, "step": 49820 }, { "epoch": 0.3183496671479499, "grad_norm": 0.8513489365577698, "learning_rate": 9.388244097585835e-05, "loss": 1.0587, "step": 49830 }, { "epoch": 0.3184135542976886, "grad_norm": 0.6428020596504211, "learning_rate": 9.388003575722204e-05, "loss": 0.7731, "step": 49840 }, { "epoch": 0.3184774414474273, "grad_norm": 1.0599162578582764, "learning_rate": 9.387763009667583e-05, "loss": 0.8681, "step": 49850 }, { "epoch": 0.31854132859716594, "grad_norm": 0.6444383859634399, "learning_rate": 9.387522399424391e-05, "loss": 0.79, "step": 49860 }, { "epoch": 0.31860521574690465, "grad_norm": 0.6530598998069763, "learning_rate": 9.387281744995053e-05, "loss": 1.1558, "step": 49870 }, { "epoch": 0.31866910289664335, "grad_norm": 0.7804102897644043, "learning_rate": 9.387041046381994e-05, "loss": 0.9238, "step": 49880 }, { "epoch": 0.31873299004638206, "grad_norm": 0.8872278332710266, "learning_rate": 9.386800303587635e-05, "loss": 0.7599, "step": 49890 }, { "epoch": 0.31879687719612076, "grad_norm": 0.5427918434143066, "learning_rate": 9.386559516614401e-05, "loss": 0.8869, "step": 49900 }, { "epoch": 0.31886076434585947, "grad_norm": 0.8014676570892334, "learning_rate": 9.386318685464719e-05, "loss": 0.885, "step": 49910 }, { "epoch": 0.3189246514955982, "grad_norm": 0.7238976359367371, "learning_rate": 9.386077810141013e-05, "loss": 0.8402, "step": 49920 }, { "epoch": 0.3189885386453369, "grad_norm": 1.0209499597549438, "learning_rate": 9.385836890645708e-05, "loss": 0.7001, "step": 49930 }, { "epoch": 0.3190524257950756, "grad_norm": 0.978877067565918, "learning_rate": 9.385595926981232e-05, "loss": 0.8989, "step": 49940 }, { "epoch": 0.3191163129448143, "grad_norm": 1.336305856704712, "learning_rate": 9.385354919150011e-05, "loss": 1.0186, "step": 49950 }, { "epoch": 0.319180200094553, "grad_norm": 0.9015640616416931, "learning_rate": 9.385113867154473e-05, "loss": 0.9387, "step": 49960 }, { "epoch": 0.3192440872442917, "grad_norm": 1.013190507888794, "learning_rate": 9.384872770997043e-05, "loss": 1.1722, "step": 49970 }, { "epoch": 0.31930797439403036, "grad_norm": 1.667493224143982, "learning_rate": 9.384631630680152e-05, "loss": 0.9188, "step": 49980 }, { "epoch": 0.31937186154376906, "grad_norm": 0.7360647320747375, "learning_rate": 9.384390446206226e-05, "loss": 0.8126, "step": 49990 }, { "epoch": 0.31943574869350777, "grad_norm": 1.0479674339294434, "learning_rate": 9.384149217577695e-05, "loss": 0.8874, "step": 50000 }, { "epoch": 0.3194996358432465, "grad_norm": 0.7030632495880127, "learning_rate": 9.38390794479699e-05, "loss": 1.0218, "step": 50010 }, { "epoch": 0.3195635229929852, "grad_norm": 0.8451624512672424, "learning_rate": 9.383666627866539e-05, "loss": 0.8403, "step": 50020 }, { "epoch": 0.3196274101427239, "grad_norm": 0.5618483424186707, "learning_rate": 9.383425266788772e-05, "loss": 0.7409, "step": 50030 }, { "epoch": 0.3196912972924626, "grad_norm": 0.7848330736160278, "learning_rate": 9.383183861566121e-05, "loss": 0.9107, "step": 50040 }, { "epoch": 0.3197551844422013, "grad_norm": 0.7237081527709961, "learning_rate": 9.382942412201016e-05, "loss": 0.6871, "step": 50050 }, { "epoch": 0.31981907159194, "grad_norm": 1.093504786491394, "learning_rate": 9.382700918695889e-05, "loss": 0.8252, "step": 50060 }, { "epoch": 0.3198829587416787, "grad_norm": 0.733311653137207, "learning_rate": 9.382459381053173e-05, "loss": 0.7627, "step": 50070 }, { "epoch": 0.3199468458914174, "grad_norm": 0.8467728495597839, "learning_rate": 9.3822177992753e-05, "loss": 0.8656, "step": 50080 }, { "epoch": 0.3200107330411561, "grad_norm": 1.3215608596801758, "learning_rate": 9.381976173364702e-05, "loss": 0.7438, "step": 50090 }, { "epoch": 0.3200746201908948, "grad_norm": 0.7115028500556946, "learning_rate": 9.381734503323812e-05, "loss": 1.1696, "step": 50100 }, { "epoch": 0.3201385073406335, "grad_norm": 0.7195971608161926, "learning_rate": 9.381492789155066e-05, "loss": 0.7597, "step": 50110 }, { "epoch": 0.3202023944903722, "grad_norm": 0.7704067230224609, "learning_rate": 9.381251030860896e-05, "loss": 0.8963, "step": 50120 }, { "epoch": 0.3202662816401109, "grad_norm": 0.7331963181495667, "learning_rate": 9.381009228443737e-05, "loss": 0.7832, "step": 50130 }, { "epoch": 0.3203301687898496, "grad_norm": 0.8845292329788208, "learning_rate": 9.380767381906029e-05, "loss": 0.8378, "step": 50140 }, { "epoch": 0.3203940559395883, "grad_norm": 0.714884340763092, "learning_rate": 9.380525491250201e-05, "loss": 0.9028, "step": 50150 }, { "epoch": 0.320457943089327, "grad_norm": 0.6993659138679504, "learning_rate": 9.380283556478691e-05, "loss": 1.1563, "step": 50160 }, { "epoch": 0.3205218302390657, "grad_norm": 1.0421202182769775, "learning_rate": 9.380041577593937e-05, "loss": 0.9022, "step": 50170 }, { "epoch": 0.3205857173888044, "grad_norm": 1.632599115371704, "learning_rate": 9.379799554598374e-05, "loss": 0.8848, "step": 50180 }, { "epoch": 0.32064960453854313, "grad_norm": 0.9513861536979675, "learning_rate": 9.379557487494442e-05, "loss": 1.0339, "step": 50190 }, { "epoch": 0.32071349168828184, "grad_norm": 0.803616464138031, "learning_rate": 9.379315376284576e-05, "loss": 0.852, "step": 50200 }, { "epoch": 0.32077737883802054, "grad_norm": 0.938457727432251, "learning_rate": 9.379073220971215e-05, "loss": 0.9651, "step": 50210 }, { "epoch": 0.3208412659877592, "grad_norm": 1.7576130628585815, "learning_rate": 9.3788310215568e-05, "loss": 0.8734, "step": 50220 }, { "epoch": 0.3209051531374979, "grad_norm": 0.8921381831169128, "learning_rate": 9.378588778043766e-05, "loss": 0.9362, "step": 50230 }, { "epoch": 0.3209690402872366, "grad_norm": 0.6942434906959534, "learning_rate": 9.378346490434558e-05, "loss": 0.8118, "step": 50240 }, { "epoch": 0.3210329274369753, "grad_norm": 0.9267338514328003, "learning_rate": 9.378104158731611e-05, "loss": 0.8815, "step": 50250 }, { "epoch": 0.321096814586714, "grad_norm": 0.8614099621772766, "learning_rate": 9.377861782937369e-05, "loss": 0.8871, "step": 50260 }, { "epoch": 0.3211607017364527, "grad_norm": 0.944468080997467, "learning_rate": 9.37761936305427e-05, "loss": 1.0129, "step": 50270 }, { "epoch": 0.32122458888619143, "grad_norm": 1.3810163736343384, "learning_rate": 9.377376899084757e-05, "loss": 0.6677, "step": 50280 }, { "epoch": 0.32128847603593014, "grad_norm": 0.8450731039047241, "learning_rate": 9.377134391031272e-05, "loss": 1.0518, "step": 50290 }, { "epoch": 0.32135236318566884, "grad_norm": 1.011494755744934, "learning_rate": 9.376891838896258e-05, "loss": 0.7787, "step": 50300 }, { "epoch": 0.32141625033540755, "grad_norm": 0.8098331093788147, "learning_rate": 9.376649242682154e-05, "loss": 0.9035, "step": 50310 }, { "epoch": 0.32148013748514626, "grad_norm": 0.7809407711029053, "learning_rate": 9.376406602391407e-05, "loss": 0.8507, "step": 50320 }, { "epoch": 0.32154402463488496, "grad_norm": 1.0858904123306274, "learning_rate": 9.376163918026461e-05, "loss": 1.109, "step": 50330 }, { "epoch": 0.32160791178462367, "grad_norm": 1.2475218772888184, "learning_rate": 9.375921189589756e-05, "loss": 0.7929, "step": 50340 }, { "epoch": 0.3216717989343623, "grad_norm": 0.9990367293357849, "learning_rate": 9.375678417083741e-05, "loss": 0.9663, "step": 50350 }, { "epoch": 0.321735686084101, "grad_norm": 0.7379415035247803, "learning_rate": 9.375435600510858e-05, "loss": 0.8639, "step": 50360 }, { "epoch": 0.32179957323383973, "grad_norm": 0.7513096928596497, "learning_rate": 9.375192739873553e-05, "loss": 0.9025, "step": 50370 }, { "epoch": 0.32186346038357844, "grad_norm": 1.1147832870483398, "learning_rate": 9.374949835174273e-05, "loss": 1.0522, "step": 50380 }, { "epoch": 0.32192734753331714, "grad_norm": 1.0712778568267822, "learning_rate": 9.374706886415462e-05, "loss": 0.8807, "step": 50390 }, { "epoch": 0.32199123468305585, "grad_norm": 1.0778201818466187, "learning_rate": 9.374463893599568e-05, "loss": 1.0531, "step": 50400 }, { "epoch": 0.32205512183279456, "grad_norm": 0.7178623676300049, "learning_rate": 9.374220856729039e-05, "loss": 0.7271, "step": 50410 }, { "epoch": 0.32211900898253326, "grad_norm": 0.9530071020126343, "learning_rate": 9.373977775806321e-05, "loss": 0.7916, "step": 50420 }, { "epoch": 0.32218289613227197, "grad_norm": 0.8260478377342224, "learning_rate": 9.373734650833862e-05, "loss": 0.9391, "step": 50430 }, { "epoch": 0.3222467832820107, "grad_norm": 0.9254810810089111, "learning_rate": 9.373491481814114e-05, "loss": 1.0402, "step": 50440 }, { "epoch": 0.3223106704317494, "grad_norm": 1.080959439277649, "learning_rate": 9.373248268749521e-05, "loss": 0.8513, "step": 50450 }, { "epoch": 0.3223745575814881, "grad_norm": 1.447561264038086, "learning_rate": 9.373005011642534e-05, "loss": 0.8074, "step": 50460 }, { "epoch": 0.32243844473122674, "grad_norm": 0.7180037498474121, "learning_rate": 9.372761710495605e-05, "loss": 0.9142, "step": 50470 }, { "epoch": 0.32250233188096544, "grad_norm": 0.8241226077079773, "learning_rate": 9.372518365311183e-05, "loss": 0.7491, "step": 50480 }, { "epoch": 0.32256621903070415, "grad_norm": 0.647534966468811, "learning_rate": 9.372274976091718e-05, "loss": 0.988, "step": 50490 }, { "epoch": 0.32263010618044285, "grad_norm": 0.953140139579773, "learning_rate": 9.372031542839658e-05, "loss": 1.0637, "step": 50500 }, { "epoch": 0.32269399333018156, "grad_norm": 1.1263874769210815, "learning_rate": 9.371788065557463e-05, "loss": 0.8773, "step": 50510 }, { "epoch": 0.32275788047992027, "grad_norm": 0.9044056534767151, "learning_rate": 9.371544544247577e-05, "loss": 0.8032, "step": 50520 }, { "epoch": 0.322821767629659, "grad_norm": 0.7417857646942139, "learning_rate": 9.371300978912456e-05, "loss": 0.8008, "step": 50530 }, { "epoch": 0.3228856547793977, "grad_norm": 0.6616377830505371, "learning_rate": 9.371057369554552e-05, "loss": 0.9739, "step": 50540 }, { "epoch": 0.3229495419291364, "grad_norm": 0.9354878664016724, "learning_rate": 9.370813716176321e-05, "loss": 0.9562, "step": 50550 }, { "epoch": 0.3230134290788751, "grad_norm": 0.9551581740379333, "learning_rate": 9.370570018780213e-05, "loss": 0.9345, "step": 50560 }, { "epoch": 0.3230773162286138, "grad_norm": 0.7437955737113953, "learning_rate": 9.370326277368684e-05, "loss": 0.8398, "step": 50570 }, { "epoch": 0.3231412033783525, "grad_norm": 0.7190837264060974, "learning_rate": 9.370082491944188e-05, "loss": 0.978, "step": 50580 }, { "epoch": 0.32320509052809115, "grad_norm": 1.749056100845337, "learning_rate": 9.36983866250918e-05, "loss": 1.241, "step": 50590 }, { "epoch": 0.32326897767782986, "grad_norm": 0.8506900072097778, "learning_rate": 9.369594789066119e-05, "loss": 1.1666, "step": 50600 }, { "epoch": 0.32333286482756857, "grad_norm": 0.8432791829109192, "learning_rate": 9.369350871617454e-05, "loss": 1.1055, "step": 50610 }, { "epoch": 0.3233967519773073, "grad_norm": 0.8245983123779297, "learning_rate": 9.36910691016565e-05, "loss": 0.8282, "step": 50620 }, { "epoch": 0.323460639127046, "grad_norm": 0.7552667856216431, "learning_rate": 9.368862904713158e-05, "loss": 0.8247, "step": 50630 }, { "epoch": 0.3235245262767847, "grad_norm": 1.2741241455078125, "learning_rate": 9.368618855262437e-05, "loss": 0.8029, "step": 50640 }, { "epoch": 0.3235884134265234, "grad_norm": 0.47164657711982727, "learning_rate": 9.368374761815943e-05, "loss": 1.1075, "step": 50650 }, { "epoch": 0.3236523005762621, "grad_norm": 0.9931887984275818, "learning_rate": 9.368130624376139e-05, "loss": 0.7937, "step": 50660 }, { "epoch": 0.3237161877260008, "grad_norm": 0.6441866755485535, "learning_rate": 9.36788644294548e-05, "loss": 0.748, "step": 50670 }, { "epoch": 0.3237800748757395, "grad_norm": 1.0310367345809937, "learning_rate": 9.367642217526423e-05, "loss": 0.7168, "step": 50680 }, { "epoch": 0.3238439620254782, "grad_norm": 1.0950703620910645, "learning_rate": 9.367397948121433e-05, "loss": 0.722, "step": 50690 }, { "epoch": 0.3239078491752169, "grad_norm": 0.7349279522895813, "learning_rate": 9.367153634732966e-05, "loss": 0.9114, "step": 50700 }, { "epoch": 0.3239717363249556, "grad_norm": 0.6076371073722839, "learning_rate": 9.366909277363484e-05, "loss": 0.9256, "step": 50710 }, { "epoch": 0.3240356234746943, "grad_norm": 0.8350464105606079, "learning_rate": 9.366664876015448e-05, "loss": 0.7421, "step": 50720 }, { "epoch": 0.324099510624433, "grad_norm": 0.6974174976348877, "learning_rate": 9.36642043069132e-05, "loss": 0.9282, "step": 50730 }, { "epoch": 0.3241633977741717, "grad_norm": 0.8667670488357544, "learning_rate": 9.36617594139356e-05, "loss": 1.0113, "step": 50740 }, { "epoch": 0.3242272849239104, "grad_norm": 0.8927708864212036, "learning_rate": 9.365931408124631e-05, "loss": 1.0419, "step": 50750 }, { "epoch": 0.3242911720736491, "grad_norm": 0.6961429119110107, "learning_rate": 9.365686830886995e-05, "loss": 0.9287, "step": 50760 }, { "epoch": 0.3243550592233878, "grad_norm": 0.638645350933075, "learning_rate": 9.365442209683116e-05, "loss": 0.8599, "step": 50770 }, { "epoch": 0.3244189463731265, "grad_norm": 1.2358146905899048, "learning_rate": 9.365197544515456e-05, "loss": 0.7256, "step": 50780 }, { "epoch": 0.3244828335228652, "grad_norm": 0.6738364696502686, "learning_rate": 9.364952835386482e-05, "loss": 1.0181, "step": 50790 }, { "epoch": 0.32454672067260393, "grad_norm": 1.1015185117721558, "learning_rate": 9.364708082298656e-05, "loss": 0.6716, "step": 50800 }, { "epoch": 0.32461060782234263, "grad_norm": 0.45196080207824707, "learning_rate": 9.364463285254446e-05, "loss": 0.8246, "step": 50810 }, { "epoch": 0.32467449497208134, "grad_norm": 0.5970223546028137, "learning_rate": 9.364218444256312e-05, "loss": 1.08, "step": 50820 }, { "epoch": 0.32473838212182, "grad_norm": 0.6557901501655579, "learning_rate": 9.363973559306724e-05, "loss": 0.8581, "step": 50830 }, { "epoch": 0.3248022692715587, "grad_norm": 1.3220319747924805, "learning_rate": 9.363728630408146e-05, "loss": 0.8748, "step": 50840 }, { "epoch": 0.3248661564212974, "grad_norm": 0.6746674180030823, "learning_rate": 9.363483657563046e-05, "loss": 0.9018, "step": 50850 }, { "epoch": 0.3249300435710361, "grad_norm": 0.8742882013320923, "learning_rate": 9.363238640773891e-05, "loss": 0.8107, "step": 50860 }, { "epoch": 0.3249939307207748, "grad_norm": 0.8248798847198486, "learning_rate": 9.362993580043148e-05, "loss": 0.9382, "step": 50870 }, { "epoch": 0.3250578178705135, "grad_norm": 0.7204467058181763, "learning_rate": 9.362748475373284e-05, "loss": 1.0355, "step": 50880 }, { "epoch": 0.3251217050202522, "grad_norm": 2.2558605670928955, "learning_rate": 9.36250332676677e-05, "loss": 1.1596, "step": 50890 }, { "epoch": 0.32518559216999093, "grad_norm": 0.6045457124710083, "learning_rate": 9.362258134226074e-05, "loss": 0.905, "step": 50900 }, { "epoch": 0.32524947931972964, "grad_norm": 0.6380605101585388, "learning_rate": 9.362012897753662e-05, "loss": 0.969, "step": 50910 }, { "epoch": 0.32531336646946835, "grad_norm": 1.1410925388336182, "learning_rate": 9.361767617352008e-05, "loss": 0.7254, "step": 50920 }, { "epoch": 0.32537725361920705, "grad_norm": 1.1019296646118164, "learning_rate": 9.361522293023581e-05, "loss": 0.9216, "step": 50930 }, { "epoch": 0.32544114076894576, "grad_norm": 1.174682378768921, "learning_rate": 9.361276924770853e-05, "loss": 0.8368, "step": 50940 }, { "epoch": 0.3255050279186844, "grad_norm": 0.5832127928733826, "learning_rate": 9.36103151259629e-05, "loss": 0.7741, "step": 50950 }, { "epoch": 0.3255689150684231, "grad_norm": 0.9537053108215332, "learning_rate": 9.360786056502367e-05, "loss": 0.9866, "step": 50960 }, { "epoch": 0.3256328022181618, "grad_norm": 1.0229930877685547, "learning_rate": 9.360540556491558e-05, "loss": 0.8135, "step": 50970 }, { "epoch": 0.3256966893679005, "grad_norm": 1.187549352645874, "learning_rate": 9.360295012566332e-05, "loss": 0.9386, "step": 50980 }, { "epoch": 0.32576057651763923, "grad_norm": 1.09153151512146, "learning_rate": 9.360049424729162e-05, "loss": 0.8265, "step": 50990 }, { "epoch": 0.32582446366737794, "grad_norm": 0.7309248447418213, "learning_rate": 9.359803792982525e-05, "loss": 0.8341, "step": 51000 }, { "epoch": 0.32588835081711665, "grad_norm": 0.45413196086883545, "learning_rate": 9.359558117328891e-05, "loss": 0.9438, "step": 51010 }, { "epoch": 0.32595223796685535, "grad_norm": 0.8382761478424072, "learning_rate": 9.359312397770733e-05, "loss": 0.8004, "step": 51020 }, { "epoch": 0.32601612511659406, "grad_norm": 0.8672879338264465, "learning_rate": 9.359066634310529e-05, "loss": 0.9812, "step": 51030 }, { "epoch": 0.32608001226633276, "grad_norm": 1.0492173433303833, "learning_rate": 9.358820826950754e-05, "loss": 0.6533, "step": 51040 }, { "epoch": 0.32614389941607147, "grad_norm": 0.9843623042106628, "learning_rate": 9.358574975693882e-05, "loss": 0.8744, "step": 51050 }, { "epoch": 0.3262077865658102, "grad_norm": 0.9067795872688293, "learning_rate": 9.358329080542389e-05, "loss": 0.8468, "step": 51060 }, { "epoch": 0.3262716737155489, "grad_norm": 0.6147485971450806, "learning_rate": 9.358083141498751e-05, "loss": 0.9103, "step": 51070 }, { "epoch": 0.32633556086528753, "grad_norm": 0.8605659008026123, "learning_rate": 9.357837158565446e-05, "loss": 0.7366, "step": 51080 }, { "epoch": 0.32639944801502624, "grad_norm": 0.6923385858535767, "learning_rate": 9.357591131744952e-05, "loss": 1.0148, "step": 51090 }, { "epoch": 0.32646333516476495, "grad_norm": 0.8151227235794067, "learning_rate": 9.357345061039745e-05, "loss": 0.9406, "step": 51100 }, { "epoch": 0.32652722231450365, "grad_norm": 0.7649495005607605, "learning_rate": 9.357098946452301e-05, "loss": 0.8339, "step": 51110 }, { "epoch": 0.32659110946424236, "grad_norm": 1.2730705738067627, "learning_rate": 9.356852787985105e-05, "loss": 1.0306, "step": 51120 }, { "epoch": 0.32665499661398106, "grad_norm": 0.6461998820304871, "learning_rate": 9.35660658564063e-05, "loss": 0.7584, "step": 51130 }, { "epoch": 0.32671888376371977, "grad_norm": 0.999034583568573, "learning_rate": 9.356360339421357e-05, "loss": 0.9405, "step": 51140 }, { "epoch": 0.3267827709134585, "grad_norm": 1.0120235681533813, "learning_rate": 9.356114049329767e-05, "loss": 0.8825, "step": 51150 }, { "epoch": 0.3268466580631972, "grad_norm": 0.9906069040298462, "learning_rate": 9.35586771536834e-05, "loss": 0.8241, "step": 51160 }, { "epoch": 0.3269105452129359, "grad_norm": 0.6129631996154785, "learning_rate": 9.355621337539558e-05, "loss": 0.8977, "step": 51170 }, { "epoch": 0.3269744323626746, "grad_norm": 0.9414940476417542, "learning_rate": 9.3553749158459e-05, "loss": 0.9349, "step": 51180 }, { "epoch": 0.3270383195124133, "grad_norm": 0.7236936092376709, "learning_rate": 9.35512845028985e-05, "loss": 0.8081, "step": 51190 }, { "epoch": 0.32710220666215195, "grad_norm": 1.0342822074890137, "learning_rate": 9.354881940873888e-05, "loss": 0.8802, "step": 51200 }, { "epoch": 0.32716609381189066, "grad_norm": 0.7799363136291504, "learning_rate": 9.354635387600497e-05, "loss": 0.8126, "step": 51210 }, { "epoch": 0.32722998096162936, "grad_norm": 0.5983444452285767, "learning_rate": 9.35438879047216e-05, "loss": 0.909, "step": 51220 }, { "epoch": 0.32729386811136807, "grad_norm": 0.9435120820999146, "learning_rate": 9.35414214949136e-05, "loss": 0.8125, "step": 51230 }, { "epoch": 0.3273577552611068, "grad_norm": 0.7243615388870239, "learning_rate": 9.353895464660585e-05, "loss": 0.9006, "step": 51240 }, { "epoch": 0.3274216424108455, "grad_norm": 0.7836112976074219, "learning_rate": 9.353648735982312e-05, "loss": 0.8456, "step": 51250 }, { "epoch": 0.3274855295605842, "grad_norm": 0.8060475587844849, "learning_rate": 9.353401963459032e-05, "loss": 0.8903, "step": 51260 }, { "epoch": 0.3275494167103229, "grad_norm": 0.49010974168777466, "learning_rate": 9.353155147093228e-05, "loss": 0.7631, "step": 51270 }, { "epoch": 0.3276133038600616, "grad_norm": 1.089375615119934, "learning_rate": 9.352908286887385e-05, "loss": 0.8264, "step": 51280 }, { "epoch": 0.3276771910098003, "grad_norm": 0.7374083399772644, "learning_rate": 9.35266138284399e-05, "loss": 0.7873, "step": 51290 }, { "epoch": 0.327741078159539, "grad_norm": 0.8920226693153381, "learning_rate": 9.352414434965531e-05, "loss": 0.8032, "step": 51300 }, { "epoch": 0.3278049653092777, "grad_norm": 0.8124125599861145, "learning_rate": 9.35216744325449e-05, "loss": 0.7542, "step": 51310 }, { "epoch": 0.32786885245901637, "grad_norm": 0.7306677103042603, "learning_rate": 9.35192040771336e-05, "loss": 0.7191, "step": 51320 }, { "epoch": 0.3279327396087551, "grad_norm": 1.0035959482192993, "learning_rate": 9.351673328344626e-05, "loss": 0.8754, "step": 51330 }, { "epoch": 0.3279966267584938, "grad_norm": 1.0333647727966309, "learning_rate": 9.351426205150777e-05, "loss": 1.091, "step": 51340 }, { "epoch": 0.3280605139082325, "grad_norm": 0.8857297897338867, "learning_rate": 9.351179038134301e-05, "loss": 0.8794, "step": 51350 }, { "epoch": 0.3281244010579712, "grad_norm": 0.7112529873847961, "learning_rate": 9.350931827297689e-05, "loss": 0.9096, "step": 51360 }, { "epoch": 0.3281882882077099, "grad_norm": 1.19603431224823, "learning_rate": 9.350684572643427e-05, "loss": 1.207, "step": 51370 }, { "epoch": 0.3282521753574486, "grad_norm": 0.8583690524101257, "learning_rate": 9.350437274174009e-05, "loss": 0.9503, "step": 51380 }, { "epoch": 0.3283160625071873, "grad_norm": 1.2817461490631104, "learning_rate": 9.350189931891925e-05, "loss": 0.8277, "step": 51390 }, { "epoch": 0.328379949656926, "grad_norm": 1.0474090576171875, "learning_rate": 9.349942545799664e-05, "loss": 0.6875, "step": 51400 }, { "epoch": 0.3284438368066647, "grad_norm": 2.7098753452301025, "learning_rate": 9.349695115899717e-05, "loss": 1.1116, "step": 51410 }, { "epoch": 0.32850772395640343, "grad_norm": 0.9274638295173645, "learning_rate": 9.349447642194578e-05, "loss": 0.9108, "step": 51420 }, { "epoch": 0.32857161110614214, "grad_norm": 0.9244821667671204, "learning_rate": 9.34920012468674e-05, "loss": 1.0516, "step": 51430 }, { "epoch": 0.3286354982558808, "grad_norm": 0.7755671143531799, "learning_rate": 9.348952563378693e-05, "loss": 0.9229, "step": 51440 }, { "epoch": 0.3286993854056195, "grad_norm": 0.8693557381629944, "learning_rate": 9.348704958272931e-05, "loss": 1.0101, "step": 51450 }, { "epoch": 0.3287632725553582, "grad_norm": 1.1465810537338257, "learning_rate": 9.348457309371948e-05, "loss": 0.6397, "step": 51460 }, { "epoch": 0.3288271597050969, "grad_norm": 1.0010018348693848, "learning_rate": 9.348209616678238e-05, "loss": 0.6065, "step": 51470 }, { "epoch": 0.3288910468548356, "grad_norm": 0.7339697480201721, "learning_rate": 9.347961880194296e-05, "loss": 0.8538, "step": 51480 }, { "epoch": 0.3289549340045743, "grad_norm": 0.5943730473518372, "learning_rate": 9.347714099922616e-05, "loss": 0.7904, "step": 51490 }, { "epoch": 0.329018821154313, "grad_norm": 0.6852931380271912, "learning_rate": 9.347466275865694e-05, "loss": 0.8648, "step": 51500 }, { "epoch": 0.32908270830405173, "grad_norm": 0.9251651167869568, "learning_rate": 9.347218408026025e-05, "loss": 0.9211, "step": 51510 }, { "epoch": 0.32914659545379044, "grad_norm": 0.7798057794570923, "learning_rate": 9.346970496406105e-05, "loss": 0.6644, "step": 51520 }, { "epoch": 0.32921048260352914, "grad_norm": 0.7447190880775452, "learning_rate": 9.346722541008432e-05, "loss": 0.7424, "step": 51530 }, { "epoch": 0.32927436975326785, "grad_norm": 0.6202836036682129, "learning_rate": 9.346474541835504e-05, "loss": 1.0315, "step": 51540 }, { "epoch": 0.32933825690300655, "grad_norm": 0.8937282562255859, "learning_rate": 9.346226498889817e-05, "loss": 0.8993, "step": 51550 }, { "epoch": 0.3294021440527452, "grad_norm": 0.7935136556625366, "learning_rate": 9.345978412173866e-05, "loss": 0.8778, "step": 51560 }, { "epoch": 0.3294660312024839, "grad_norm": 0.5474129319190979, "learning_rate": 9.345730281690156e-05, "loss": 0.9576, "step": 51570 }, { "epoch": 0.3295299183522226, "grad_norm": 0.7357134819030762, "learning_rate": 9.345482107441182e-05, "loss": 0.7586, "step": 51580 }, { "epoch": 0.3295938055019613, "grad_norm": 1.1119588613510132, "learning_rate": 9.345233889429442e-05, "loss": 0.9351, "step": 51590 }, { "epoch": 0.32965769265170003, "grad_norm": 1.2632428407669067, "learning_rate": 9.344985627657439e-05, "loss": 0.7127, "step": 51600 }, { "epoch": 0.32972157980143874, "grad_norm": 0.9762067198753357, "learning_rate": 9.344737322127671e-05, "loss": 1.2152, "step": 51610 }, { "epoch": 0.32978546695117744, "grad_norm": 0.6457685232162476, "learning_rate": 9.34448897284264e-05, "loss": 0.7824, "step": 51620 }, { "epoch": 0.32984935410091615, "grad_norm": 0.8155549764633179, "learning_rate": 9.344240579804846e-05, "loss": 1.0339, "step": 51630 }, { "epoch": 0.32991324125065485, "grad_norm": 0.7448208332061768, "learning_rate": 9.343992143016791e-05, "loss": 0.8618, "step": 51640 }, { "epoch": 0.32997712840039356, "grad_norm": 1.0054072141647339, "learning_rate": 9.343743662480977e-05, "loss": 1.0246, "step": 51650 }, { "epoch": 0.33004101555013227, "grad_norm": 0.7192854285240173, "learning_rate": 9.343495138199907e-05, "loss": 0.8324, "step": 51660 }, { "epoch": 0.330104902699871, "grad_norm": 1.023016095161438, "learning_rate": 9.343246570176083e-05, "loss": 0.9012, "step": 51670 }, { "epoch": 0.3301687898496096, "grad_norm": 0.9066780209541321, "learning_rate": 9.34299795841201e-05, "loss": 0.7939, "step": 51680 }, { "epoch": 0.33023267699934833, "grad_norm": 0.9442875385284424, "learning_rate": 9.342749302910188e-05, "loss": 0.8326, "step": 51690 }, { "epoch": 0.33029656414908704, "grad_norm": 0.7106100916862488, "learning_rate": 9.342500603673125e-05, "loss": 0.9531, "step": 51700 }, { "epoch": 0.33036045129882574, "grad_norm": 1.2981065511703491, "learning_rate": 9.342251860703324e-05, "loss": 0.9319, "step": 51710 }, { "epoch": 0.33042433844856445, "grad_norm": 0.6678254008293152, "learning_rate": 9.34200307400329e-05, "loss": 0.7245, "step": 51720 }, { "epoch": 0.33048822559830315, "grad_norm": 0.716221809387207, "learning_rate": 9.341754243575528e-05, "loss": 0.9938, "step": 51730 }, { "epoch": 0.33055211274804186, "grad_norm": 0.9788273572921753, "learning_rate": 9.341505369422546e-05, "loss": 0.9796, "step": 51740 }, { "epoch": 0.33061599989778057, "grad_norm": 0.6634423136711121, "learning_rate": 9.341256451546848e-05, "loss": 0.9807, "step": 51750 }, { "epoch": 0.33067988704751927, "grad_norm": 1.2706855535507202, "learning_rate": 9.341007489950942e-05, "loss": 0.6877, "step": 51760 }, { "epoch": 0.330743774197258, "grad_norm": 2.7023708820343018, "learning_rate": 9.340758484637334e-05, "loss": 0.7223, "step": 51770 }, { "epoch": 0.3308076613469967, "grad_norm": 0.543978214263916, "learning_rate": 9.340509435608534e-05, "loss": 0.9397, "step": 51780 }, { "epoch": 0.3308715484967354, "grad_norm": 0.8344589471817017, "learning_rate": 9.340260342867049e-05, "loss": 0.884, "step": 51790 }, { "epoch": 0.33093543564647404, "grad_norm": 0.6055552959442139, "learning_rate": 9.340011206415386e-05, "loss": 0.6553, "step": 51800 }, { "epoch": 0.33099932279621275, "grad_norm": 1.077162265777588, "learning_rate": 9.339762026256058e-05, "loss": 0.7583, "step": 51810 }, { "epoch": 0.33106320994595145, "grad_norm": 1.1166653633117676, "learning_rate": 9.33951280239157e-05, "loss": 0.8469, "step": 51820 }, { "epoch": 0.33112709709569016, "grad_norm": 0.7520068287849426, "learning_rate": 9.339263534824436e-05, "loss": 0.8934, "step": 51830 }, { "epoch": 0.33119098424542887, "grad_norm": 0.8291226029396057, "learning_rate": 9.339014223557163e-05, "loss": 0.9665, "step": 51840 }, { "epoch": 0.33125487139516757, "grad_norm": 0.8782137036323547, "learning_rate": 9.338764868592262e-05, "loss": 0.8251, "step": 51850 }, { "epoch": 0.3313187585449063, "grad_norm": 0.6978154182434082, "learning_rate": 9.338515469932246e-05, "loss": 0.7853, "step": 51860 }, { "epoch": 0.331382645694645, "grad_norm": 0.7604345679283142, "learning_rate": 9.338266027579626e-05, "loss": 0.8233, "step": 51870 }, { "epoch": 0.3314465328443837, "grad_norm": 0.9156827926635742, "learning_rate": 9.338016541536914e-05, "loss": 0.7708, "step": 51880 }, { "epoch": 0.3315104199941224, "grad_norm": 0.6973231434822083, "learning_rate": 9.337767011806622e-05, "loss": 0.82, "step": 51890 }, { "epoch": 0.3315743071438611, "grad_norm": 0.7553335428237915, "learning_rate": 9.337517438391263e-05, "loss": 0.9323, "step": 51900 }, { "epoch": 0.3316381942935998, "grad_norm": 1.0353292226791382, "learning_rate": 9.337267821293351e-05, "loss": 1.0569, "step": 51910 }, { "epoch": 0.3317020814433385, "grad_norm": 0.6070806980133057, "learning_rate": 9.3370181605154e-05, "loss": 0.7626, "step": 51920 }, { "epoch": 0.33176596859307717, "grad_norm": 1.3806092739105225, "learning_rate": 9.336768456059925e-05, "loss": 0.8368, "step": 51930 }, { "epoch": 0.33182985574281587, "grad_norm": 0.8327397108078003, "learning_rate": 9.33651870792944e-05, "loss": 0.8303, "step": 51940 }, { "epoch": 0.3318937428925546, "grad_norm": 0.916780948638916, "learning_rate": 9.33626891612646e-05, "loss": 0.8053, "step": 51950 }, { "epoch": 0.3319576300422933, "grad_norm": 0.7326523065567017, "learning_rate": 9.3360190806535e-05, "loss": 0.8407, "step": 51960 }, { "epoch": 0.332021517192032, "grad_norm": 1.0814404487609863, "learning_rate": 9.335769201513075e-05, "loss": 1.0026, "step": 51970 }, { "epoch": 0.3320854043417707, "grad_norm": 0.9184064865112305, "learning_rate": 9.335519278707705e-05, "loss": 1.1877, "step": 51980 }, { "epoch": 0.3321492914915094, "grad_norm": 0.7729029655456543, "learning_rate": 9.335269312239904e-05, "loss": 1.0875, "step": 51990 }, { "epoch": 0.3322131786412481, "grad_norm": 1.2618939876556396, "learning_rate": 9.335019302112193e-05, "loss": 0.9594, "step": 52000 }, { "epoch": 0.3322770657909868, "grad_norm": 0.6286314725875854, "learning_rate": 9.334769248327085e-05, "loss": 0.8619, "step": 52010 }, { "epoch": 0.3323409529407255, "grad_norm": 1.7984399795532227, "learning_rate": 9.334519150887103e-05, "loss": 0.9147, "step": 52020 }, { "epoch": 0.3324048400904642, "grad_norm": 1.0144270658493042, "learning_rate": 9.33426900979476e-05, "loss": 0.8399, "step": 52030 }, { "epoch": 0.33246872724020293, "grad_norm": 1.9516681432724, "learning_rate": 9.33401882505258e-05, "loss": 0.8073, "step": 52040 }, { "epoch": 0.3325326143899416, "grad_norm": 0.5465503931045532, "learning_rate": 9.333768596663082e-05, "loss": 0.8589, "step": 52050 }, { "epoch": 0.3325965015396803, "grad_norm": 0.9213358759880066, "learning_rate": 9.333518324628783e-05, "loss": 0.6787, "step": 52060 }, { "epoch": 0.332660388689419, "grad_norm": 0.7872808575630188, "learning_rate": 9.333268008952206e-05, "loss": 0.8307, "step": 52070 }, { "epoch": 0.3327242758391577, "grad_norm": 0.9161990284919739, "learning_rate": 9.333017649635871e-05, "loss": 1.1748, "step": 52080 }, { "epoch": 0.3327881629888964, "grad_norm": 0.7564883232116699, "learning_rate": 9.332767246682301e-05, "loss": 0.735, "step": 52090 }, { "epoch": 0.3328520501386351, "grad_norm": 0.7654510140419006, "learning_rate": 9.332516800094015e-05, "loss": 0.5545, "step": 52100 }, { "epoch": 0.3329159372883738, "grad_norm": 0.5725670456886292, "learning_rate": 9.332266309873538e-05, "loss": 1.294, "step": 52110 }, { "epoch": 0.3329798244381125, "grad_norm": 1.0625219345092773, "learning_rate": 9.332015776023391e-05, "loss": 0.9125, "step": 52120 }, { "epoch": 0.33304371158785123, "grad_norm": 0.9181973934173584, "learning_rate": 9.331765198546097e-05, "loss": 0.8822, "step": 52130 }, { "epoch": 0.33310759873758994, "grad_norm": 1.5193865299224854, "learning_rate": 9.33151457744418e-05, "loss": 0.8638, "step": 52140 }, { "epoch": 0.33317148588732864, "grad_norm": 1.0229812860488892, "learning_rate": 9.331263912720165e-05, "loss": 0.8326, "step": 52150 }, { "epoch": 0.33323537303706735, "grad_norm": 1.2124236822128296, "learning_rate": 9.331013204376573e-05, "loss": 0.7525, "step": 52160 }, { "epoch": 0.333299260186806, "grad_norm": 0.8149605393409729, "learning_rate": 9.330762452415934e-05, "loss": 0.9571, "step": 52170 }, { "epoch": 0.3333631473365447, "grad_norm": 1.2210596799850464, "learning_rate": 9.330511656840768e-05, "loss": 1.0235, "step": 52180 }, { "epoch": 0.3334270344862834, "grad_norm": 0.6121252775192261, "learning_rate": 9.330260817653604e-05, "loss": 0.8165, "step": 52190 }, { "epoch": 0.3334909216360221, "grad_norm": 0.770204484462738, "learning_rate": 9.330009934856967e-05, "loss": 1.0588, "step": 52200 }, { "epoch": 0.3335548087857608, "grad_norm": 0.6882258057594299, "learning_rate": 9.329759008453385e-05, "loss": 0.7148, "step": 52210 }, { "epoch": 0.33361869593549953, "grad_norm": 1.0399905443191528, "learning_rate": 9.329508038445382e-05, "loss": 0.7462, "step": 52220 }, { "epoch": 0.33368258308523824, "grad_norm": 1.3440240621566772, "learning_rate": 9.32925702483549e-05, "loss": 0.8031, "step": 52230 }, { "epoch": 0.33374647023497694, "grad_norm": 0.5900636911392212, "learning_rate": 9.329005967626234e-05, "loss": 0.8395, "step": 52240 }, { "epoch": 0.33381035738471565, "grad_norm": 0.8768534660339355, "learning_rate": 9.328754866820142e-05, "loss": 1.068, "step": 52250 }, { "epoch": 0.33387424453445436, "grad_norm": 1.3895585536956787, "learning_rate": 9.328503722419744e-05, "loss": 0.9927, "step": 52260 }, { "epoch": 0.33393813168419306, "grad_norm": 0.9054799675941467, "learning_rate": 9.328252534427568e-05, "loss": 0.7075, "step": 52270 }, { "epoch": 0.33400201883393177, "grad_norm": 0.9763078689575195, "learning_rate": 9.328001302846145e-05, "loss": 1.0979, "step": 52280 }, { "epoch": 0.3340659059836704, "grad_norm": 0.9339504241943359, "learning_rate": 9.327750027678005e-05, "loss": 0.8496, "step": 52290 }, { "epoch": 0.3341297931334091, "grad_norm": 1.0181572437286377, "learning_rate": 9.327498708925677e-05, "loss": 0.9902, "step": 52300 }, { "epoch": 0.33419368028314783, "grad_norm": 0.7839872241020203, "learning_rate": 9.327247346591694e-05, "loss": 0.9367, "step": 52310 }, { "epoch": 0.33425756743288654, "grad_norm": 0.5195721387863159, "learning_rate": 9.326995940678587e-05, "loss": 0.8934, "step": 52320 }, { "epoch": 0.33432145458262524, "grad_norm": 0.8356695175170898, "learning_rate": 9.326744491188888e-05, "loss": 0.9758, "step": 52330 }, { "epoch": 0.33438534173236395, "grad_norm": 1.7605299949645996, "learning_rate": 9.326492998125128e-05, "loss": 0.7481, "step": 52340 }, { "epoch": 0.33444922888210266, "grad_norm": 1.1781415939331055, "learning_rate": 9.326241461489839e-05, "loss": 0.8391, "step": 52350 }, { "epoch": 0.33451311603184136, "grad_norm": 1.0908046960830688, "learning_rate": 9.325989881285559e-05, "loss": 0.6456, "step": 52360 }, { "epoch": 0.33457700318158007, "grad_norm": 0.6815122961997986, "learning_rate": 9.325738257514816e-05, "loss": 0.7305, "step": 52370 }, { "epoch": 0.3346408903313188, "grad_norm": 0.8093428611755371, "learning_rate": 9.325486590180149e-05, "loss": 0.778, "step": 52380 }, { "epoch": 0.3347047774810575, "grad_norm": 0.8250554800033569, "learning_rate": 9.325234879284086e-05, "loss": 0.6891, "step": 52390 }, { "epoch": 0.3347686646307962, "grad_norm": 0.8145758509635925, "learning_rate": 9.324983124829169e-05, "loss": 0.925, "step": 52400 }, { "epoch": 0.33483255178053484, "grad_norm": 0.7351551651954651, "learning_rate": 9.324731326817928e-05, "loss": 1.0542, "step": 52410 }, { "epoch": 0.33489643893027354, "grad_norm": 0.9697402119636536, "learning_rate": 9.324479485252904e-05, "loss": 0.9113, "step": 52420 }, { "epoch": 0.33496032608001225, "grad_norm": 0.8043109774589539, "learning_rate": 9.324227600136628e-05, "loss": 0.9284, "step": 52430 }, { "epoch": 0.33502421322975096, "grad_norm": 0.6603406667709351, "learning_rate": 9.32397567147164e-05, "loss": 0.9909, "step": 52440 }, { "epoch": 0.33508810037948966, "grad_norm": 0.5201127529144287, "learning_rate": 9.323723699260476e-05, "loss": 0.7742, "step": 52450 }, { "epoch": 0.33515198752922837, "grad_norm": 1.1055912971496582, "learning_rate": 9.323471683505674e-05, "loss": 0.8968, "step": 52460 }, { "epoch": 0.3352158746789671, "grad_norm": 1.6916980743408203, "learning_rate": 9.323219624209772e-05, "loss": 0.8835, "step": 52470 }, { "epoch": 0.3352797618287058, "grad_norm": 0.889218270778656, "learning_rate": 9.322967521375307e-05, "loss": 0.8445, "step": 52480 }, { "epoch": 0.3353436489784445, "grad_norm": 0.9384592175483704, "learning_rate": 9.32271537500482e-05, "loss": 0.701, "step": 52490 }, { "epoch": 0.3354075361281832, "grad_norm": 1.5930566787719727, "learning_rate": 9.322463185100849e-05, "loss": 0.8175, "step": 52500 }, { "epoch": 0.3354714232779219, "grad_norm": 0.6644344925880432, "learning_rate": 9.322210951665935e-05, "loss": 1.0025, "step": 52510 }, { "epoch": 0.3355353104276606, "grad_norm": 0.9203514456748962, "learning_rate": 9.321958674702617e-05, "loss": 0.7474, "step": 52520 }, { "epoch": 0.33559919757739926, "grad_norm": 0.5946767330169678, "learning_rate": 9.321706354213438e-05, "loss": 0.7824, "step": 52530 }, { "epoch": 0.33566308472713796, "grad_norm": 0.7163698077201843, "learning_rate": 9.321453990200935e-05, "loss": 0.778, "step": 52540 }, { "epoch": 0.33572697187687667, "grad_norm": 0.8819127678871155, "learning_rate": 9.321201582667653e-05, "loss": 0.8468, "step": 52550 }, { "epoch": 0.3357908590266154, "grad_norm": 0.8515467643737793, "learning_rate": 9.320949131616132e-05, "loss": 0.9673, "step": 52560 }, { "epoch": 0.3358547461763541, "grad_norm": 0.7722886204719543, "learning_rate": 9.320696637048915e-05, "loss": 0.8924, "step": 52570 }, { "epoch": 0.3359186333260928, "grad_norm": 0.7204701900482178, "learning_rate": 9.320444098968545e-05, "loss": 0.9221, "step": 52580 }, { "epoch": 0.3359825204758315, "grad_norm": 1.010270595550537, "learning_rate": 9.320191517377566e-05, "loss": 1.2194, "step": 52590 }, { "epoch": 0.3360464076255702, "grad_norm": 0.7149573564529419, "learning_rate": 9.319938892278519e-05, "loss": 0.9444, "step": 52600 }, { "epoch": 0.3361102947753089, "grad_norm": 0.908594012260437, "learning_rate": 9.31968622367395e-05, "loss": 0.8676, "step": 52610 }, { "epoch": 0.3361741819250476, "grad_norm": 0.9813511371612549, "learning_rate": 9.319433511566406e-05, "loss": 1.0841, "step": 52620 }, { "epoch": 0.3362380690747863, "grad_norm": 0.6576645374298096, "learning_rate": 9.31918075595843e-05, "loss": 0.968, "step": 52630 }, { "epoch": 0.336301956224525, "grad_norm": 0.9117244482040405, "learning_rate": 9.318927956852566e-05, "loss": 0.9304, "step": 52640 }, { "epoch": 0.3363658433742637, "grad_norm": 0.8400249481201172, "learning_rate": 9.318675114251361e-05, "loss": 1.0644, "step": 52650 }, { "epoch": 0.3364297305240024, "grad_norm": 0.950006365776062, "learning_rate": 9.31842222815736e-05, "loss": 0.8254, "step": 52660 }, { "epoch": 0.3364936176737411, "grad_norm": 0.9387775659561157, "learning_rate": 9.318169298573112e-05, "loss": 0.9178, "step": 52670 }, { "epoch": 0.3365575048234798, "grad_norm": 0.5320703387260437, "learning_rate": 9.317916325501165e-05, "loss": 0.5815, "step": 52680 }, { "epoch": 0.3366213919732185, "grad_norm": 1.047491192817688, "learning_rate": 9.317663308944064e-05, "loss": 1.2488, "step": 52690 }, { "epoch": 0.3366852791229572, "grad_norm": 2.0185956954956055, "learning_rate": 9.317410248904358e-05, "loss": 0.8944, "step": 52700 }, { "epoch": 0.3367491662726959, "grad_norm": 0.8261764049530029, "learning_rate": 9.317157145384596e-05, "loss": 0.818, "step": 52710 }, { "epoch": 0.3368130534224346, "grad_norm": 0.9799476265907288, "learning_rate": 9.316903998387326e-05, "loss": 0.7601, "step": 52720 }, { "epoch": 0.3368769405721733, "grad_norm": 0.8541726469993591, "learning_rate": 9.3166508079151e-05, "loss": 0.9305, "step": 52730 }, { "epoch": 0.33694082772191203, "grad_norm": 0.7811595797538757, "learning_rate": 9.316397573970464e-05, "loss": 0.82, "step": 52740 }, { "epoch": 0.33700471487165073, "grad_norm": 0.7470584511756897, "learning_rate": 9.316144296555971e-05, "loss": 0.8631, "step": 52750 }, { "epoch": 0.33706860202138944, "grad_norm": 0.8616728782653809, "learning_rate": 9.315890975674169e-05, "loss": 0.8319, "step": 52760 }, { "epoch": 0.33713248917112815, "grad_norm": 0.6505323052406311, "learning_rate": 9.315637611327614e-05, "loss": 0.9409, "step": 52770 }, { "epoch": 0.3371963763208668, "grad_norm": 1.1408954858779907, "learning_rate": 9.315384203518853e-05, "loss": 0.8325, "step": 52780 }, { "epoch": 0.3372602634706055, "grad_norm": 0.8268606066703796, "learning_rate": 9.31513075225044e-05, "loss": 0.947, "step": 52790 }, { "epoch": 0.3373241506203442, "grad_norm": 1.4688328504562378, "learning_rate": 9.314877257524928e-05, "loss": 0.9942, "step": 52800 }, { "epoch": 0.3373880377700829, "grad_norm": 0.8979589343070984, "learning_rate": 9.314623719344869e-05, "loss": 0.8571, "step": 52810 }, { "epoch": 0.3374519249198216, "grad_norm": 0.6567512154579163, "learning_rate": 9.314370137712816e-05, "loss": 0.6655, "step": 52820 }, { "epoch": 0.33751581206956033, "grad_norm": 0.8439179062843323, "learning_rate": 9.314116512631324e-05, "loss": 0.8662, "step": 52830 }, { "epoch": 0.33757969921929903, "grad_norm": 0.7378790378570557, "learning_rate": 9.313862844102946e-05, "loss": 0.8929, "step": 52840 }, { "epoch": 0.33764358636903774, "grad_norm": 0.5747960209846497, "learning_rate": 9.313609132130235e-05, "loss": 0.8469, "step": 52850 }, { "epoch": 0.33770747351877645, "grad_norm": 2.188962459564209, "learning_rate": 9.313355376715751e-05, "loss": 0.7715, "step": 52860 }, { "epoch": 0.33777136066851515, "grad_norm": 1.6319129467010498, "learning_rate": 9.313101577862046e-05, "loss": 0.851, "step": 52870 }, { "epoch": 0.33783524781825386, "grad_norm": 0.8833417892456055, "learning_rate": 9.312847735571676e-05, "loss": 0.9841, "step": 52880 }, { "epoch": 0.33789913496799256, "grad_norm": 0.771787703037262, "learning_rate": 9.312593849847198e-05, "loss": 0.6755, "step": 52890 }, { "epoch": 0.3379630221177312, "grad_norm": 1.289760947227478, "learning_rate": 9.31233992069117e-05, "loss": 1.1848, "step": 52900 }, { "epoch": 0.3380269092674699, "grad_norm": 0.8547393083572388, "learning_rate": 9.312085948106148e-05, "loss": 1.0867, "step": 52910 }, { "epoch": 0.3380907964172086, "grad_norm": 1.357723593711853, "learning_rate": 9.311831932094691e-05, "loss": 0.821, "step": 52920 }, { "epoch": 0.33815468356694733, "grad_norm": 0.9254101514816284, "learning_rate": 9.311577872659355e-05, "loss": 1.188, "step": 52930 }, { "epoch": 0.33821857071668604, "grad_norm": 0.9655906558036804, "learning_rate": 9.311323769802701e-05, "loss": 1.1519, "step": 52940 }, { "epoch": 0.33828245786642475, "grad_norm": 0.9837827682495117, "learning_rate": 9.311069623527285e-05, "loss": 0.9612, "step": 52950 }, { "epoch": 0.33834634501616345, "grad_norm": 0.7545758485794067, "learning_rate": 9.310840854758487e-05, "loss": 1.1672, "step": 52960 }, { "epoch": 0.33841023216590216, "grad_norm": 0.9971650838851929, "learning_rate": 9.31058662599448e-05, "loss": 0.9516, "step": 52970 }, { "epoch": 0.33847411931564086, "grad_norm": 0.8151521682739258, "learning_rate": 9.310332353819136e-05, "loss": 0.6755, "step": 52980 }, { "epoch": 0.33853800646537957, "grad_norm": 1.5848335027694702, "learning_rate": 9.310078038235014e-05, "loss": 0.8026, "step": 52990 }, { "epoch": 0.3386018936151183, "grad_norm": 1.3594563007354736, "learning_rate": 9.30982367924468e-05, "loss": 0.6692, "step": 53000 }, { "epoch": 0.338665780764857, "grad_norm": 1.4335222244262695, "learning_rate": 9.309569276850692e-05, "loss": 0.8874, "step": 53010 }, { "epoch": 0.33872966791459563, "grad_norm": 1.4923986196517944, "learning_rate": 9.309314831055615e-05, "loss": 1.0218, "step": 53020 }, { "epoch": 0.33879355506433434, "grad_norm": 0.6935365796089172, "learning_rate": 9.309060341862008e-05, "loss": 0.8023, "step": 53030 }, { "epoch": 0.33885744221407305, "grad_norm": 1.7542939186096191, "learning_rate": 9.308805809272434e-05, "loss": 0.7334, "step": 53040 }, { "epoch": 0.33892132936381175, "grad_norm": 2.078371286392212, "learning_rate": 9.30855123328946e-05, "loss": 0.9812, "step": 53050 }, { "epoch": 0.33898521651355046, "grad_norm": 0.6690249443054199, "learning_rate": 9.308296613915647e-05, "loss": 0.9794, "step": 53060 }, { "epoch": 0.33904910366328916, "grad_norm": 0.8142697215080261, "learning_rate": 9.30804195115356e-05, "loss": 0.9776, "step": 53070 }, { "epoch": 0.33911299081302787, "grad_norm": 0.7654648423194885, "learning_rate": 9.307787245005764e-05, "loss": 0.842, "step": 53080 }, { "epoch": 0.3391768779627666, "grad_norm": 0.5504037141799927, "learning_rate": 9.307532495474822e-05, "loss": 0.7776, "step": 53090 }, { "epoch": 0.3392407651125053, "grad_norm": 1.0997267961502075, "learning_rate": 9.307277702563302e-05, "loss": 0.685, "step": 53100 }, { "epoch": 0.339304652262244, "grad_norm": 0.9791783690452576, "learning_rate": 9.307022866273771e-05, "loss": 1.1581, "step": 53110 }, { "epoch": 0.3393685394119827, "grad_norm": 0.6219057440757751, "learning_rate": 9.306767986608791e-05, "loss": 0.9069, "step": 53120 }, { "epoch": 0.3394324265617214, "grad_norm": 0.5955463647842407, "learning_rate": 9.306513063570933e-05, "loss": 1.112, "step": 53130 }, { "epoch": 0.33949631371146005, "grad_norm": 0.7455695867538452, "learning_rate": 9.306258097162763e-05, "loss": 0.7857, "step": 53140 }, { "epoch": 0.33956020086119876, "grad_norm": 0.9764438271522522, "learning_rate": 9.306003087386848e-05, "loss": 0.9552, "step": 53150 }, { "epoch": 0.33962408801093746, "grad_norm": 0.6675849556922913, "learning_rate": 9.305748034245756e-05, "loss": 0.7883, "step": 53160 }, { "epoch": 0.33968797516067617, "grad_norm": 0.9111708998680115, "learning_rate": 9.305492937742057e-05, "loss": 0.8918, "step": 53170 }, { "epoch": 0.3397518623104149, "grad_norm": 2.7284460067749023, "learning_rate": 9.30523779787832e-05, "loss": 0.8114, "step": 53180 }, { "epoch": 0.3398157494601536, "grad_norm": 0.586710512638092, "learning_rate": 9.304982614657114e-05, "loss": 0.856, "step": 53190 }, { "epoch": 0.3398796366098923, "grad_norm": 0.644350528717041, "learning_rate": 9.304727388081007e-05, "loss": 0.8175, "step": 53200 }, { "epoch": 0.339943523759631, "grad_norm": 0.6203905940055847, "learning_rate": 9.304472118152572e-05, "loss": 1.0128, "step": 53210 }, { "epoch": 0.3400074109093697, "grad_norm": 0.840505063533783, "learning_rate": 9.304216804874379e-05, "loss": 0.8672, "step": 53220 }, { "epoch": 0.3400712980591084, "grad_norm": 0.750717282295227, "learning_rate": 9.303961448248998e-05, "loss": 0.8607, "step": 53230 }, { "epoch": 0.3401351852088471, "grad_norm": 0.7886949181556702, "learning_rate": 9.303706048279004e-05, "loss": 1.2132, "step": 53240 }, { "epoch": 0.3401990723585858, "grad_norm": 0.9253231883049011, "learning_rate": 9.303450604966966e-05, "loss": 1.0289, "step": 53250 }, { "epoch": 0.34026295950832447, "grad_norm": 1.0587670803070068, "learning_rate": 9.303195118315455e-05, "loss": 0.9249, "step": 53260 }, { "epoch": 0.3403268466580632, "grad_norm": 1.1579573154449463, "learning_rate": 9.302939588327048e-05, "loss": 0.8702, "step": 53270 }, { "epoch": 0.3403907338078019, "grad_norm": 1.4637956619262695, "learning_rate": 9.302684015004318e-05, "loss": 0.8417, "step": 53280 }, { "epoch": 0.3404546209575406, "grad_norm": 2.425816774368286, "learning_rate": 9.302428398349836e-05, "loss": 0.8657, "step": 53290 }, { "epoch": 0.3405185081072793, "grad_norm": 0.530267596244812, "learning_rate": 9.30217273836618e-05, "loss": 0.9432, "step": 53300 }, { "epoch": 0.340582395257018, "grad_norm": 1.081075668334961, "learning_rate": 9.30191703505592e-05, "loss": 1.262, "step": 53310 }, { "epoch": 0.3406462824067567, "grad_norm": 0.7147884964942932, "learning_rate": 9.301661288421636e-05, "loss": 0.8376, "step": 53320 }, { "epoch": 0.3407101695564954, "grad_norm": 0.8092734217643738, "learning_rate": 9.301405498465901e-05, "loss": 0.8306, "step": 53330 }, { "epoch": 0.3407740567062341, "grad_norm": 1.257656216621399, "learning_rate": 9.30114966519129e-05, "loss": 0.6576, "step": 53340 }, { "epoch": 0.3408379438559728, "grad_norm": 0.7588216066360474, "learning_rate": 9.30089378860038e-05, "loss": 0.8001, "step": 53350 }, { "epoch": 0.34090183100571153, "grad_norm": 2.2834153175354004, "learning_rate": 9.300637868695752e-05, "loss": 0.7371, "step": 53360 }, { "epoch": 0.34096571815545024, "grad_norm": 1.2148463726043701, "learning_rate": 9.300381905479978e-05, "loss": 0.7611, "step": 53370 }, { "epoch": 0.3410296053051889, "grad_norm": 0.7011250853538513, "learning_rate": 9.300125898955639e-05, "loss": 0.7491, "step": 53380 }, { "epoch": 0.3410934924549276, "grad_norm": 0.9669275879859924, "learning_rate": 9.299869849125311e-05, "loss": 0.9306, "step": 53390 }, { "epoch": 0.3411573796046663, "grad_norm": 0.8897387981414795, "learning_rate": 9.299613755991573e-05, "loss": 1.0307, "step": 53400 }, { "epoch": 0.341221266754405, "grad_norm": 0.9630199670791626, "learning_rate": 9.299357619557005e-05, "loss": 1.1292, "step": 53410 }, { "epoch": 0.3412851539041437, "grad_norm": 0.8969447016716003, "learning_rate": 9.299101439824188e-05, "loss": 0.9029, "step": 53420 }, { "epoch": 0.3413490410538824, "grad_norm": 1.110783338546753, "learning_rate": 9.298845216795699e-05, "loss": 0.9651, "step": 53430 }, { "epoch": 0.3414129282036211, "grad_norm": 0.8235384225845337, "learning_rate": 9.29858895047412e-05, "loss": 0.5702, "step": 53440 }, { "epoch": 0.34147681535335983, "grad_norm": 1.1357210874557495, "learning_rate": 9.298332640862032e-05, "loss": 0.8345, "step": 53450 }, { "epoch": 0.34154070250309854, "grad_norm": 0.7951391935348511, "learning_rate": 9.298076287962016e-05, "loss": 0.7113, "step": 53460 }, { "epoch": 0.34160458965283724, "grad_norm": 0.9098735451698303, "learning_rate": 9.297819891776651e-05, "loss": 0.9365, "step": 53470 }, { "epoch": 0.34166847680257595, "grad_norm": 0.6273751854896545, "learning_rate": 9.297563452308525e-05, "loss": 0.7352, "step": 53480 }, { "epoch": 0.34173236395231466, "grad_norm": 0.4580266773700714, "learning_rate": 9.297306969560213e-05, "loss": 0.9588, "step": 53490 }, { "epoch": 0.3417962511020533, "grad_norm": 1.1689975261688232, "learning_rate": 9.297050443534305e-05, "loss": 0.7314, "step": 53500 }, { "epoch": 0.341860138251792, "grad_norm": 0.8858540058135986, "learning_rate": 9.29679387423338e-05, "loss": 0.8758, "step": 53510 }, { "epoch": 0.3419240254015307, "grad_norm": 0.7352036833763123, "learning_rate": 9.296537261660026e-05, "loss": 0.9193, "step": 53520 }, { "epoch": 0.3419879125512694, "grad_norm": 1.1787981986999512, "learning_rate": 9.296280605816823e-05, "loss": 0.853, "step": 53530 }, { "epoch": 0.34205179970100813, "grad_norm": 0.8490791320800781, "learning_rate": 9.296023906706357e-05, "loss": 1.2468, "step": 53540 }, { "epoch": 0.34211568685074684, "grad_norm": 1.1873284578323364, "learning_rate": 9.295767164331215e-05, "loss": 1.1106, "step": 53550 }, { "epoch": 0.34217957400048554, "grad_norm": 1.3740506172180176, "learning_rate": 9.29551037869398e-05, "loss": 0.8165, "step": 53560 }, { "epoch": 0.34224346115022425, "grad_norm": 1.074511170387268, "learning_rate": 9.295253549797241e-05, "loss": 0.8433, "step": 53570 }, { "epoch": 0.34230734829996295, "grad_norm": 0.9406700134277344, "learning_rate": 9.294996677643581e-05, "loss": 0.844, "step": 53580 }, { "epoch": 0.34237123544970166, "grad_norm": 0.9452252984046936, "learning_rate": 9.294739762235589e-05, "loss": 0.7768, "step": 53590 }, { "epoch": 0.34243512259944037, "grad_norm": 0.8128929734230042, "learning_rate": 9.294482803575853e-05, "loss": 0.622, "step": 53600 }, { "epoch": 0.3424990097491791, "grad_norm": 0.825412392616272, "learning_rate": 9.294225801666959e-05, "loss": 1.0291, "step": 53610 }, { "epoch": 0.3425628968989178, "grad_norm": 1.06623113155365, "learning_rate": 9.293968756511496e-05, "loss": 1.0841, "step": 53620 }, { "epoch": 0.34262678404865643, "grad_norm": 0.981828510761261, "learning_rate": 9.293711668112054e-05, "loss": 0.8458, "step": 53630 }, { "epoch": 0.34269067119839514, "grad_norm": 1.0561970472335815, "learning_rate": 9.29345453647122e-05, "loss": 0.7624, "step": 53640 }, { "epoch": 0.34275455834813384, "grad_norm": 0.7628150582313538, "learning_rate": 9.293197361591586e-05, "loss": 0.8328, "step": 53650 }, { "epoch": 0.34281844549787255, "grad_norm": 0.9464593529701233, "learning_rate": 9.292940143475737e-05, "loss": 0.9501, "step": 53660 }, { "epoch": 0.34288233264761125, "grad_norm": 2.0435502529144287, "learning_rate": 9.292682882126272e-05, "loss": 0.864, "step": 53670 }, { "epoch": 0.34294621979734996, "grad_norm": 1.0263941287994385, "learning_rate": 9.292425577545772e-05, "loss": 0.8141, "step": 53680 }, { "epoch": 0.34301010694708867, "grad_norm": 0.7042751908302307, "learning_rate": 9.292168229736836e-05, "loss": 0.7909, "step": 53690 }, { "epoch": 0.3430739940968274, "grad_norm": 1.1945339441299438, "learning_rate": 9.29191083870205e-05, "loss": 0.9799, "step": 53700 }, { "epoch": 0.3431378812465661, "grad_norm": 0.965678870677948, "learning_rate": 9.29165340444401e-05, "loss": 0.8093, "step": 53710 }, { "epoch": 0.3432017683963048, "grad_norm": 2.425915241241455, "learning_rate": 9.291395926965307e-05, "loss": 1.0032, "step": 53720 }, { "epoch": 0.3432656555460435, "grad_norm": 0.5332554578781128, "learning_rate": 9.291138406268536e-05, "loss": 0.7977, "step": 53730 }, { "epoch": 0.3433295426957822, "grad_norm": 2.8045296669006348, "learning_rate": 9.290880842356287e-05, "loss": 0.9274, "step": 53740 }, { "epoch": 0.34339342984552085, "grad_norm": 0.7845577597618103, "learning_rate": 9.290623235231157e-05, "loss": 1.0535, "step": 53750 }, { "epoch": 0.34345731699525955, "grad_norm": 0.9177809953689575, "learning_rate": 9.290365584895739e-05, "loss": 0.9278, "step": 53760 }, { "epoch": 0.34352120414499826, "grad_norm": 0.9220765829086304, "learning_rate": 9.290107891352628e-05, "loss": 0.945, "step": 53770 }, { "epoch": 0.34358509129473697, "grad_norm": 0.8571166396141052, "learning_rate": 9.289850154604417e-05, "loss": 0.903, "step": 53780 }, { "epoch": 0.3436489784444757, "grad_norm": 0.8738123178482056, "learning_rate": 9.289592374653708e-05, "loss": 0.9928, "step": 53790 }, { "epoch": 0.3437128655942144, "grad_norm": 0.7225977778434753, "learning_rate": 9.28933455150309e-05, "loss": 0.8828, "step": 53800 }, { "epoch": 0.3437767527439531, "grad_norm": 1.3303672075271606, "learning_rate": 9.289076685155162e-05, "loss": 1.0604, "step": 53810 }, { "epoch": 0.3438406398936918, "grad_norm": 0.8628764152526855, "learning_rate": 9.28881877561252e-05, "loss": 1.1439, "step": 53820 }, { "epoch": 0.3439045270434305, "grad_norm": 0.6281081438064575, "learning_rate": 9.288560822877765e-05, "loss": 0.9286, "step": 53830 }, { "epoch": 0.3439684141931692, "grad_norm": 0.6044685244560242, "learning_rate": 9.288302826953492e-05, "loss": 1.1626, "step": 53840 }, { "epoch": 0.3440323013429079, "grad_norm": 0.847324788570404, "learning_rate": 9.288044787842298e-05, "loss": 0.7661, "step": 53850 }, { "epoch": 0.3440961884926466, "grad_norm": 0.9134111404418945, "learning_rate": 9.287786705546785e-05, "loss": 0.7944, "step": 53860 }, { "epoch": 0.34416007564238527, "grad_norm": 1.3941556215286255, "learning_rate": 9.287528580069551e-05, "loss": 1.2369, "step": 53870 }, { "epoch": 0.34422396279212397, "grad_norm": 0.8589109182357788, "learning_rate": 9.287270411413194e-05, "loss": 0.6585, "step": 53880 }, { "epoch": 0.3442878499418627, "grad_norm": 0.49347206950187683, "learning_rate": 9.287012199580315e-05, "loss": 0.8574, "step": 53890 }, { "epoch": 0.3443517370916014, "grad_norm": 0.9737316370010376, "learning_rate": 9.286753944573514e-05, "loss": 0.6949, "step": 53900 }, { "epoch": 0.3444156242413401, "grad_norm": 1.0737287998199463, "learning_rate": 9.286495646395392e-05, "loss": 0.9367, "step": 53910 }, { "epoch": 0.3444795113910788, "grad_norm": 0.9133766293525696, "learning_rate": 9.28623730504855e-05, "loss": 0.7619, "step": 53920 }, { "epoch": 0.3445433985408175, "grad_norm": 0.784355640411377, "learning_rate": 9.285978920535592e-05, "loss": 1.0223, "step": 53930 }, { "epoch": 0.3446072856905562, "grad_norm": 0.7285311818122864, "learning_rate": 9.285720492859118e-05, "loss": 0.9259, "step": 53940 }, { "epoch": 0.3446711728402949, "grad_norm": 0.8762960433959961, "learning_rate": 9.28546202202173e-05, "loss": 0.8949, "step": 53950 }, { "epoch": 0.3447350599900336, "grad_norm": 0.8869500756263733, "learning_rate": 9.285203508026032e-05, "loss": 0.8522, "step": 53960 }, { "epoch": 0.3447989471397723, "grad_norm": 0.7807958722114563, "learning_rate": 9.284944950874628e-05, "loss": 1.0629, "step": 53970 }, { "epoch": 0.34486283428951103, "grad_norm": 2.027085542678833, "learning_rate": 9.284686350570121e-05, "loss": 0.9566, "step": 53980 }, { "epoch": 0.3449267214392497, "grad_norm": 1.2319154739379883, "learning_rate": 9.284427707115116e-05, "loss": 0.718, "step": 53990 }, { "epoch": 0.3449906085889884, "grad_norm": 0.5686825513839722, "learning_rate": 9.284169020512217e-05, "loss": 0.7659, "step": 54000 }, { "epoch": 0.3450544957387271, "grad_norm": 0.9346210956573486, "learning_rate": 9.283910290764029e-05, "loss": 0.8805, "step": 54010 }, { "epoch": 0.3451183828884658, "grad_norm": 1.0254408121109009, "learning_rate": 9.28365151787316e-05, "loss": 1.0447, "step": 54020 }, { "epoch": 0.3451822700382045, "grad_norm": 1.1026064157485962, "learning_rate": 9.283392701842213e-05, "loss": 1.11, "step": 54030 }, { "epoch": 0.3452461571879432, "grad_norm": 1.1791328191757202, "learning_rate": 9.283133842673797e-05, "loss": 0.9846, "step": 54040 }, { "epoch": 0.3453100443376819, "grad_norm": 0.6459341049194336, "learning_rate": 9.282874940370517e-05, "loss": 0.8446, "step": 54050 }, { "epoch": 0.3453739314874206, "grad_norm": 0.7654846906661987, "learning_rate": 9.282615994934982e-05, "loss": 1.1735, "step": 54060 }, { "epoch": 0.34543781863715933, "grad_norm": 1.2747883796691895, "learning_rate": 9.282357006369798e-05, "loss": 0.9468, "step": 54070 }, { "epoch": 0.34550170578689804, "grad_norm": 0.5862970352172852, "learning_rate": 9.282097974677574e-05, "loss": 0.5708, "step": 54080 }, { "epoch": 0.34556559293663675, "grad_norm": 1.1748859882354736, "learning_rate": 9.28183889986092e-05, "loss": 0.9329, "step": 54090 }, { "epoch": 0.34562948008637545, "grad_norm": 0.7171411514282227, "learning_rate": 9.281579781922442e-05, "loss": 1.0105, "step": 54100 }, { "epoch": 0.3456933672361141, "grad_norm": 0.8102126717567444, "learning_rate": 9.281320620864754e-05, "loss": 0.8918, "step": 54110 }, { "epoch": 0.3457572543858528, "grad_norm": 1.1540294885635376, "learning_rate": 9.281061416690462e-05, "loss": 0.794, "step": 54120 }, { "epoch": 0.3458211415355915, "grad_norm": 0.4848040044307709, "learning_rate": 9.280802169402178e-05, "loss": 0.7435, "step": 54130 }, { "epoch": 0.3458850286853302, "grad_norm": 1.207207202911377, "learning_rate": 9.280542879002512e-05, "loss": 0.9234, "step": 54140 }, { "epoch": 0.3459489158350689, "grad_norm": 0.7210013270378113, "learning_rate": 9.280283545494077e-05, "loss": 0.942, "step": 54150 }, { "epoch": 0.34601280298480763, "grad_norm": 2.0840461254119873, "learning_rate": 9.280024168879482e-05, "loss": 0.828, "step": 54160 }, { "epoch": 0.34607669013454634, "grad_norm": 0.8813756704330444, "learning_rate": 9.279764749161344e-05, "loss": 0.8051, "step": 54170 }, { "epoch": 0.34614057728428504, "grad_norm": 0.606823205947876, "learning_rate": 9.27950528634227e-05, "loss": 1.0424, "step": 54180 }, { "epoch": 0.34620446443402375, "grad_norm": 0.9201170206069946, "learning_rate": 9.279245780424876e-05, "loss": 0.772, "step": 54190 }, { "epoch": 0.34626835158376246, "grad_norm": 0.9958915710449219, "learning_rate": 9.278986231411776e-05, "loss": 0.6918, "step": 54200 }, { "epoch": 0.34633223873350116, "grad_norm": 0.7050455212593079, "learning_rate": 9.278726639305581e-05, "loss": 0.7851, "step": 54210 }, { "epoch": 0.34639612588323987, "grad_norm": 0.9351766109466553, "learning_rate": 9.27846700410891e-05, "loss": 0.796, "step": 54220 }, { "epoch": 0.3464600130329785, "grad_norm": 2.2169976234436035, "learning_rate": 9.278207325824373e-05, "loss": 0.9384, "step": 54230 }, { "epoch": 0.3465239001827172, "grad_norm": 1.0841212272644043, "learning_rate": 9.277947604454587e-05, "loss": 0.996, "step": 54240 }, { "epoch": 0.34658778733245593, "grad_norm": 1.3543506860733032, "learning_rate": 9.277687840002167e-05, "loss": 0.7683, "step": 54250 }, { "epoch": 0.34665167448219464, "grad_norm": 0.7349464297294617, "learning_rate": 9.277428032469731e-05, "loss": 0.9666, "step": 54260 }, { "epoch": 0.34671556163193334, "grad_norm": 1.0820789337158203, "learning_rate": 9.277168181859893e-05, "loss": 0.6931, "step": 54270 }, { "epoch": 0.34677944878167205, "grad_norm": 0.6938410997390747, "learning_rate": 9.276908288175272e-05, "loss": 0.8715, "step": 54280 }, { "epoch": 0.34684333593141076, "grad_norm": 0.7121148109436035, "learning_rate": 9.276648351418484e-05, "loss": 0.8973, "step": 54290 }, { "epoch": 0.34690722308114946, "grad_norm": 0.8023224472999573, "learning_rate": 9.276388371592149e-05, "loss": 0.8985, "step": 54300 }, { "epoch": 0.34697111023088817, "grad_norm": 1.1684279441833496, "learning_rate": 9.276128348698881e-05, "loss": 0.8147, "step": 54310 }, { "epoch": 0.3470349973806269, "grad_norm": 1.3102762699127197, "learning_rate": 9.275868282741303e-05, "loss": 0.8545, "step": 54320 }, { "epoch": 0.3470988845303656, "grad_norm": 0.8226547837257385, "learning_rate": 9.27560817372203e-05, "loss": 0.6908, "step": 54330 }, { "epoch": 0.3471627716801043, "grad_norm": 1.1584205627441406, "learning_rate": 9.275348021643686e-05, "loss": 0.8704, "step": 54340 }, { "epoch": 0.34722665882984294, "grad_norm": 0.852271556854248, "learning_rate": 9.275087826508887e-05, "loss": 0.8696, "step": 54350 }, { "epoch": 0.34729054597958164, "grad_norm": 2.0320937633514404, "learning_rate": 9.274827588320257e-05, "loss": 0.6919, "step": 54360 }, { "epoch": 0.34735443312932035, "grad_norm": 0.7250359058380127, "learning_rate": 9.274567307080412e-05, "loss": 0.8589, "step": 54370 }, { "epoch": 0.34741832027905906, "grad_norm": 1.2491799592971802, "learning_rate": 9.27430698279198e-05, "loss": 0.8009, "step": 54380 }, { "epoch": 0.34748220742879776, "grad_norm": 0.9660385251045227, "learning_rate": 9.274046615457577e-05, "loss": 1.0643, "step": 54390 }, { "epoch": 0.34754609457853647, "grad_norm": 0.9620506167411804, "learning_rate": 9.273786205079826e-05, "loss": 0.8099, "step": 54400 }, { "epoch": 0.3476099817282752, "grad_norm": 0.6800320744514465, "learning_rate": 9.273525751661353e-05, "loss": 1.039, "step": 54410 }, { "epoch": 0.3476738688780139, "grad_norm": 0.6459980010986328, "learning_rate": 9.273265255204778e-05, "loss": 0.94, "step": 54420 }, { "epoch": 0.3477377560277526, "grad_norm": 0.5387960076332092, "learning_rate": 9.273004715712723e-05, "loss": 0.9139, "step": 54430 }, { "epoch": 0.3478016431774913, "grad_norm": 0.9442420601844788, "learning_rate": 9.272744133187816e-05, "loss": 0.813, "step": 54440 }, { "epoch": 0.34786553032723, "grad_norm": 0.6634787321090698, "learning_rate": 9.272483507632676e-05, "loss": 0.6832, "step": 54450 }, { "epoch": 0.3479294174769687, "grad_norm": 0.7288976311683655, "learning_rate": 9.272222839049933e-05, "loss": 0.873, "step": 54460 }, { "epoch": 0.3479933046267074, "grad_norm": 1.1111667156219482, "learning_rate": 9.27196212744221e-05, "loss": 1.1114, "step": 54470 }, { "epoch": 0.34805719177644606, "grad_norm": 1.427985668182373, "learning_rate": 9.271701372812134e-05, "loss": 0.8317, "step": 54480 }, { "epoch": 0.34812107892618477, "grad_norm": 0.5816881656646729, "learning_rate": 9.271440575162328e-05, "loss": 0.9996, "step": 54490 }, { "epoch": 0.3481849660759235, "grad_norm": 1.1159511804580688, "learning_rate": 9.27117973449542e-05, "loss": 0.8265, "step": 54500 }, { "epoch": 0.3482488532256622, "grad_norm": 1.1096454858779907, "learning_rate": 9.270918850814037e-05, "loss": 0.9171, "step": 54510 }, { "epoch": 0.3483127403754009, "grad_norm": 0.5924681425094604, "learning_rate": 9.270657924120808e-05, "loss": 0.9957, "step": 54520 }, { "epoch": 0.3483766275251396, "grad_norm": 3.2207529544830322, "learning_rate": 9.270396954418357e-05, "loss": 1.1604, "step": 54530 }, { "epoch": 0.3484405146748783, "grad_norm": 1.6686917543411255, "learning_rate": 9.270135941709315e-05, "loss": 0.7374, "step": 54540 }, { "epoch": 0.348504401824617, "grad_norm": 0.8895770907402039, "learning_rate": 9.26987488599631e-05, "loss": 0.8122, "step": 54550 }, { "epoch": 0.3485682889743557, "grad_norm": 0.9306029081344604, "learning_rate": 9.26961378728197e-05, "loss": 0.7176, "step": 54560 }, { "epoch": 0.3486321761240944, "grad_norm": 1.2075837850570679, "learning_rate": 9.269352645568927e-05, "loss": 0.8263, "step": 54570 }, { "epoch": 0.3486960632738331, "grad_norm": 0.7406107187271118, "learning_rate": 9.269091460859807e-05, "loss": 0.8766, "step": 54580 }, { "epoch": 0.34875995042357183, "grad_norm": 0.717327892780304, "learning_rate": 9.268830233157245e-05, "loss": 1.2179, "step": 54590 }, { "epoch": 0.3488238375733105, "grad_norm": 0.9631721377372742, "learning_rate": 9.268568962463868e-05, "loss": 0.9515, "step": 54600 }, { "epoch": 0.3488877247230492, "grad_norm": 0.9041351675987244, "learning_rate": 9.26830764878231e-05, "loss": 1.1696, "step": 54610 }, { "epoch": 0.3489516118727879, "grad_norm": 0.8273685574531555, "learning_rate": 9.2680462921152e-05, "loss": 1.1219, "step": 54620 }, { "epoch": 0.3490154990225266, "grad_norm": 0.5111979842185974, "learning_rate": 9.267784892465172e-05, "loss": 0.8967, "step": 54630 }, { "epoch": 0.3490793861722653, "grad_norm": 0.8033791184425354, "learning_rate": 9.267523449834858e-05, "loss": 0.8957, "step": 54640 }, { "epoch": 0.349143273322004, "grad_norm": 0.8571832776069641, "learning_rate": 9.267261964226892e-05, "loss": 1.0502, "step": 54650 }, { "epoch": 0.3492071604717427, "grad_norm": 0.4170287549495697, "learning_rate": 9.267000435643904e-05, "loss": 0.8696, "step": 54660 }, { "epoch": 0.3492710476214814, "grad_norm": 0.687233567237854, "learning_rate": 9.266738864088533e-05, "loss": 0.787, "step": 54670 }, { "epoch": 0.34933493477122013, "grad_norm": 0.8800210356712341, "learning_rate": 9.266477249563408e-05, "loss": 0.8221, "step": 54680 }, { "epoch": 0.34939882192095884, "grad_norm": 0.8803540468215942, "learning_rate": 9.266215592071167e-05, "loss": 1.0652, "step": 54690 }, { "epoch": 0.34946270907069754, "grad_norm": 0.5410533547401428, "learning_rate": 9.265953891614445e-05, "loss": 0.9378, "step": 54700 }, { "epoch": 0.34952659622043625, "grad_norm": 0.5955383777618408, "learning_rate": 9.265692148195875e-05, "loss": 0.8833, "step": 54710 }, { "epoch": 0.3495904833701749, "grad_norm": 0.633705735206604, "learning_rate": 9.265430361818096e-05, "loss": 1.004, "step": 54720 }, { "epoch": 0.3496543705199136, "grad_norm": 0.9979560971260071, "learning_rate": 9.265168532483744e-05, "loss": 0.8923, "step": 54730 }, { "epoch": 0.3497182576696523, "grad_norm": 0.5315431952476501, "learning_rate": 9.264906660195453e-05, "loss": 0.7914, "step": 54740 }, { "epoch": 0.349782144819391, "grad_norm": 1.4878370761871338, "learning_rate": 9.264644744955863e-05, "loss": 1.0878, "step": 54750 }, { "epoch": 0.3498460319691297, "grad_norm": 0.9964064359664917, "learning_rate": 9.264382786767612e-05, "loss": 1.1167, "step": 54760 }, { "epoch": 0.34990991911886843, "grad_norm": 0.8638894557952881, "learning_rate": 9.264120785633335e-05, "loss": 0.7903, "step": 54770 }, { "epoch": 0.34997380626860713, "grad_norm": 0.7577997446060181, "learning_rate": 9.263858741555674e-05, "loss": 0.86, "step": 54780 }, { "epoch": 0.35003769341834584, "grad_norm": 0.9834237694740295, "learning_rate": 9.263596654537265e-05, "loss": 0.8051, "step": 54790 }, { "epoch": 0.35010158056808455, "grad_norm": 0.9026603102684021, "learning_rate": 9.263334524580751e-05, "loss": 0.9596, "step": 54800 }, { "epoch": 0.35016546771782325, "grad_norm": 1.9557400941848755, "learning_rate": 9.26307235168877e-05, "loss": 0.9289, "step": 54810 }, { "epoch": 0.35022935486756196, "grad_norm": 0.5680462718009949, "learning_rate": 9.262810135863962e-05, "loss": 1.1719, "step": 54820 }, { "epoch": 0.35029324201730067, "grad_norm": 1.077825665473938, "learning_rate": 9.26254787710897e-05, "loss": 0.8691, "step": 54830 }, { "epoch": 0.3503571291670393, "grad_norm": 1.1171085834503174, "learning_rate": 9.262285575426431e-05, "loss": 0.7501, "step": 54840 }, { "epoch": 0.350421016316778, "grad_norm": 0.8400352001190186, "learning_rate": 9.262023230818987e-05, "loss": 0.8568, "step": 54850 }, { "epoch": 0.35048490346651673, "grad_norm": 0.8587310910224915, "learning_rate": 9.261760843289284e-05, "loss": 1.1126, "step": 54860 }, { "epoch": 0.35054879061625543, "grad_norm": 0.979992687702179, "learning_rate": 9.261498412839963e-05, "loss": 0.7968, "step": 54870 }, { "epoch": 0.35061267776599414, "grad_norm": 1.4268198013305664, "learning_rate": 9.261235939473665e-05, "loss": 0.9709, "step": 54880 }, { "epoch": 0.35067656491573285, "grad_norm": 0.8531477451324463, "learning_rate": 9.260973423193036e-05, "loss": 1.1384, "step": 54890 }, { "epoch": 0.35074045206547155, "grad_norm": 0.8192383050918579, "learning_rate": 9.260710864000718e-05, "loss": 0.9567, "step": 54900 }, { "epoch": 0.35080433921521026, "grad_norm": 0.6545119881629944, "learning_rate": 9.260448261899355e-05, "loss": 0.8276, "step": 54910 }, { "epoch": 0.35086822636494897, "grad_norm": 1.2469779253005981, "learning_rate": 9.260185616891592e-05, "loss": 0.9101, "step": 54920 }, { "epoch": 0.35093211351468767, "grad_norm": 1.0227653980255127, "learning_rate": 9.259922928980075e-05, "loss": 1.0185, "step": 54930 }, { "epoch": 0.3509960006644264, "grad_norm": 0.8625701665878296, "learning_rate": 9.259660198167449e-05, "loss": 1.0336, "step": 54940 }, { "epoch": 0.3510598878141651, "grad_norm": 1.399640679359436, "learning_rate": 9.259397424456359e-05, "loss": 0.9261, "step": 54950 }, { "epoch": 0.35112377496390373, "grad_norm": 1.6561399698257446, "learning_rate": 9.259134607849451e-05, "loss": 0.8661, "step": 54960 }, { "epoch": 0.35118766211364244, "grad_norm": 0.7466694712638855, "learning_rate": 9.258871748349375e-05, "loss": 0.8944, "step": 54970 }, { "epoch": 0.35125154926338115, "grad_norm": 2.0133652687072754, "learning_rate": 9.258608845958774e-05, "loss": 0.7284, "step": 54980 }, { "epoch": 0.35131543641311985, "grad_norm": 0.8402307629585266, "learning_rate": 9.258345900680299e-05, "loss": 1.1441, "step": 54990 }, { "epoch": 0.35137932356285856, "grad_norm": 0.6770734190940857, "learning_rate": 9.258082912516597e-05, "loss": 0.9305, "step": 55000 }, { "epoch": 0.35144321071259726, "grad_norm": 1.0294511318206787, "learning_rate": 9.257819881470315e-05, "loss": 0.7655, "step": 55010 }, { "epoch": 0.35150709786233597, "grad_norm": 0.6236374974250793, "learning_rate": 9.257556807544106e-05, "loss": 0.6974, "step": 55020 }, { "epoch": 0.3515709850120747, "grad_norm": 0.7847385406494141, "learning_rate": 9.257293690740614e-05, "loss": 1.0462, "step": 55030 }, { "epoch": 0.3516348721618134, "grad_norm": 0.6366947293281555, "learning_rate": 9.257030531062492e-05, "loss": 0.9091, "step": 55040 }, { "epoch": 0.3516987593115521, "grad_norm": 0.9689487218856812, "learning_rate": 9.25676732851239e-05, "loss": 1.14, "step": 55050 }, { "epoch": 0.3517626464612908, "grad_norm": 0.7967630624771118, "learning_rate": 9.256504083092959e-05, "loss": 0.7999, "step": 55060 }, { "epoch": 0.3518265336110295, "grad_norm": 0.6108505725860596, "learning_rate": 9.256240794806847e-05, "loss": 1.1205, "step": 55070 }, { "epoch": 0.35189042076076815, "grad_norm": 1.0797632932662964, "learning_rate": 9.25597746365671e-05, "loss": 0.8593, "step": 55080 }, { "epoch": 0.35195430791050686, "grad_norm": 0.7324128150939941, "learning_rate": 9.255714089645198e-05, "loss": 0.9101, "step": 55090 }, { "epoch": 0.35201819506024556, "grad_norm": 0.6534935235977173, "learning_rate": 9.255450672774964e-05, "loss": 1.2862, "step": 55100 }, { "epoch": 0.35208208220998427, "grad_norm": 0.7674654722213745, "learning_rate": 9.255187213048658e-05, "loss": 1.0429, "step": 55110 }, { "epoch": 0.352145969359723, "grad_norm": 0.8261142373085022, "learning_rate": 9.254923710468937e-05, "loss": 0.8614, "step": 55120 }, { "epoch": 0.3522098565094617, "grad_norm": 1.2243504524230957, "learning_rate": 9.254660165038453e-05, "loss": 0.9836, "step": 55130 }, { "epoch": 0.3522737436592004, "grad_norm": 0.9247923493385315, "learning_rate": 9.254396576759861e-05, "loss": 0.9118, "step": 55140 }, { "epoch": 0.3523376308089391, "grad_norm": 1.049172043800354, "learning_rate": 9.254132945635814e-05, "loss": 1.1066, "step": 55150 }, { "epoch": 0.3524015179586778, "grad_norm": 0.9203839302062988, "learning_rate": 9.253869271668967e-05, "loss": 1.0225, "step": 55160 }, { "epoch": 0.3524654051084165, "grad_norm": 0.4765165448188782, "learning_rate": 9.253605554861978e-05, "loss": 0.7226, "step": 55170 }, { "epoch": 0.3525292922581552, "grad_norm": 1.0200433731079102, "learning_rate": 9.2533417952175e-05, "loss": 0.9903, "step": 55180 }, { "epoch": 0.3525931794078939, "grad_norm": 1.3597415685653687, "learning_rate": 9.253077992738192e-05, "loss": 0.7764, "step": 55190 }, { "epoch": 0.35265706655763257, "grad_norm": 0.7081646919250488, "learning_rate": 9.252814147426708e-05, "loss": 0.9052, "step": 55200 }, { "epoch": 0.3527209537073713, "grad_norm": 0.5674062967300415, "learning_rate": 9.252550259285707e-05, "loss": 0.8937, "step": 55210 }, { "epoch": 0.35278484085711, "grad_norm": 0.8797856569290161, "learning_rate": 9.252286328317846e-05, "loss": 0.6981, "step": 55220 }, { "epoch": 0.3528487280068487, "grad_norm": 0.6591719388961792, "learning_rate": 9.252022354525783e-05, "loss": 0.7734, "step": 55230 }, { "epoch": 0.3529126151565874, "grad_norm": 0.9455986022949219, "learning_rate": 9.251758337912174e-05, "loss": 0.7539, "step": 55240 }, { "epoch": 0.3529765023063261, "grad_norm": 0.6497638821601868, "learning_rate": 9.251494278479682e-05, "loss": 0.8169, "step": 55250 }, { "epoch": 0.3530403894560648, "grad_norm": 0.9514163136482239, "learning_rate": 9.251230176230965e-05, "loss": 1.2422, "step": 55260 }, { "epoch": 0.3531042766058035, "grad_norm": 1.0354559421539307, "learning_rate": 9.250966031168682e-05, "loss": 0.8663, "step": 55270 }, { "epoch": 0.3531681637555422, "grad_norm": 0.6657097935676575, "learning_rate": 9.250701843295492e-05, "loss": 1.169, "step": 55280 }, { "epoch": 0.3532320509052809, "grad_norm": 0.6656765937805176, "learning_rate": 9.25043761261406e-05, "loss": 0.722, "step": 55290 }, { "epoch": 0.35329593805501963, "grad_norm": 0.7539229989051819, "learning_rate": 9.250173339127042e-05, "loss": 0.8882, "step": 55300 }, { "epoch": 0.35335982520475834, "grad_norm": 2.1997039318084717, "learning_rate": 9.249909022837102e-05, "loss": 0.8417, "step": 55310 }, { "epoch": 0.35342371235449704, "grad_norm": 0.5912847518920898, "learning_rate": 9.249644663746901e-05, "loss": 0.8431, "step": 55320 }, { "epoch": 0.3534875995042357, "grad_norm": 1.1441770792007446, "learning_rate": 9.249380261859103e-05, "loss": 0.6843, "step": 55330 }, { "epoch": 0.3535514866539744, "grad_norm": 1.2015843391418457, "learning_rate": 9.249115817176368e-05, "loss": 0.825, "step": 55340 }, { "epoch": 0.3536153738037131, "grad_norm": 0.9341386556625366, "learning_rate": 9.248851329701362e-05, "loss": 1.0235, "step": 55350 }, { "epoch": 0.3536792609534518, "grad_norm": 0.8819360733032227, "learning_rate": 9.248586799436747e-05, "loss": 0.8604, "step": 55360 }, { "epoch": 0.3537431481031905, "grad_norm": 0.8615573048591614, "learning_rate": 9.248322226385187e-05, "loss": 0.9667, "step": 55370 }, { "epoch": 0.3538070352529292, "grad_norm": 1.185778021812439, "learning_rate": 9.248057610549348e-05, "loss": 1.0003, "step": 55380 }, { "epoch": 0.35387092240266793, "grad_norm": 0.9160196781158447, "learning_rate": 9.247792951931893e-05, "loss": 0.8687, "step": 55390 }, { "epoch": 0.35393480955240664, "grad_norm": 0.6795194745063782, "learning_rate": 9.247528250535487e-05, "loss": 0.7333, "step": 55400 }, { "epoch": 0.35399869670214534, "grad_norm": 0.5489585399627686, "learning_rate": 9.247263506362798e-05, "loss": 0.8638, "step": 55410 }, { "epoch": 0.35406258385188405, "grad_norm": 1.2006055116653442, "learning_rate": 9.246998719416491e-05, "loss": 0.9143, "step": 55420 }, { "epoch": 0.35412647100162276, "grad_norm": 1.0024096965789795, "learning_rate": 9.246733889699233e-05, "loss": 0.9047, "step": 55430 }, { "epoch": 0.35419035815136146, "grad_norm": 0.5763610005378723, "learning_rate": 9.24646901721369e-05, "loss": 0.9754, "step": 55440 }, { "epoch": 0.3542542453011001, "grad_norm": 1.1366212368011475, "learning_rate": 9.24620410196253e-05, "loss": 0.891, "step": 55450 }, { "epoch": 0.3543181324508388, "grad_norm": 1.1361256837844849, "learning_rate": 9.245939143948424e-05, "loss": 1.0441, "step": 55460 }, { "epoch": 0.3543820196005775, "grad_norm": 0.7863855361938477, "learning_rate": 9.245674143174034e-05, "loss": 0.7866, "step": 55470 }, { "epoch": 0.35444590675031623, "grad_norm": 0.8668807744979858, "learning_rate": 9.245409099642033e-05, "loss": 0.9319, "step": 55480 }, { "epoch": 0.35450979390005494, "grad_norm": 0.6587684750556946, "learning_rate": 9.245144013355092e-05, "loss": 0.8019, "step": 55490 }, { "epoch": 0.35457368104979364, "grad_norm": 1.1338073015213013, "learning_rate": 9.244878884315876e-05, "loss": 0.8598, "step": 55500 }, { "epoch": 0.35463756819953235, "grad_norm": 0.4027159512042999, "learning_rate": 9.244613712527057e-05, "loss": 0.7706, "step": 55510 }, { "epoch": 0.35470145534927106, "grad_norm": 1.0326690673828125, "learning_rate": 9.244348497991306e-05, "loss": 0.9883, "step": 55520 }, { "epoch": 0.35476534249900976, "grad_norm": 2.621795415878296, "learning_rate": 9.244083240711297e-05, "loss": 1.087, "step": 55530 }, { "epoch": 0.35482922964874847, "grad_norm": 0.8886315822601318, "learning_rate": 9.243817940689694e-05, "loss": 0.7566, "step": 55540 }, { "epoch": 0.3548931167984872, "grad_norm": 0.7971783876419067, "learning_rate": 9.243552597929174e-05, "loss": 0.7039, "step": 55550 }, { "epoch": 0.3549570039482259, "grad_norm": 0.7734363675117493, "learning_rate": 9.243287212432409e-05, "loss": 0.9843, "step": 55560 }, { "epoch": 0.35502089109796453, "grad_norm": 0.9685491919517517, "learning_rate": 9.24302178420207e-05, "loss": 0.9583, "step": 55570 }, { "epoch": 0.35508477824770324, "grad_norm": 1.15921950340271, "learning_rate": 9.242756313240833e-05, "loss": 0.7942, "step": 55580 }, { "epoch": 0.35514866539744194, "grad_norm": 1.1534897089004517, "learning_rate": 9.242490799551366e-05, "loss": 0.8079, "step": 55590 }, { "epoch": 0.35521255254718065, "grad_norm": 0.9609005451202393, "learning_rate": 9.242225243136348e-05, "loss": 0.9695, "step": 55600 }, { "epoch": 0.35527643969691935, "grad_norm": 0.6478775143623352, "learning_rate": 9.241959643998453e-05, "loss": 0.8381, "step": 55610 }, { "epoch": 0.35534032684665806, "grad_norm": 0.9925094246864319, "learning_rate": 9.241694002140354e-05, "loss": 0.6593, "step": 55620 }, { "epoch": 0.35540421399639677, "grad_norm": 0.9142459630966187, "learning_rate": 9.241428317564725e-05, "loss": 0.934, "step": 55630 }, { "epoch": 0.3554681011461355, "grad_norm": 0.6951974034309387, "learning_rate": 9.241162590274244e-05, "loss": 0.9468, "step": 55640 }, { "epoch": 0.3555319882958742, "grad_norm": 0.8623539209365845, "learning_rate": 9.240896820271588e-05, "loss": 0.8084, "step": 55650 }, { "epoch": 0.3555958754456129, "grad_norm": 0.7138127684593201, "learning_rate": 9.240631007559432e-05, "loss": 0.8162, "step": 55660 }, { "epoch": 0.3556597625953516, "grad_norm": 0.8145920634269714, "learning_rate": 9.240365152140451e-05, "loss": 1.0244, "step": 55670 }, { "epoch": 0.3557236497450903, "grad_norm": 0.9237201809883118, "learning_rate": 9.240099254017327e-05, "loss": 0.8636, "step": 55680 }, { "epoch": 0.35578753689482895, "grad_norm": 0.9301193356513977, "learning_rate": 9.239833313192734e-05, "loss": 1.1658, "step": 55690 }, { "epoch": 0.35585142404456765, "grad_norm": 0.6827517151832581, "learning_rate": 9.239567329669352e-05, "loss": 1.1023, "step": 55700 }, { "epoch": 0.35591531119430636, "grad_norm": 1.0909185409545898, "learning_rate": 9.239301303449859e-05, "loss": 0.8033, "step": 55710 }, { "epoch": 0.35597919834404507, "grad_norm": 0.4835173189640045, "learning_rate": 9.239035234536934e-05, "loss": 0.8785, "step": 55720 }, { "epoch": 0.3560430854937838, "grad_norm": 0.862131655216217, "learning_rate": 9.238769122933257e-05, "loss": 1.0392, "step": 55730 }, { "epoch": 0.3561069726435225, "grad_norm": 1.5188207626342773, "learning_rate": 9.238502968641509e-05, "loss": 1.1016, "step": 55740 }, { "epoch": 0.3561708597932612, "grad_norm": 0.6719252467155457, "learning_rate": 9.238236771664369e-05, "loss": 1.0367, "step": 55750 }, { "epoch": 0.3562347469429999, "grad_norm": 0.8751115798950195, "learning_rate": 9.237970532004516e-05, "loss": 0.9716, "step": 55760 }, { "epoch": 0.3562986340927386, "grad_norm": 0.8691346049308777, "learning_rate": 9.237704249664637e-05, "loss": 0.8428, "step": 55770 }, { "epoch": 0.3563625212424773, "grad_norm": 0.7232783436775208, "learning_rate": 9.237437924647408e-05, "loss": 0.7021, "step": 55780 }, { "epoch": 0.356426408392216, "grad_norm": 1.281238317489624, "learning_rate": 9.237171556955513e-05, "loss": 0.9095, "step": 55790 }, { "epoch": 0.3564902955419547, "grad_norm": 1.1289631128311157, "learning_rate": 9.236905146591635e-05, "loss": 0.9427, "step": 55800 }, { "epoch": 0.35655418269169337, "grad_norm": 0.8392340540885925, "learning_rate": 9.236638693558456e-05, "loss": 0.7125, "step": 55810 }, { "epoch": 0.3566180698414321, "grad_norm": 1.3441346883773804, "learning_rate": 9.23637219785866e-05, "loss": 0.8185, "step": 55820 }, { "epoch": 0.3566819569911708, "grad_norm": 0.7084068059921265, "learning_rate": 9.236105659494933e-05, "loss": 0.8048, "step": 55830 }, { "epoch": 0.3567458441409095, "grad_norm": 0.8866279125213623, "learning_rate": 9.235839078469956e-05, "loss": 1.0885, "step": 55840 }, { "epoch": 0.3568097312906482, "grad_norm": 0.9575055837631226, "learning_rate": 9.235572454786414e-05, "loss": 0.8621, "step": 55850 }, { "epoch": 0.3568736184403869, "grad_norm": 0.7449828386306763, "learning_rate": 9.235305788446995e-05, "loss": 0.902, "step": 55860 }, { "epoch": 0.3569375055901256, "grad_norm": 0.5956260561943054, "learning_rate": 9.235039079454382e-05, "loss": 0.9419, "step": 55870 }, { "epoch": 0.3570013927398643, "grad_norm": 0.7238242030143738, "learning_rate": 9.23477232781126e-05, "loss": 1.07, "step": 55880 }, { "epoch": 0.357065279889603, "grad_norm": 1.0870457887649536, "learning_rate": 9.234505533520319e-05, "loss": 0.9432, "step": 55890 }, { "epoch": 0.3571291670393417, "grad_norm": 0.9857404232025146, "learning_rate": 9.234238696584244e-05, "loss": 1.1723, "step": 55900 }, { "epoch": 0.35719305418908043, "grad_norm": 0.9271001815795898, "learning_rate": 9.233971817005722e-05, "loss": 0.9523, "step": 55910 }, { "epoch": 0.35725694133881913, "grad_norm": 1.4764975309371948, "learning_rate": 9.23370489478744e-05, "loss": 1.0909, "step": 55920 }, { "epoch": 0.3573208284885578, "grad_norm": 1.0458935499191284, "learning_rate": 9.233437929932087e-05, "loss": 0.8501, "step": 55930 }, { "epoch": 0.3573847156382965, "grad_norm": 0.8124297857284546, "learning_rate": 9.233170922442353e-05, "loss": 1.0442, "step": 55940 }, { "epoch": 0.3574486027880352, "grad_norm": 1.1618013381958008, "learning_rate": 9.232903872320924e-05, "loss": 1.0649, "step": 55950 }, { "epoch": 0.3575124899377739, "grad_norm": 0.8419058918952942, "learning_rate": 9.232636779570491e-05, "loss": 0.8909, "step": 55960 }, { "epoch": 0.3575763770875126, "grad_norm": 0.9706215262413025, "learning_rate": 9.232369644193746e-05, "loss": 0.9632, "step": 55970 }, { "epoch": 0.3576402642372513, "grad_norm": 1.0121309757232666, "learning_rate": 9.232102466193375e-05, "loss": 0.7683, "step": 55980 }, { "epoch": 0.35770415138699, "grad_norm": 0.8496699929237366, "learning_rate": 9.231835245572072e-05, "loss": 0.7359, "step": 55990 }, { "epoch": 0.3577680385367287, "grad_norm": 0.6330521702766418, "learning_rate": 9.231567982332528e-05, "loss": 0.7402, "step": 56000 }, { "epoch": 0.35783192568646743, "grad_norm": 0.8351934552192688, "learning_rate": 9.23130067647743e-05, "loss": 0.8587, "step": 56010 }, { "epoch": 0.35789581283620614, "grad_norm": 0.8029147386550903, "learning_rate": 9.231033328009477e-05, "loss": 0.7748, "step": 56020 }, { "epoch": 0.35795969998594485, "grad_norm": 0.6627703905105591, "learning_rate": 9.230765936931355e-05, "loss": 0.8785, "step": 56030 }, { "epoch": 0.35802358713568355, "grad_norm": 1.2232366800308228, "learning_rate": 9.230498503245764e-05, "loss": 0.8073, "step": 56040 }, { "epoch": 0.3580874742854222, "grad_norm": 0.783330500125885, "learning_rate": 9.23023102695539e-05, "loss": 1.0156, "step": 56050 }, { "epoch": 0.3581513614351609, "grad_norm": 0.8045901656150818, "learning_rate": 9.229963508062931e-05, "loss": 0.9699, "step": 56060 }, { "epoch": 0.3582152485848996, "grad_norm": 1.1084731817245483, "learning_rate": 9.229695946571079e-05, "loss": 1.0626, "step": 56070 }, { "epoch": 0.3582791357346383, "grad_norm": 0.9448803067207336, "learning_rate": 9.229428342482531e-05, "loss": 0.873, "step": 56080 }, { "epoch": 0.358343022884377, "grad_norm": 1.1229417324066162, "learning_rate": 9.229160695799981e-05, "loss": 0.9604, "step": 56090 }, { "epoch": 0.35840691003411573, "grad_norm": 0.8668258190155029, "learning_rate": 9.228893006526122e-05, "loss": 1.0267, "step": 56100 }, { "epoch": 0.35847079718385444, "grad_norm": 1.0289602279663086, "learning_rate": 9.228625274663653e-05, "loss": 0.8669, "step": 56110 }, { "epoch": 0.35853468433359315, "grad_norm": 0.9492294192314148, "learning_rate": 9.22835750021527e-05, "loss": 0.8236, "step": 56120 }, { "epoch": 0.35859857148333185, "grad_norm": 0.9482596516609192, "learning_rate": 9.228116466802996e-05, "loss": 0.9974, "step": 56130 }, { "epoch": 0.35866245863307056, "grad_norm": 0.8407812118530273, "learning_rate": 9.227848611448803e-05, "loss": 0.8215, "step": 56140 }, { "epoch": 0.35872634578280926, "grad_norm": 0.8675394654273987, "learning_rate": 9.227580713516519e-05, "loss": 1.0991, "step": 56150 }, { "epoch": 0.35879023293254797, "grad_norm": 0.9417056441307068, "learning_rate": 9.227312773008838e-05, "loss": 1.1657, "step": 56160 }, { "epoch": 0.3588541200822867, "grad_norm": 0.6891525983810425, "learning_rate": 9.22704478992846e-05, "loss": 0.8083, "step": 56170 }, { "epoch": 0.3589180072320253, "grad_norm": 0.8754307627677917, "learning_rate": 9.226776764278087e-05, "loss": 0.8525, "step": 56180 }, { "epoch": 0.35898189438176403, "grad_norm": 0.7058795094490051, "learning_rate": 9.226508696060412e-05, "loss": 0.9577, "step": 56190 }, { "epoch": 0.35904578153150274, "grad_norm": 0.7938945889472961, "learning_rate": 9.22624058527814e-05, "loss": 0.9518, "step": 56200 }, { "epoch": 0.35910966868124145, "grad_norm": 1.1744211912155151, "learning_rate": 9.225972431933968e-05, "loss": 0.9626, "step": 56210 }, { "epoch": 0.35917355583098015, "grad_norm": 0.6415241360664368, "learning_rate": 9.225704236030597e-05, "loss": 0.8535, "step": 56220 }, { "epoch": 0.35923744298071886, "grad_norm": 0.8061168789863586, "learning_rate": 9.225435997570731e-05, "loss": 0.9465, "step": 56230 }, { "epoch": 0.35930133013045756, "grad_norm": 1.0841360092163086, "learning_rate": 9.225167716557066e-05, "loss": 0.8539, "step": 56240 }, { "epoch": 0.35936521728019627, "grad_norm": 0.8104168772697449, "learning_rate": 9.22489939299231e-05, "loss": 0.9817, "step": 56250 }, { "epoch": 0.359429104429935, "grad_norm": 0.5234248042106628, "learning_rate": 9.22463102687916e-05, "loss": 0.9769, "step": 56260 }, { "epoch": 0.3594929915796737, "grad_norm": 0.9442692995071411, "learning_rate": 9.224362618220321e-05, "loss": 0.7631, "step": 56270 }, { "epoch": 0.3595568787294124, "grad_norm": 0.7581874132156372, "learning_rate": 9.224094167018496e-05, "loss": 0.9655, "step": 56280 }, { "epoch": 0.3596207658791511, "grad_norm": 0.9377095699310303, "learning_rate": 9.223825673276387e-05, "loss": 0.8839, "step": 56290 }, { "epoch": 0.35968465302888974, "grad_norm": 1.3251482248306274, "learning_rate": 9.2235571369967e-05, "loss": 1.07, "step": 56300 }, { "epoch": 0.35974854017862845, "grad_norm": 0.6887118220329285, "learning_rate": 9.223288558182141e-05, "loss": 0.8927, "step": 56310 }, { "epoch": 0.35981242732836716, "grad_norm": 1.181915044784546, "learning_rate": 9.22301993683541e-05, "loss": 0.8667, "step": 56320 }, { "epoch": 0.35987631447810586, "grad_norm": 0.9099196195602417, "learning_rate": 9.222751272959216e-05, "loss": 0.853, "step": 56330 }, { "epoch": 0.35994020162784457, "grad_norm": 0.717306911945343, "learning_rate": 9.222482566556263e-05, "loss": 0.6981, "step": 56340 }, { "epoch": 0.3600040887775833, "grad_norm": 0.9583873748779297, "learning_rate": 9.222213817629258e-05, "loss": 0.9945, "step": 56350 }, { "epoch": 0.360067975927322, "grad_norm": 0.770458459854126, "learning_rate": 9.221945026180907e-05, "loss": 1.0296, "step": 56360 }, { "epoch": 0.3601318630770607, "grad_norm": 1.2659205198287964, "learning_rate": 9.221676192213918e-05, "loss": 1.0096, "step": 56370 }, { "epoch": 0.3601957502267994, "grad_norm": 1.0208081007003784, "learning_rate": 9.221407315730997e-05, "loss": 1.0415, "step": 56380 }, { "epoch": 0.3602596373765381, "grad_norm": 0.8561046719551086, "learning_rate": 9.22113839673485e-05, "loss": 1.052, "step": 56390 }, { "epoch": 0.3603235245262768, "grad_norm": 0.8812417984008789, "learning_rate": 9.22086943522819e-05, "loss": 0.9035, "step": 56400 }, { "epoch": 0.3603874116760155, "grad_norm": 0.9357554316520691, "learning_rate": 9.220600431213721e-05, "loss": 1.052, "step": 56410 }, { "epoch": 0.36045129882575416, "grad_norm": 0.5159763693809509, "learning_rate": 9.220331384694157e-05, "loss": 0.857, "step": 56420 }, { "epoch": 0.36051518597549287, "grad_norm": 0.6961538791656494, "learning_rate": 9.220062295672203e-05, "loss": 0.7773, "step": 56430 }, { "epoch": 0.3605790731252316, "grad_norm": 0.8022356033325195, "learning_rate": 9.219793164150572e-05, "loss": 0.9277, "step": 56440 }, { "epoch": 0.3606429602749703, "grad_norm": 0.7829380631446838, "learning_rate": 9.219523990131972e-05, "loss": 1.2579, "step": 56450 }, { "epoch": 0.360706847424709, "grad_norm": 0.705920934677124, "learning_rate": 9.219254773619118e-05, "loss": 0.7642, "step": 56460 }, { "epoch": 0.3607707345744477, "grad_norm": 0.6244139075279236, "learning_rate": 9.218985514614715e-05, "loss": 0.8506, "step": 56470 }, { "epoch": 0.3608346217241864, "grad_norm": 0.9292709231376648, "learning_rate": 9.218716213121479e-05, "loss": 0.8007, "step": 56480 }, { "epoch": 0.3608985088739251, "grad_norm": 1.1093422174453735, "learning_rate": 9.218446869142121e-05, "loss": 0.948, "step": 56490 }, { "epoch": 0.3609623960236638, "grad_norm": 0.9102591872215271, "learning_rate": 9.218177482679354e-05, "loss": 0.9274, "step": 56500 }, { "epoch": 0.3610262831734025, "grad_norm": 0.8324138522148132, "learning_rate": 9.217908053735889e-05, "loss": 0.7481, "step": 56510 }, { "epoch": 0.3610901703231412, "grad_norm": 0.6961225867271423, "learning_rate": 9.217638582314442e-05, "loss": 0.9775, "step": 56520 }, { "epoch": 0.36115405747287993, "grad_norm": 0.9226143956184387, "learning_rate": 9.217369068417726e-05, "loss": 0.849, "step": 56530 }, { "epoch": 0.3612179446226186, "grad_norm": 0.7031887769699097, "learning_rate": 9.217099512048454e-05, "loss": 0.8807, "step": 56540 }, { "epoch": 0.3612818317723573, "grad_norm": 1.840198278427124, "learning_rate": 9.216829913209342e-05, "loss": 0.8067, "step": 56550 }, { "epoch": 0.361345718922096, "grad_norm": 0.6737743020057678, "learning_rate": 9.216560271903105e-05, "loss": 0.992, "step": 56560 }, { "epoch": 0.3614096060718347, "grad_norm": 1.441496729850769, "learning_rate": 9.216290588132457e-05, "loss": 0.9385, "step": 56570 }, { "epoch": 0.3614734932215734, "grad_norm": 0.6251863837242126, "learning_rate": 9.216020861900117e-05, "loss": 0.6162, "step": 56580 }, { "epoch": 0.3615373803713121, "grad_norm": 1.0875921249389648, "learning_rate": 9.215751093208798e-05, "loss": 0.631, "step": 56590 }, { "epoch": 0.3616012675210508, "grad_norm": 0.7786641716957092, "learning_rate": 9.215481282061221e-05, "loss": 1.1934, "step": 56600 }, { "epoch": 0.3616651546707895, "grad_norm": 0.6881313323974609, "learning_rate": 9.215211428460098e-05, "loss": 1.0602, "step": 56610 }, { "epoch": 0.36172904182052823, "grad_norm": 1.173574686050415, "learning_rate": 9.21494153240815e-05, "loss": 0.8231, "step": 56620 }, { "epoch": 0.36179292897026694, "grad_norm": 0.581283450126648, "learning_rate": 9.214671593908092e-05, "loss": 0.6751, "step": 56630 }, { "epoch": 0.36185681612000564, "grad_norm": 0.7389190196990967, "learning_rate": 9.214401612962649e-05, "loss": 0.7668, "step": 56640 }, { "epoch": 0.36192070326974435, "grad_norm": 0.6907786130905151, "learning_rate": 9.214131589574534e-05, "loss": 0.8037, "step": 56650 }, { "epoch": 0.361984590419483, "grad_norm": 0.8607721328735352, "learning_rate": 9.213861523746467e-05, "loss": 0.8867, "step": 56660 }, { "epoch": 0.3620484775692217, "grad_norm": 1.4309948682785034, "learning_rate": 9.213591415481172e-05, "loss": 0.9099, "step": 56670 }, { "epoch": 0.3621123647189604, "grad_norm": 0.4009925425052643, "learning_rate": 9.213321264781363e-05, "loss": 0.6807, "step": 56680 }, { "epoch": 0.3621762518686991, "grad_norm": 1.2572836875915527, "learning_rate": 9.213051071649766e-05, "loss": 0.9303, "step": 56690 }, { "epoch": 0.3622401390184378, "grad_norm": 0.9721736311912537, "learning_rate": 9.212780836089098e-05, "loss": 0.8034, "step": 56700 }, { "epoch": 0.36230402616817653, "grad_norm": 0.7228071093559265, "learning_rate": 9.212510558102083e-05, "loss": 1.0872, "step": 56710 }, { "epoch": 0.36236791331791524, "grad_norm": 1.043264389038086, "learning_rate": 9.212240237691443e-05, "loss": 0.8663, "step": 56720 }, { "epoch": 0.36243180046765394, "grad_norm": 0.8477384448051453, "learning_rate": 9.211969874859898e-05, "loss": 1.0247, "step": 56730 }, { "epoch": 0.36249568761739265, "grad_norm": 1.0123629570007324, "learning_rate": 9.211699469610174e-05, "loss": 0.886, "step": 56740 }, { "epoch": 0.36255957476713135, "grad_norm": 1.2255103588104248, "learning_rate": 9.211429021944993e-05, "loss": 0.8577, "step": 56750 }, { "epoch": 0.36262346191687006, "grad_norm": 1.0467153787612915, "learning_rate": 9.211158531867078e-05, "loss": 1.0881, "step": 56760 }, { "epoch": 0.36268734906660877, "grad_norm": 0.876595139503479, "learning_rate": 9.210887999379153e-05, "loss": 1.1139, "step": 56770 }, { "epoch": 0.3627512362163474, "grad_norm": 0.9058437943458557, "learning_rate": 9.210617424483943e-05, "loss": 1.2917, "step": 56780 }, { "epoch": 0.3628151233660861, "grad_norm": 0.755662202835083, "learning_rate": 9.210346807184174e-05, "loss": 0.899, "step": 56790 }, { "epoch": 0.36287901051582483, "grad_norm": 0.5830435156822205, "learning_rate": 9.210076147482567e-05, "loss": 0.748, "step": 56800 }, { "epoch": 0.36294289766556354, "grad_norm": 0.7086238861083984, "learning_rate": 9.209805445381854e-05, "loss": 1.1404, "step": 56810 }, { "epoch": 0.36300678481530224, "grad_norm": 0.8161737322807312, "learning_rate": 9.209534700884758e-05, "loss": 0.6793, "step": 56820 }, { "epoch": 0.36307067196504095, "grad_norm": 0.9982635974884033, "learning_rate": 9.209263913994004e-05, "loss": 0.865, "step": 56830 }, { "epoch": 0.36313455911477965, "grad_norm": 0.8346578478813171, "learning_rate": 9.208993084712322e-05, "loss": 1.001, "step": 56840 }, { "epoch": 0.36319844626451836, "grad_norm": 0.7267649173736572, "learning_rate": 9.20872221304244e-05, "loss": 0.6267, "step": 56850 }, { "epoch": 0.36326233341425707, "grad_norm": 0.8349143266677856, "learning_rate": 9.208451298987082e-05, "loss": 0.9017, "step": 56860 }, { "epoch": 0.36332622056399577, "grad_norm": 4.069967269897461, "learning_rate": 9.20818034254898e-05, "loss": 0.9907, "step": 56870 }, { "epoch": 0.3633901077137345, "grad_norm": 1.0109018087387085, "learning_rate": 9.20790934373086e-05, "loss": 0.7675, "step": 56880 }, { "epoch": 0.3634539948634732, "grad_norm": 0.7021664977073669, "learning_rate": 9.207638302535452e-05, "loss": 0.8808, "step": 56890 }, { "epoch": 0.36351788201321183, "grad_norm": 1.4066033363342285, "learning_rate": 9.207367218965487e-05, "loss": 1.1123, "step": 56900 }, { "epoch": 0.36358176916295054, "grad_norm": 0.5992746353149414, "learning_rate": 9.207096093023694e-05, "loss": 0.7128, "step": 56910 }, { "epoch": 0.36364565631268925, "grad_norm": 0.9940372109413147, "learning_rate": 9.206824924712805e-05, "loss": 1.0003, "step": 56920 }, { "epoch": 0.36370954346242795, "grad_norm": 0.7933813333511353, "learning_rate": 9.206553714035549e-05, "loss": 0.9643, "step": 56930 }, { "epoch": 0.36377343061216666, "grad_norm": 0.7373024225234985, "learning_rate": 9.206282460994657e-05, "loss": 0.8773, "step": 56940 }, { "epoch": 0.36383731776190537, "grad_norm": 1.27448570728302, "learning_rate": 9.206011165592863e-05, "loss": 0.909, "step": 56950 }, { "epoch": 0.36390120491164407, "grad_norm": 0.7734085917472839, "learning_rate": 9.205739827832895e-05, "loss": 0.9389, "step": 56960 }, { "epoch": 0.3639650920613828, "grad_norm": 0.5840640068054199, "learning_rate": 9.205468447717491e-05, "loss": 1.01, "step": 56970 }, { "epoch": 0.3640289792111215, "grad_norm": 1.308883547782898, "learning_rate": 9.205197025249382e-05, "loss": 0.8717, "step": 56980 }, { "epoch": 0.3640928663608602, "grad_norm": 0.5307298898696899, "learning_rate": 9.2049255604313e-05, "loss": 0.998, "step": 56990 }, { "epoch": 0.3641567535105989, "grad_norm": 0.6630975604057312, "learning_rate": 9.20465405326598e-05, "loss": 0.9668, "step": 57000 }, { "epoch": 0.3642206406603376, "grad_norm": 0.6394637823104858, "learning_rate": 9.204382503756154e-05, "loss": 0.8324, "step": 57010 }, { "epoch": 0.3642845278100763, "grad_norm": 1.0292775630950928, "learning_rate": 9.204110911904562e-05, "loss": 0.8907, "step": 57020 }, { "epoch": 0.36434841495981496, "grad_norm": 1.187157392501831, "learning_rate": 9.203839277713935e-05, "loss": 1.0058, "step": 57030 }, { "epoch": 0.36441230210955367, "grad_norm": 1.1334859132766724, "learning_rate": 9.20356760118701e-05, "loss": 0.9049, "step": 57040 }, { "epoch": 0.36447618925929237, "grad_norm": 1.5810905694961548, "learning_rate": 9.203295882326521e-05, "loss": 0.8885, "step": 57050 }, { "epoch": 0.3645400764090311, "grad_norm": 0.981046736240387, "learning_rate": 9.203024121135209e-05, "loss": 0.8166, "step": 57060 }, { "epoch": 0.3646039635587698, "grad_norm": 0.9694949388504028, "learning_rate": 9.202752317615805e-05, "loss": 0.7219, "step": 57070 }, { "epoch": 0.3646678507085085, "grad_norm": 1.3031917810440063, "learning_rate": 9.202480471771052e-05, "loss": 0.9798, "step": 57080 }, { "epoch": 0.3647317378582472, "grad_norm": 0.9253140687942505, "learning_rate": 9.202208583603683e-05, "loss": 0.7253, "step": 57090 }, { "epoch": 0.3647956250079859, "grad_norm": 1.0539807081222534, "learning_rate": 9.201936653116439e-05, "loss": 0.8563, "step": 57100 }, { "epoch": 0.3648595121577246, "grad_norm": 0.8437415361404419, "learning_rate": 9.201664680312057e-05, "loss": 0.955, "step": 57110 }, { "epoch": 0.3649233993074633, "grad_norm": 0.7053326368331909, "learning_rate": 9.201392665193276e-05, "loss": 0.8577, "step": 57120 }, { "epoch": 0.364987286457202, "grad_norm": 0.5430055856704712, "learning_rate": 9.201120607762837e-05, "loss": 0.8196, "step": 57130 }, { "epoch": 0.3650511736069407, "grad_norm": 0.6964545845985413, "learning_rate": 9.20084850802348e-05, "loss": 0.8877, "step": 57140 }, { "epoch": 0.3651150607566794, "grad_norm": 2.833962917327881, "learning_rate": 9.200576365977943e-05, "loss": 1.1258, "step": 57150 }, { "epoch": 0.3651789479064181, "grad_norm": 0.9289480447769165, "learning_rate": 9.200304181628968e-05, "loss": 0.8065, "step": 57160 }, { "epoch": 0.3652428350561568, "grad_norm": 0.6666757464408875, "learning_rate": 9.200031954979297e-05, "loss": 0.876, "step": 57170 }, { "epoch": 0.3653067222058955, "grad_norm": 0.9867071509361267, "learning_rate": 9.19975968603167e-05, "loss": 0.9151, "step": 57180 }, { "epoch": 0.3653706093556342, "grad_norm": 0.6142376661300659, "learning_rate": 9.19948737478883e-05, "loss": 0.7852, "step": 57190 }, { "epoch": 0.3654344965053729, "grad_norm": 1.6434037685394287, "learning_rate": 9.199215021253518e-05, "loss": 0.8127, "step": 57200 }, { "epoch": 0.3654983836551116, "grad_norm": 1.2232186794281006, "learning_rate": 9.198942625428479e-05, "loss": 0.9223, "step": 57210 }, { "epoch": 0.3655622708048503, "grad_norm": 1.112564206123352, "learning_rate": 9.198670187316456e-05, "loss": 1.0382, "step": 57220 }, { "epoch": 0.365626157954589, "grad_norm": 0.8125051259994507, "learning_rate": 9.19839770692019e-05, "loss": 0.8196, "step": 57230 }, { "epoch": 0.36569004510432773, "grad_norm": 3.3364717960357666, "learning_rate": 9.198125184242427e-05, "loss": 1.0401, "step": 57240 }, { "epoch": 0.36575393225406644, "grad_norm": 0.8038178086280823, "learning_rate": 9.197852619285913e-05, "loss": 1.2333, "step": 57250 }, { "epoch": 0.36581781940380514, "grad_norm": 0.9946600198745728, "learning_rate": 9.19758001205339e-05, "loss": 0.8022, "step": 57260 }, { "epoch": 0.3658817065535438, "grad_norm": 2.188892126083374, "learning_rate": 9.197307362547607e-05, "loss": 0.8886, "step": 57270 }, { "epoch": 0.3659455937032825, "grad_norm": 0.699150025844574, "learning_rate": 9.197034670771306e-05, "loss": 0.8193, "step": 57280 }, { "epoch": 0.3660094808530212, "grad_norm": 0.9230877757072449, "learning_rate": 9.196761936727235e-05, "loss": 0.9072, "step": 57290 }, { "epoch": 0.3660733680027599, "grad_norm": 1.229096531867981, "learning_rate": 9.19648916041814e-05, "loss": 0.883, "step": 57300 }, { "epoch": 0.3661372551524986, "grad_norm": 0.6960686445236206, "learning_rate": 9.196216341846771e-05, "loss": 1.1022, "step": 57310 }, { "epoch": 0.3662011423022373, "grad_norm": 0.7202252745628357, "learning_rate": 9.195943481015872e-05, "loss": 0.9708, "step": 57320 }, { "epoch": 0.36626502945197603, "grad_norm": 0.6151859760284424, "learning_rate": 9.19567057792819e-05, "loss": 1.1355, "step": 57330 }, { "epoch": 0.36632891660171474, "grad_norm": 0.6116877198219299, "learning_rate": 9.195397632586478e-05, "loss": 0.7314, "step": 57340 }, { "epoch": 0.36639280375145344, "grad_norm": 1.559106707572937, "learning_rate": 9.195124644993483e-05, "loss": 0.9246, "step": 57350 }, { "epoch": 0.36645669090119215, "grad_norm": 0.8441659808158875, "learning_rate": 9.194851615151951e-05, "loss": 0.8061, "step": 57360 }, { "epoch": 0.36652057805093086, "grad_norm": 0.8084182143211365, "learning_rate": 9.194578543064635e-05, "loss": 1.0054, "step": 57370 }, { "epoch": 0.36658446520066956, "grad_norm": 0.9725624918937683, "learning_rate": 9.194305428734285e-05, "loss": 0.9369, "step": 57380 }, { "epoch": 0.3666483523504082, "grad_norm": 0.7019644975662231, "learning_rate": 9.19403227216365e-05, "loss": 1.0316, "step": 57390 }, { "epoch": 0.3667122395001469, "grad_norm": 0.84947669506073, "learning_rate": 9.193759073355482e-05, "loss": 1.0048, "step": 57400 }, { "epoch": 0.3667761266498856, "grad_norm": 0.8308162689208984, "learning_rate": 9.193485832312532e-05, "loss": 1.2343, "step": 57410 }, { "epoch": 0.36684001379962433, "grad_norm": 1.0776206254959106, "learning_rate": 9.193212549037551e-05, "loss": 1.2088, "step": 57420 }, { "epoch": 0.36690390094936304, "grad_norm": 1.5936800241470337, "learning_rate": 9.192939223533292e-05, "loss": 1.0616, "step": 57430 }, { "epoch": 0.36696778809910174, "grad_norm": 0.9751461148262024, "learning_rate": 9.192665855802509e-05, "loss": 0.9944, "step": 57440 }, { "epoch": 0.36703167524884045, "grad_norm": 0.893570601940155, "learning_rate": 9.192392445847953e-05, "loss": 0.6853, "step": 57450 }, { "epoch": 0.36709556239857916, "grad_norm": 1.0084480047225952, "learning_rate": 9.192118993672378e-05, "loss": 1.1445, "step": 57460 }, { "epoch": 0.36715944954831786, "grad_norm": 0.9631890654563904, "learning_rate": 9.191845499278539e-05, "loss": 0.9025, "step": 57470 }, { "epoch": 0.36722333669805657, "grad_norm": 1.809931993484497, "learning_rate": 9.191571962669187e-05, "loss": 0.8336, "step": 57480 }, { "epoch": 0.3672872238477953, "grad_norm": 1.0318517684936523, "learning_rate": 9.191298383847083e-05, "loss": 0.9237, "step": 57490 }, { "epoch": 0.367351110997534, "grad_norm": 1.242576003074646, "learning_rate": 9.191024762814975e-05, "loss": 0.9736, "step": 57500 }, { "epoch": 0.36741499814727263, "grad_norm": 0.8778398036956787, "learning_rate": 9.190751099575623e-05, "loss": 0.7765, "step": 57510 }, { "epoch": 0.36747888529701134, "grad_norm": 1.108216643333435, "learning_rate": 9.19047739413178e-05, "loss": 1.0051, "step": 57520 }, { "epoch": 0.36754277244675004, "grad_norm": 0.9173517227172852, "learning_rate": 9.190203646486206e-05, "loss": 0.9958, "step": 57530 }, { "epoch": 0.36760665959648875, "grad_norm": 0.8545486330986023, "learning_rate": 9.189929856641657e-05, "loss": 0.8174, "step": 57540 }, { "epoch": 0.36767054674622746, "grad_norm": 0.8391945362091064, "learning_rate": 9.18965602460089e-05, "loss": 0.9733, "step": 57550 }, { "epoch": 0.36773443389596616, "grad_norm": 0.6733419895172119, "learning_rate": 9.189382150366662e-05, "loss": 0.8057, "step": 57560 }, { "epoch": 0.36779832104570487, "grad_norm": 2.302520513534546, "learning_rate": 9.189108233941729e-05, "loss": 0.9927, "step": 57570 }, { "epoch": 0.3678622081954436, "grad_norm": 0.8115237355232239, "learning_rate": 9.188834275328853e-05, "loss": 0.9236, "step": 57580 }, { "epoch": 0.3679260953451823, "grad_norm": 1.0810778141021729, "learning_rate": 9.188560274530793e-05, "loss": 1.1711, "step": 57590 }, { "epoch": 0.367989982494921, "grad_norm": 0.9964002966880798, "learning_rate": 9.188286231550307e-05, "loss": 1.013, "step": 57600 }, { "epoch": 0.3680538696446597, "grad_norm": 1.2044520378112793, "learning_rate": 9.188012146390155e-05, "loss": 0.798, "step": 57610 }, { "epoch": 0.3681177567943984, "grad_norm": 2.0826616287231445, "learning_rate": 9.187738019053098e-05, "loss": 0.7468, "step": 57620 }, { "epoch": 0.36818164394413705, "grad_norm": 0.8267672657966614, "learning_rate": 9.187463849541895e-05, "loss": 0.911, "step": 57630 }, { "epoch": 0.36824553109387576, "grad_norm": 0.7479827404022217, "learning_rate": 9.18718963785931e-05, "loss": 0.7645, "step": 57640 }, { "epoch": 0.36830941824361446, "grad_norm": 0.7282874584197998, "learning_rate": 9.186915384008103e-05, "loss": 1.1665, "step": 57650 }, { "epoch": 0.36837330539335317, "grad_norm": 0.6607038974761963, "learning_rate": 9.186641087991034e-05, "loss": 0.9505, "step": 57660 }, { "epoch": 0.3684371925430919, "grad_norm": 0.7457965612411499, "learning_rate": 9.186366749810869e-05, "loss": 0.7571, "step": 57670 }, { "epoch": 0.3685010796928306, "grad_norm": 0.9892933368682861, "learning_rate": 9.186092369470368e-05, "loss": 0.8002, "step": 57680 }, { "epoch": 0.3685649668425693, "grad_norm": 1.134438395500183, "learning_rate": 9.185817946972296e-05, "loss": 0.9039, "step": 57690 }, { "epoch": 0.368628853992308, "grad_norm": 0.7737533450126648, "learning_rate": 9.185543482319417e-05, "loss": 0.8303, "step": 57700 }, { "epoch": 0.3686927411420467, "grad_norm": 2.6117820739746094, "learning_rate": 9.185268975514491e-05, "loss": 0.9566, "step": 57710 }, { "epoch": 0.3687566282917854, "grad_norm": 1.5219961404800415, "learning_rate": 9.184994426560289e-05, "loss": 0.976, "step": 57720 }, { "epoch": 0.3688205154415241, "grad_norm": 0.796924889087677, "learning_rate": 9.18471983545957e-05, "loss": 0.8359, "step": 57730 }, { "epoch": 0.3688844025912628, "grad_norm": 0.5662297606468201, "learning_rate": 9.184445202215104e-05, "loss": 0.9518, "step": 57740 }, { "epoch": 0.36894828974100147, "grad_norm": 1.4038177728652954, "learning_rate": 9.184170526829654e-05, "loss": 0.8367, "step": 57750 }, { "epoch": 0.3690121768907402, "grad_norm": 1.051730990409851, "learning_rate": 9.183895809305987e-05, "loss": 0.8319, "step": 57760 }, { "epoch": 0.3690760640404789, "grad_norm": 0.6114339232444763, "learning_rate": 9.183621049646869e-05, "loss": 0.8821, "step": 57770 }, { "epoch": 0.3691399511902176, "grad_norm": 0.7710915803909302, "learning_rate": 9.18334624785507e-05, "loss": 0.7208, "step": 57780 }, { "epoch": 0.3692038383399563, "grad_norm": 1.5859193801879883, "learning_rate": 9.183071403933353e-05, "loss": 0.8121, "step": 57790 }, { "epoch": 0.369267725489695, "grad_norm": 0.8052014708518982, "learning_rate": 9.182796517884487e-05, "loss": 0.9727, "step": 57800 }, { "epoch": 0.3693316126394337, "grad_norm": 0.7919948101043701, "learning_rate": 9.182521589711244e-05, "loss": 1.0669, "step": 57810 }, { "epoch": 0.3693954997891724, "grad_norm": 0.9116694927215576, "learning_rate": 9.182246619416388e-05, "loss": 0.7669, "step": 57820 }, { "epoch": 0.3694593869389111, "grad_norm": 0.7370694875717163, "learning_rate": 9.181971607002693e-05, "loss": 0.6573, "step": 57830 }, { "epoch": 0.3695232740886498, "grad_norm": 2.3321750164031982, "learning_rate": 9.181696552472924e-05, "loss": 1.0031, "step": 57840 }, { "epoch": 0.36958716123838853, "grad_norm": 1.1017699241638184, "learning_rate": 9.181421455829852e-05, "loss": 0.9181, "step": 57850 }, { "epoch": 0.36965104838812723, "grad_norm": 1.479238510131836, "learning_rate": 9.181146317076252e-05, "loss": 1.0418, "step": 57860 }, { "epoch": 0.36971493553786594, "grad_norm": 0.8015438914299011, "learning_rate": 9.180871136214889e-05, "loss": 0.8837, "step": 57870 }, { "epoch": 0.3697788226876046, "grad_norm": 0.7428931593894958, "learning_rate": 9.180595913248537e-05, "loss": 0.9252, "step": 57880 }, { "epoch": 0.3698427098373433, "grad_norm": 1.2738107442855835, "learning_rate": 9.180320648179968e-05, "loss": 0.8249, "step": 57890 }, { "epoch": 0.369906596987082, "grad_norm": 1.0015136003494263, "learning_rate": 9.180045341011953e-05, "loss": 0.9307, "step": 57900 }, { "epoch": 0.3699704841368207, "grad_norm": 0.7193623185157776, "learning_rate": 9.179769991747264e-05, "loss": 0.8081, "step": 57910 }, { "epoch": 0.3700343712865594, "grad_norm": 0.964747428894043, "learning_rate": 9.179494600388677e-05, "loss": 0.9367, "step": 57920 }, { "epoch": 0.3700982584362981, "grad_norm": 0.8497000932693481, "learning_rate": 9.179219166938963e-05, "loss": 0.7509, "step": 57930 }, { "epoch": 0.37016214558603683, "grad_norm": 1.6816493272781372, "learning_rate": 9.178943691400896e-05, "loss": 0.7834, "step": 57940 }, { "epoch": 0.37022603273577553, "grad_norm": 0.8694002032279968, "learning_rate": 9.178668173777252e-05, "loss": 0.9374, "step": 57950 }, { "epoch": 0.37028991988551424, "grad_norm": 0.6682251691818237, "learning_rate": 9.178392614070803e-05, "loss": 0.9475, "step": 57960 }, { "epoch": 0.37035380703525295, "grad_norm": 0.610185980796814, "learning_rate": 9.178117012284326e-05, "loss": 0.8925, "step": 57970 }, { "epoch": 0.37041769418499165, "grad_norm": 0.5272064805030823, "learning_rate": 9.177841368420596e-05, "loss": 0.8726, "step": 57980 }, { "epoch": 0.37048158133473036, "grad_norm": 0.878194272518158, "learning_rate": 9.17756568248239e-05, "loss": 0.8307, "step": 57990 }, { "epoch": 0.370545468484469, "grad_norm": 0.5838222503662109, "learning_rate": 9.177289954472483e-05, "loss": 1.0879, "step": 58000 }, { "epoch": 0.3706093556342077, "grad_norm": 1.0944530963897705, "learning_rate": 9.177014184393654e-05, "loss": 0.8774, "step": 58010 }, { "epoch": 0.3706732427839464, "grad_norm": 0.8681952953338623, "learning_rate": 9.176738372248675e-05, "loss": 0.9085, "step": 58020 }, { "epoch": 0.37073712993368513, "grad_norm": 1.0131874084472656, "learning_rate": 9.176462518040328e-05, "loss": 1.0068, "step": 58030 }, { "epoch": 0.37080101708342383, "grad_norm": 1.0025804042816162, "learning_rate": 9.176186621771392e-05, "loss": 0.8304, "step": 58040 }, { "epoch": 0.37086490423316254, "grad_norm": 0.5216399431228638, "learning_rate": 9.175910683444641e-05, "loss": 1.0596, "step": 58050 }, { "epoch": 0.37092879138290125, "grad_norm": 1.1650744676589966, "learning_rate": 9.17563470306286e-05, "loss": 0.9596, "step": 58060 }, { "epoch": 0.37099267853263995, "grad_norm": 0.738498866558075, "learning_rate": 9.175358680628825e-05, "loss": 1.0937, "step": 58070 }, { "epoch": 0.37105656568237866, "grad_norm": 1.8002482652664185, "learning_rate": 9.175082616145314e-05, "loss": 1.0585, "step": 58080 }, { "epoch": 0.37112045283211736, "grad_norm": 0.9968917369842529, "learning_rate": 9.17480650961511e-05, "loss": 0.8607, "step": 58090 }, { "epoch": 0.37118433998185607, "grad_norm": 0.6830025911331177, "learning_rate": 9.174530361040992e-05, "loss": 0.909, "step": 58100 }, { "epoch": 0.3712482271315948, "grad_norm": 0.8409507870674133, "learning_rate": 9.174254170425742e-05, "loss": 0.7824, "step": 58110 }, { "epoch": 0.3713121142813334, "grad_norm": 2.4945926666259766, "learning_rate": 9.173977937772143e-05, "loss": 1.0359, "step": 58120 }, { "epoch": 0.37137600143107213, "grad_norm": 2.378359317779541, "learning_rate": 9.173701663082972e-05, "loss": 0.9768, "step": 58130 }, { "epoch": 0.37143988858081084, "grad_norm": 1.1033055782318115, "learning_rate": 9.173425346361017e-05, "loss": 0.8963, "step": 58140 }, { "epoch": 0.37150377573054955, "grad_norm": 0.982449471950531, "learning_rate": 9.173148987609057e-05, "loss": 0.9571, "step": 58150 }, { "epoch": 0.37156766288028825, "grad_norm": 0.8938246965408325, "learning_rate": 9.172872586829878e-05, "loss": 1.0901, "step": 58160 }, { "epoch": 0.37163155003002696, "grad_norm": 1.218361258506775, "learning_rate": 9.17259614402626e-05, "loss": 0.9947, "step": 58170 }, { "epoch": 0.37169543717976566, "grad_norm": 0.7889940142631531, "learning_rate": 9.17231965920099e-05, "loss": 0.8841, "step": 58180 }, { "epoch": 0.37175932432950437, "grad_norm": 0.9611823558807373, "learning_rate": 9.17204313235685e-05, "loss": 0.8782, "step": 58190 }, { "epoch": 0.3718232114792431, "grad_norm": 1.1690157651901245, "learning_rate": 9.171766563496628e-05, "loss": 0.7884, "step": 58200 }, { "epoch": 0.3718870986289818, "grad_norm": 1.155748963356018, "learning_rate": 9.171489952623109e-05, "loss": 1.0516, "step": 58210 }, { "epoch": 0.3719509857787205, "grad_norm": 0.927507758140564, "learning_rate": 9.171213299739075e-05, "loss": 1.0492, "step": 58220 }, { "epoch": 0.3720148729284592, "grad_norm": 1.9883320331573486, "learning_rate": 9.170936604847315e-05, "loss": 1.0933, "step": 58230 }, { "epoch": 0.37207876007819785, "grad_norm": 0.8213433623313904, "learning_rate": 9.170659867950615e-05, "loss": 0.8121, "step": 58240 }, { "epoch": 0.37214264722793655, "grad_norm": 1.8754318952560425, "learning_rate": 9.170383089051762e-05, "loss": 1.0397, "step": 58250 }, { "epoch": 0.37220653437767526, "grad_norm": 1.1018773317337036, "learning_rate": 9.170106268153543e-05, "loss": 0.9177, "step": 58260 }, { "epoch": 0.37227042152741396, "grad_norm": 0.6226816773414612, "learning_rate": 9.169829405258747e-05, "loss": 0.8247, "step": 58270 }, { "epoch": 0.37233430867715267, "grad_norm": 0.615023136138916, "learning_rate": 9.169552500370161e-05, "loss": 1.0718, "step": 58280 }, { "epoch": 0.3723981958268914, "grad_norm": 0.7454681396484375, "learning_rate": 9.169275553490573e-05, "loss": 0.9678, "step": 58290 }, { "epoch": 0.3724620829766301, "grad_norm": 0.9580934047698975, "learning_rate": 9.168998564622774e-05, "loss": 1.2206, "step": 58300 }, { "epoch": 0.3725259701263688, "grad_norm": 1.0605610609054565, "learning_rate": 9.168721533769556e-05, "loss": 0.8984, "step": 58310 }, { "epoch": 0.3725898572761075, "grad_norm": 1.1907299757003784, "learning_rate": 9.168444460933702e-05, "loss": 0.8531, "step": 58320 }, { "epoch": 0.3726537444258462, "grad_norm": 0.995368480682373, "learning_rate": 9.168167346118006e-05, "loss": 0.6946, "step": 58330 }, { "epoch": 0.3727176315755849, "grad_norm": 1.1580376625061035, "learning_rate": 9.167890189325261e-05, "loss": 0.7377, "step": 58340 }, { "epoch": 0.3727815187253236, "grad_norm": 1.3204131126403809, "learning_rate": 9.167612990558254e-05, "loss": 0.8134, "step": 58350 }, { "epoch": 0.37284540587506226, "grad_norm": 1.4517935514450073, "learning_rate": 9.167335749819781e-05, "loss": 0.7879, "step": 58360 }, { "epoch": 0.37290929302480097, "grad_norm": 0.6076644659042358, "learning_rate": 9.167058467112629e-05, "loss": 0.9626, "step": 58370 }, { "epoch": 0.3729731801745397, "grad_norm": 0.8815622925758362, "learning_rate": 9.166781142439595e-05, "loss": 0.8204, "step": 58380 }, { "epoch": 0.3730370673242784, "grad_norm": 1.5476758480072021, "learning_rate": 9.16650377580347e-05, "loss": 1.0324, "step": 58390 }, { "epoch": 0.3731009544740171, "grad_norm": 1.1092950105667114, "learning_rate": 9.166226367207047e-05, "loss": 0.7715, "step": 58400 }, { "epoch": 0.3731648416237558, "grad_norm": 0.8686773777008057, "learning_rate": 9.16594891665312e-05, "loss": 0.8466, "step": 58410 }, { "epoch": 0.3732287287734945, "grad_norm": 0.948353111743927, "learning_rate": 9.165671424144484e-05, "loss": 0.7348, "step": 58420 }, { "epoch": 0.3732926159232332, "grad_norm": 0.8880228400230408, "learning_rate": 9.165393889683933e-05, "loss": 0.8305, "step": 58430 }, { "epoch": 0.3733565030729719, "grad_norm": 0.6419790983200073, "learning_rate": 9.165116313274262e-05, "loss": 0.8744, "step": 58440 }, { "epoch": 0.3734203902227106, "grad_norm": 0.6578751802444458, "learning_rate": 9.164838694918266e-05, "loss": 1.0893, "step": 58450 }, { "epoch": 0.3734842773724493, "grad_norm": 0.6613552570343018, "learning_rate": 9.16456103461874e-05, "loss": 1.0446, "step": 58460 }, { "epoch": 0.37354816452218803, "grad_norm": 0.7612963318824768, "learning_rate": 9.164283332378483e-05, "loss": 0.9673, "step": 58470 }, { "epoch": 0.3736120516719267, "grad_norm": 1.8602917194366455, "learning_rate": 9.16400558820029e-05, "loss": 1.1563, "step": 58480 }, { "epoch": 0.3736759388216654, "grad_norm": 0.7156874537467957, "learning_rate": 9.163755582585293e-05, "loss": 0.9562, "step": 58490 }, { "epoch": 0.3737398259714041, "grad_norm": 0.8070692420005798, "learning_rate": 9.163477758732727e-05, "loss": 0.8344, "step": 58500 }, { "epoch": 0.3738037131211428, "grad_norm": 0.8404533267021179, "learning_rate": 9.163199892950341e-05, "loss": 0.9861, "step": 58510 }, { "epoch": 0.3738676002708815, "grad_norm": 0.8748318552970886, "learning_rate": 9.162921985240928e-05, "loss": 0.9779, "step": 58520 }, { "epoch": 0.3739314874206202, "grad_norm": 0.8599054217338562, "learning_rate": 9.16264403560729e-05, "loss": 0.8124, "step": 58530 }, { "epoch": 0.3739953745703589, "grad_norm": 0.7923135161399841, "learning_rate": 9.162366044052226e-05, "loss": 0.6135, "step": 58540 }, { "epoch": 0.3740592617200976, "grad_norm": 0.6415694952011108, "learning_rate": 9.162088010578535e-05, "loss": 0.9293, "step": 58550 }, { "epoch": 0.37412314886983633, "grad_norm": 2.256666898727417, "learning_rate": 9.161809935189016e-05, "loss": 1.1138, "step": 58560 }, { "epoch": 0.37418703601957504, "grad_norm": 1.2693225145339966, "learning_rate": 9.161531817886471e-05, "loss": 0.6599, "step": 58570 }, { "epoch": 0.37425092316931374, "grad_norm": 0.8467420935630798, "learning_rate": 9.1612536586737e-05, "loss": 0.6876, "step": 58580 }, { "epoch": 0.37431481031905245, "grad_norm": 0.9001184701919556, "learning_rate": 9.160975457553504e-05, "loss": 0.5682, "step": 58590 }, { "epoch": 0.37437869746879116, "grad_norm": 0.6269614696502686, "learning_rate": 9.160697214528687e-05, "loss": 1.0431, "step": 58600 }, { "epoch": 0.3744425846185298, "grad_norm": 1.413830280303955, "learning_rate": 9.160418929602048e-05, "loss": 0.7761, "step": 58610 }, { "epoch": 0.3745064717682685, "grad_norm": 2.2682693004608154, "learning_rate": 9.160140602776392e-05, "loss": 1.1893, "step": 58620 }, { "epoch": 0.3745703589180072, "grad_norm": 0.5779352188110352, "learning_rate": 9.159862234054521e-05, "loss": 1.035, "step": 58630 }, { "epoch": 0.3746342460677459, "grad_norm": 0.7203439474105835, "learning_rate": 9.15958382343924e-05, "loss": 0.9331, "step": 58640 }, { "epoch": 0.37469813321748463, "grad_norm": 0.8126745223999023, "learning_rate": 9.159305370933349e-05, "loss": 0.7504, "step": 58650 }, { "epoch": 0.37476202036722334, "grad_norm": 0.7604427337646484, "learning_rate": 9.159026876539656e-05, "loss": 0.8239, "step": 58660 }, { "epoch": 0.37482590751696204, "grad_norm": 0.9764753580093384, "learning_rate": 9.158748340260962e-05, "loss": 0.8887, "step": 58670 }, { "epoch": 0.37488979466670075, "grad_norm": 1.0595623254776, "learning_rate": 9.158469762100077e-05, "loss": 0.9124, "step": 58680 }, { "epoch": 0.37495368181643945, "grad_norm": 0.8522325158119202, "learning_rate": 9.158191142059803e-05, "loss": 0.8533, "step": 58690 }, { "epoch": 0.37501756896617816, "grad_norm": 1.0041230916976929, "learning_rate": 9.157912480142947e-05, "loss": 0.9559, "step": 58700 }, { "epoch": 0.37508145611591687, "grad_norm": 1.3694050312042236, "learning_rate": 9.157633776352314e-05, "loss": 0.905, "step": 58710 }, { "epoch": 0.3751453432656556, "grad_norm": 0.4370633363723755, "learning_rate": 9.157355030690714e-05, "loss": 0.7518, "step": 58720 }, { "epoch": 0.3752092304153942, "grad_norm": 0.8439906239509583, "learning_rate": 9.157076243160951e-05, "loss": 0.9578, "step": 58730 }, { "epoch": 0.37527311756513293, "grad_norm": 0.7714953422546387, "learning_rate": 9.156797413765834e-05, "loss": 0.9042, "step": 58740 }, { "epoch": 0.37533700471487164, "grad_norm": 0.7417599558830261, "learning_rate": 9.156518542508172e-05, "loss": 0.8571, "step": 58750 }, { "epoch": 0.37540089186461034, "grad_norm": 1.951737642288208, "learning_rate": 9.15623962939077e-05, "loss": 0.9094, "step": 58760 }, { "epoch": 0.37546477901434905, "grad_norm": 0.8249172568321228, "learning_rate": 9.155960674416441e-05, "loss": 0.7664, "step": 58770 }, { "epoch": 0.37552866616408775, "grad_norm": 0.667812705039978, "learning_rate": 9.155681677587992e-05, "loss": 0.708, "step": 58780 }, { "epoch": 0.37559255331382646, "grad_norm": 0.6393797993659973, "learning_rate": 9.155402638908235e-05, "loss": 0.8337, "step": 58790 }, { "epoch": 0.37565644046356517, "grad_norm": 0.7899972200393677, "learning_rate": 9.155123558379976e-05, "loss": 1.0715, "step": 58800 }, { "epoch": 0.3757203276133039, "grad_norm": 2.3867976665496826, "learning_rate": 9.154844436006029e-05, "loss": 0.9635, "step": 58810 }, { "epoch": 0.3757842147630426, "grad_norm": 0.7886314392089844, "learning_rate": 9.154565271789206e-05, "loss": 0.8288, "step": 58820 }, { "epoch": 0.3758481019127813, "grad_norm": 0.6438289880752563, "learning_rate": 9.154286065732313e-05, "loss": 0.683, "step": 58830 }, { "epoch": 0.37591198906252, "grad_norm": 0.8149610161781311, "learning_rate": 9.154006817838168e-05, "loss": 0.9502, "step": 58840 }, { "epoch": 0.37597587621225864, "grad_norm": 1.0395874977111816, "learning_rate": 9.15372752810958e-05, "loss": 0.6418, "step": 58850 }, { "epoch": 0.37603976336199735, "grad_norm": 1.5722790956497192, "learning_rate": 9.153448196549362e-05, "loss": 0.927, "step": 58860 }, { "epoch": 0.37610365051173605, "grad_norm": 1.1867657899856567, "learning_rate": 9.153168823160327e-05, "loss": 0.7479, "step": 58870 }, { "epoch": 0.37616753766147476, "grad_norm": 0.9400370121002197, "learning_rate": 9.15288940794529e-05, "loss": 0.8849, "step": 58880 }, { "epoch": 0.37623142481121347, "grad_norm": 0.6055128574371338, "learning_rate": 9.152609950907062e-05, "loss": 0.8318, "step": 58890 }, { "epoch": 0.3762953119609522, "grad_norm": 0.8164952993392944, "learning_rate": 9.152330452048462e-05, "loss": 0.9452, "step": 58900 }, { "epoch": 0.3763591991106909, "grad_norm": 0.4781966507434845, "learning_rate": 9.152050911372301e-05, "loss": 1.0144, "step": 58910 }, { "epoch": 0.3764230862604296, "grad_norm": 0.7525957822799683, "learning_rate": 9.151771328881394e-05, "loss": 1.0175, "step": 58920 }, { "epoch": 0.3764869734101683, "grad_norm": 0.9770300388336182, "learning_rate": 9.151491704578559e-05, "loss": 1.0909, "step": 58930 }, { "epoch": 0.376550860559907, "grad_norm": 0.8200979232788086, "learning_rate": 9.151212038466612e-05, "loss": 0.6905, "step": 58940 }, { "epoch": 0.3766147477096457, "grad_norm": 0.8204917907714844, "learning_rate": 9.150932330548367e-05, "loss": 0.9003, "step": 58950 }, { "epoch": 0.3766786348593844, "grad_norm": 0.7505319714546204, "learning_rate": 9.150652580826642e-05, "loss": 1.0317, "step": 58960 }, { "epoch": 0.37674252200912306, "grad_norm": 0.782026469707489, "learning_rate": 9.150372789304256e-05, "loss": 0.8431, "step": 58970 }, { "epoch": 0.37680640915886177, "grad_norm": 2.76662278175354, "learning_rate": 9.150092955984025e-05, "loss": 1.0264, "step": 58980 }, { "epoch": 0.37687029630860047, "grad_norm": 0.9735605716705322, "learning_rate": 9.149813080868766e-05, "loss": 1.0035, "step": 58990 }, { "epoch": 0.3769341834583392, "grad_norm": 0.6053544282913208, "learning_rate": 9.149533163961302e-05, "loss": 0.8895, "step": 59000 }, { "epoch": 0.3769980706080779, "grad_norm": 1.281782865524292, "learning_rate": 9.149253205264448e-05, "loss": 1.1018, "step": 59010 }, { "epoch": 0.3770619577578166, "grad_norm": 0.6073563694953918, "learning_rate": 9.148973204781023e-05, "loss": 1.0346, "step": 59020 }, { "epoch": 0.3771258449075553, "grad_norm": 0.5802990198135376, "learning_rate": 9.148693162513851e-05, "loss": 0.8453, "step": 59030 }, { "epoch": 0.377189732057294, "grad_norm": 0.9088721871376038, "learning_rate": 9.148413078465747e-05, "loss": 1.0229, "step": 59040 }, { "epoch": 0.3772536192070327, "grad_norm": 0.8357219099998474, "learning_rate": 9.148132952639536e-05, "loss": 1.133, "step": 59050 }, { "epoch": 0.3773175063567714, "grad_norm": 0.7745949029922485, "learning_rate": 9.147852785038038e-05, "loss": 0.6222, "step": 59060 }, { "epoch": 0.3773813935065101, "grad_norm": 0.5787645578384399, "learning_rate": 9.147572575664074e-05, "loss": 0.8277, "step": 59070 }, { "epoch": 0.3774452806562488, "grad_norm": 0.5599297881126404, "learning_rate": 9.147292324520466e-05, "loss": 0.8404, "step": 59080 }, { "epoch": 0.3775091678059875, "grad_norm": 0.6565321087837219, "learning_rate": 9.147012031610035e-05, "loss": 0.937, "step": 59090 }, { "epoch": 0.3775730549557262, "grad_norm": 0.8938694000244141, "learning_rate": 9.146731696935606e-05, "loss": 1.0061, "step": 59100 }, { "epoch": 0.3776369421054649, "grad_norm": 0.8118715286254883, "learning_rate": 9.146451320500001e-05, "loss": 0.9974, "step": 59110 }, { "epoch": 0.3777008292552036, "grad_norm": 0.7012856006622314, "learning_rate": 9.146170902306045e-05, "loss": 1.0306, "step": 59120 }, { "epoch": 0.3777647164049423, "grad_norm": 0.6307138204574585, "learning_rate": 9.145890442356561e-05, "loss": 0.685, "step": 59130 }, { "epoch": 0.377828603554681, "grad_norm": 0.793086588382721, "learning_rate": 9.145609940654373e-05, "loss": 1.0748, "step": 59140 }, { "epoch": 0.3778924907044197, "grad_norm": 1.0463335514068604, "learning_rate": 9.145329397202307e-05, "loss": 0.9517, "step": 59150 }, { "epoch": 0.3779563778541584, "grad_norm": 0.8374640345573425, "learning_rate": 9.145048812003186e-05, "loss": 0.8408, "step": 59160 }, { "epoch": 0.3780202650038971, "grad_norm": 0.8713728189468384, "learning_rate": 9.144768185059838e-05, "loss": 1.1013, "step": 59170 }, { "epoch": 0.37808415215363583, "grad_norm": 0.706382691860199, "learning_rate": 9.14448751637509e-05, "loss": 0.8862, "step": 59180 }, { "epoch": 0.37814803930337454, "grad_norm": 0.464167058467865, "learning_rate": 9.144206805951767e-05, "loss": 0.6612, "step": 59190 }, { "epoch": 0.37821192645311325, "grad_norm": 0.7974499464035034, "learning_rate": 9.143926053792696e-05, "loss": 1.1017, "step": 59200 }, { "epoch": 0.3782758136028519, "grad_norm": 1.0677493810653687, "learning_rate": 9.143645259900704e-05, "loss": 1.0395, "step": 59210 }, { "epoch": 0.3783397007525906, "grad_norm": 0.6200050711631775, "learning_rate": 9.14336442427862e-05, "loss": 0.8772, "step": 59220 }, { "epoch": 0.3784035879023293, "grad_norm": 0.8041068315505981, "learning_rate": 9.143083546929272e-05, "loss": 0.8241, "step": 59230 }, { "epoch": 0.378467475052068, "grad_norm": 1.050399661064148, "learning_rate": 9.142802627855487e-05, "loss": 0.7528, "step": 59240 }, { "epoch": 0.3785313622018067, "grad_norm": 0.5964527726173401, "learning_rate": 9.142521667060098e-05, "loss": 0.8251, "step": 59250 }, { "epoch": 0.3785952493515454, "grad_norm": 0.813062310218811, "learning_rate": 9.14224066454593e-05, "loss": 1.0259, "step": 59260 }, { "epoch": 0.37865913650128413, "grad_norm": 0.8622726798057556, "learning_rate": 9.141959620315816e-05, "loss": 0.8479, "step": 59270 }, { "epoch": 0.37872302365102284, "grad_norm": 0.59121173620224, "learning_rate": 9.141678534372584e-05, "loss": 0.6244, "step": 59280 }, { "epoch": 0.37878691080076154, "grad_norm": 0.7073304653167725, "learning_rate": 9.141397406719066e-05, "loss": 0.7587, "step": 59290 }, { "epoch": 0.37885079795050025, "grad_norm": 0.5923304557800293, "learning_rate": 9.141116237358095e-05, "loss": 0.9219, "step": 59300 }, { "epoch": 0.37891468510023896, "grad_norm": 0.909243106842041, "learning_rate": 9.1408350262925e-05, "loss": 0.721, "step": 59310 }, { "epoch": 0.37897857224997766, "grad_norm": 0.40945374965667725, "learning_rate": 9.140553773525114e-05, "loss": 0.9946, "step": 59320 }, { "epoch": 0.3790424593997163, "grad_norm": 1.5487751960754395, "learning_rate": 9.14027247905877e-05, "loss": 1.0129, "step": 59330 }, { "epoch": 0.379106346549455, "grad_norm": 0.946149468421936, "learning_rate": 9.1399911428963e-05, "loss": 1.0866, "step": 59340 }, { "epoch": 0.3791702336991937, "grad_norm": 1.2105820178985596, "learning_rate": 9.139709765040537e-05, "loss": 1.1053, "step": 59350 }, { "epoch": 0.37923412084893243, "grad_norm": 0.9297011494636536, "learning_rate": 9.139428345494316e-05, "loss": 1.0082, "step": 59360 }, { "epoch": 0.37929800799867114, "grad_norm": 1.3490419387817383, "learning_rate": 9.139146884260469e-05, "loss": 0.7593, "step": 59370 }, { "epoch": 0.37936189514840984, "grad_norm": 1.027013897895813, "learning_rate": 9.138865381341835e-05, "loss": 0.8555, "step": 59380 }, { "epoch": 0.37942578229814855, "grad_norm": 0.7934104800224304, "learning_rate": 9.138583836741243e-05, "loss": 0.7812, "step": 59390 }, { "epoch": 0.37948966944788726, "grad_norm": 0.702707052230835, "learning_rate": 9.138302250461532e-05, "loss": 0.9684, "step": 59400 }, { "epoch": 0.37955355659762596, "grad_norm": 0.6672869920730591, "learning_rate": 9.138020622505539e-05, "loss": 0.7703, "step": 59410 }, { "epoch": 0.37961744374736467, "grad_norm": 0.811865508556366, "learning_rate": 9.137738952876096e-05, "loss": 0.7615, "step": 59420 }, { "epoch": 0.3796813308971034, "grad_norm": 1.041718602180481, "learning_rate": 9.137457241576044e-05, "loss": 0.8087, "step": 59430 }, { "epoch": 0.3797452180468421, "grad_norm": 0.9935733079910278, "learning_rate": 9.137175488608217e-05, "loss": 0.8609, "step": 59440 }, { "epoch": 0.3798091051965808, "grad_norm": 0.6558438539505005, "learning_rate": 9.136893693975455e-05, "loss": 1.1521, "step": 59450 }, { "epoch": 0.37987299234631944, "grad_norm": 1.0106873512268066, "learning_rate": 9.136611857680593e-05, "loss": 0.8439, "step": 59460 }, { "epoch": 0.37993687949605814, "grad_norm": 0.8947387337684631, "learning_rate": 9.136329979726472e-05, "loss": 0.9528, "step": 59470 }, { "epoch": 0.38000076664579685, "grad_norm": 1.6661876440048218, "learning_rate": 9.13604806011593e-05, "loss": 0.7804, "step": 59480 }, { "epoch": 0.38006465379553556, "grad_norm": 0.7552819848060608, "learning_rate": 9.135766098851803e-05, "loss": 0.8697, "step": 59490 }, { "epoch": 0.38012854094527426, "grad_norm": 1.3484975099563599, "learning_rate": 9.135484095936937e-05, "loss": 0.7785, "step": 59500 }, { "epoch": 0.38019242809501297, "grad_norm": 0.9297848343849182, "learning_rate": 9.135202051374167e-05, "loss": 0.695, "step": 59510 }, { "epoch": 0.3802563152447517, "grad_norm": 0.8916332125663757, "learning_rate": 9.134919965166335e-05, "loss": 0.8245, "step": 59520 }, { "epoch": 0.3803202023944904, "grad_norm": 1.1042640209197998, "learning_rate": 9.13463783731628e-05, "loss": 0.8783, "step": 59530 }, { "epoch": 0.3803840895442291, "grad_norm": 0.8340087532997131, "learning_rate": 9.134355667826847e-05, "loss": 0.7602, "step": 59540 }, { "epoch": 0.3804479766939678, "grad_norm": 1.1028873920440674, "learning_rate": 9.134073456700876e-05, "loss": 0.9535, "step": 59550 }, { "epoch": 0.3805118638437065, "grad_norm": 1.2923681735992432, "learning_rate": 9.133791203941207e-05, "loss": 0.9221, "step": 59560 }, { "epoch": 0.3805757509934452, "grad_norm": 1.8344556093215942, "learning_rate": 9.133508909550686e-05, "loss": 1.1256, "step": 59570 }, { "epoch": 0.38063963814318386, "grad_norm": 0.9875249862670898, "learning_rate": 9.133226573532154e-05, "loss": 0.7142, "step": 59580 }, { "epoch": 0.38070352529292256, "grad_norm": 0.9586598873138428, "learning_rate": 9.132944195888455e-05, "loss": 0.7369, "step": 59590 }, { "epoch": 0.38076741244266127, "grad_norm": 0.8368619084358215, "learning_rate": 9.132661776622431e-05, "loss": 0.7057, "step": 59600 }, { "epoch": 0.3808312995924, "grad_norm": 0.9391975998878479, "learning_rate": 9.132379315736928e-05, "loss": 0.9706, "step": 59610 }, { "epoch": 0.3808951867421387, "grad_norm": 0.6700417995452881, "learning_rate": 9.132096813234792e-05, "loss": 1.1595, "step": 59620 }, { "epoch": 0.3809590738918774, "grad_norm": 4.27878475189209, "learning_rate": 9.131814269118864e-05, "loss": 1.0109, "step": 59630 }, { "epoch": 0.3810229610416161, "grad_norm": 0.9258844256401062, "learning_rate": 9.131531683391993e-05, "loss": 0.979, "step": 59640 }, { "epoch": 0.3810868481913548, "grad_norm": 4.915820121765137, "learning_rate": 9.131249056057023e-05, "loss": 1.0458, "step": 59650 }, { "epoch": 0.3811507353410935, "grad_norm": 2.258350133895874, "learning_rate": 9.130966387116802e-05, "loss": 0.7549, "step": 59660 }, { "epoch": 0.3812146224908322, "grad_norm": 0.5593277812004089, "learning_rate": 9.130683676574175e-05, "loss": 0.8745, "step": 59670 }, { "epoch": 0.3812785096405709, "grad_norm": 0.8787796497344971, "learning_rate": 9.13040092443199e-05, "loss": 0.8045, "step": 59680 }, { "epoch": 0.3813423967903096, "grad_norm": 0.9920330047607422, "learning_rate": 9.130118130693095e-05, "loss": 0.9066, "step": 59690 }, { "epoch": 0.3814062839400483, "grad_norm": 2.229135513305664, "learning_rate": 9.129835295360336e-05, "loss": 0.8905, "step": 59700 }, { "epoch": 0.381470171089787, "grad_norm": 0.8204028010368347, "learning_rate": 9.129552418436563e-05, "loss": 1.0525, "step": 59710 }, { "epoch": 0.3815340582395257, "grad_norm": 0.8208606243133545, "learning_rate": 9.129269499924626e-05, "loss": 0.8469, "step": 59720 }, { "epoch": 0.3815979453892644, "grad_norm": 0.8647171854972839, "learning_rate": 9.128986539827371e-05, "loss": 0.8889, "step": 59730 }, { "epoch": 0.3816618325390031, "grad_norm": 1.4753942489624023, "learning_rate": 9.128703538147651e-05, "loss": 0.9241, "step": 59740 }, { "epoch": 0.3817257196887418, "grad_norm": 6.072597503662109, "learning_rate": 9.128420494888313e-05, "loss": 1.3249, "step": 59750 }, { "epoch": 0.3817896068384805, "grad_norm": 1.8557090759277344, "learning_rate": 9.128137410052211e-05, "loss": 1.0087, "step": 59760 }, { "epoch": 0.3818534939882192, "grad_norm": 0.828505277633667, "learning_rate": 9.127854283642192e-05, "loss": 0.8843, "step": 59770 }, { "epoch": 0.3819173811379579, "grad_norm": 0.6272063851356506, "learning_rate": 9.127571115661111e-05, "loss": 0.9136, "step": 59780 }, { "epoch": 0.38198126828769663, "grad_norm": 0.683825671672821, "learning_rate": 9.127287906111817e-05, "loss": 0.9161, "step": 59790 }, { "epoch": 0.38204515543743534, "grad_norm": 0.7828848958015442, "learning_rate": 9.127004654997163e-05, "loss": 0.8366, "step": 59800 }, { "epoch": 0.38210904258717404, "grad_norm": 2.599881410598755, "learning_rate": 9.126721362320003e-05, "loss": 1.0435, "step": 59810 }, { "epoch": 0.3821729297369127, "grad_norm": 1.0187602043151855, "learning_rate": 9.126438028083186e-05, "loss": 0.9667, "step": 59820 }, { "epoch": 0.3822368168866514, "grad_norm": 1.3073110580444336, "learning_rate": 9.126154652289571e-05, "loss": 0.7698, "step": 59830 }, { "epoch": 0.3823007040363901, "grad_norm": 0.6932925581932068, "learning_rate": 9.125871234942008e-05, "loss": 0.6695, "step": 59840 }, { "epoch": 0.3823645911861288, "grad_norm": 1.466614842414856, "learning_rate": 9.125587776043352e-05, "loss": 1.0159, "step": 59850 }, { "epoch": 0.3824284783358675, "grad_norm": 0.5515915155410767, "learning_rate": 9.125304275596458e-05, "loss": 1.0273, "step": 59860 }, { "epoch": 0.3824923654856062, "grad_norm": 0.6064876914024353, "learning_rate": 9.125020733604182e-05, "loss": 0.8891, "step": 59870 }, { "epoch": 0.38255625263534493, "grad_norm": 0.8917511105537415, "learning_rate": 9.124737150069378e-05, "loss": 1.1068, "step": 59880 }, { "epoch": 0.38262013978508363, "grad_norm": 0.7151978611946106, "learning_rate": 9.1244535249949e-05, "loss": 0.8862, "step": 59890 }, { "epoch": 0.38268402693482234, "grad_norm": 0.8112443089485168, "learning_rate": 9.124169858383611e-05, "loss": 0.897, "step": 59900 }, { "epoch": 0.38274791408456105, "grad_norm": 1.089768886566162, "learning_rate": 9.123886150238361e-05, "loss": 0.9832, "step": 59910 }, { "epoch": 0.38281180123429975, "grad_norm": 0.7794529795646667, "learning_rate": 9.12360240056201e-05, "loss": 0.8341, "step": 59920 }, { "epoch": 0.38287568838403846, "grad_norm": 0.5675161480903625, "learning_rate": 9.123318609357417e-05, "loss": 0.9027, "step": 59930 }, { "epoch": 0.3829395755337771, "grad_norm": 0.8330199718475342, "learning_rate": 9.123034776627437e-05, "loss": 0.9739, "step": 59940 }, { "epoch": 0.3830034626835158, "grad_norm": 1.6454709768295288, "learning_rate": 9.12275090237493e-05, "loss": 0.977, "step": 59950 }, { "epoch": 0.3830673498332545, "grad_norm": 0.8024013042449951, "learning_rate": 9.122466986602756e-05, "loss": 0.9452, "step": 59960 }, { "epoch": 0.38313123698299323, "grad_norm": 1.1360933780670166, "learning_rate": 9.122183029313771e-05, "loss": 1.0236, "step": 59970 }, { "epoch": 0.38319512413273193, "grad_norm": 0.7337785959243774, "learning_rate": 9.121899030510839e-05, "loss": 0.9299, "step": 59980 }, { "epoch": 0.38325901128247064, "grad_norm": 0.8636689782142639, "learning_rate": 9.121614990196816e-05, "loss": 0.7671, "step": 59990 }, { "epoch": 0.38332289843220935, "grad_norm": 1.2737140655517578, "learning_rate": 9.121330908374564e-05, "loss": 0.8175, "step": 60000 }, { "epoch": 0.38338678558194805, "grad_norm": 0.6086975336074829, "learning_rate": 9.121046785046945e-05, "loss": 1.1958, "step": 60010 }, { "epoch": 0.38345067273168676, "grad_norm": 0.7334972023963928, "learning_rate": 9.12076262021682e-05, "loss": 0.8533, "step": 60020 }, { "epoch": 0.38351455988142547, "grad_norm": 0.67818683385849, "learning_rate": 9.12047841388705e-05, "loss": 0.7376, "step": 60030 }, { "epoch": 0.38357844703116417, "grad_norm": 0.5810967683792114, "learning_rate": 9.120194166060498e-05, "loss": 0.8313, "step": 60040 }, { "epoch": 0.3836423341809029, "grad_norm": 0.7271260619163513, "learning_rate": 9.119909876740027e-05, "loss": 0.8529, "step": 60050 }, { "epoch": 0.38370622133064153, "grad_norm": 1.0164223909378052, "learning_rate": 9.119625545928499e-05, "loss": 0.919, "step": 60060 }, { "epoch": 0.38377010848038023, "grad_norm": 1.4784969091415405, "learning_rate": 9.119341173628777e-05, "loss": 0.9259, "step": 60070 }, { "epoch": 0.38383399563011894, "grad_norm": 0.8718630075454712, "learning_rate": 9.119056759843724e-05, "loss": 0.918, "step": 60080 }, { "epoch": 0.38389788277985765, "grad_norm": 0.9398227334022522, "learning_rate": 9.118772304576209e-05, "loss": 1.0287, "step": 60090 }, { "epoch": 0.38396176992959635, "grad_norm": 0.7162007689476013, "learning_rate": 9.118487807829093e-05, "loss": 0.8178, "step": 60100 }, { "epoch": 0.38402565707933506, "grad_norm": 1.4307546615600586, "learning_rate": 9.118203269605242e-05, "loss": 0.8535, "step": 60110 }, { "epoch": 0.38408954422907376, "grad_norm": 1.0519388914108276, "learning_rate": 9.11791868990752e-05, "loss": 1.2414, "step": 60120 }, { "epoch": 0.38415343137881247, "grad_norm": 0.8539866805076599, "learning_rate": 9.117634068738794e-05, "loss": 0.8189, "step": 60130 }, { "epoch": 0.3842173185285512, "grad_norm": 0.8897231221199036, "learning_rate": 9.117349406101931e-05, "loss": 1.0583, "step": 60140 }, { "epoch": 0.3842812056782899, "grad_norm": 0.9356622099876404, "learning_rate": 9.117064701999797e-05, "loss": 0.8774, "step": 60150 }, { "epoch": 0.3843450928280286, "grad_norm": 0.934384822845459, "learning_rate": 9.116779956435262e-05, "loss": 1.0653, "step": 60160 }, { "epoch": 0.3844089799777673, "grad_norm": 0.5904353857040405, "learning_rate": 9.11649516941119e-05, "loss": 0.8482, "step": 60170 }, { "epoch": 0.38447286712750595, "grad_norm": 0.840069055557251, "learning_rate": 9.116210340930451e-05, "loss": 1.0966, "step": 60180 }, { "epoch": 0.38453675427724465, "grad_norm": 2.140904188156128, "learning_rate": 9.115925470995912e-05, "loss": 0.9313, "step": 60190 }, { "epoch": 0.38460064142698336, "grad_norm": 1.6145496368408203, "learning_rate": 9.115640559610444e-05, "loss": 0.9065, "step": 60200 }, { "epoch": 0.38466452857672206, "grad_norm": 0.8971934914588928, "learning_rate": 9.115355606776913e-05, "loss": 0.7211, "step": 60210 }, { "epoch": 0.38472841572646077, "grad_norm": 0.740960419178009, "learning_rate": 9.115070612498192e-05, "loss": 0.7915, "step": 60220 }, { "epoch": 0.3847923028761995, "grad_norm": 1.029941201210022, "learning_rate": 9.114785576777149e-05, "loss": 0.9746, "step": 60230 }, { "epoch": 0.3848561900259382, "grad_norm": 1.0357356071472168, "learning_rate": 9.114500499616656e-05, "loss": 0.7439, "step": 60240 }, { "epoch": 0.3849200771756769, "grad_norm": 0.823661208152771, "learning_rate": 9.114215381019584e-05, "loss": 0.8409, "step": 60250 }, { "epoch": 0.3849839643254156, "grad_norm": 1.5275285243988037, "learning_rate": 9.113930220988804e-05, "loss": 0.6833, "step": 60260 }, { "epoch": 0.3850478514751543, "grad_norm": 0.8406334519386292, "learning_rate": 9.113645019527187e-05, "loss": 0.941, "step": 60270 }, { "epoch": 0.385111738624893, "grad_norm": 1.2402430772781372, "learning_rate": 9.113359776637604e-05, "loss": 0.823, "step": 60280 }, { "epoch": 0.3851756257746317, "grad_norm": 0.8033724427223206, "learning_rate": 9.113074492322933e-05, "loss": 1.0329, "step": 60290 }, { "epoch": 0.3852395129243704, "grad_norm": 0.7544481158256531, "learning_rate": 9.112789166586041e-05, "loss": 0.7707, "step": 60300 }, { "epoch": 0.38530340007410907, "grad_norm": 1.0110443830490112, "learning_rate": 9.112503799429805e-05, "loss": 0.8752, "step": 60310 }, { "epoch": 0.3853672872238478, "grad_norm": 0.9389250874519348, "learning_rate": 9.112218390857098e-05, "loss": 1.004, "step": 60320 }, { "epoch": 0.3854311743735865, "grad_norm": 0.7335034608840942, "learning_rate": 9.111932940870793e-05, "loss": 0.9463, "step": 60330 }, { "epoch": 0.3854950615233252, "grad_norm": 0.9130538105964661, "learning_rate": 9.111647449473766e-05, "loss": 0.8286, "step": 60340 }, { "epoch": 0.3855589486730639, "grad_norm": 1.8311418294906616, "learning_rate": 9.111361916668894e-05, "loss": 0.9905, "step": 60350 }, { "epoch": 0.3856228358228026, "grad_norm": 0.7370787858963013, "learning_rate": 9.111076342459051e-05, "loss": 1.105, "step": 60360 }, { "epoch": 0.3856867229725413, "grad_norm": 0.8268787860870361, "learning_rate": 9.110790726847109e-05, "loss": 0.779, "step": 60370 }, { "epoch": 0.38575061012228, "grad_norm": 0.7258269190788269, "learning_rate": 9.110505069835952e-05, "loss": 0.8981, "step": 60380 }, { "epoch": 0.3858144972720187, "grad_norm": 1.1114614009857178, "learning_rate": 9.11021937142845e-05, "loss": 0.9508, "step": 60390 }, { "epoch": 0.3858783844217574, "grad_norm": 0.6973649263381958, "learning_rate": 9.109933631627485e-05, "loss": 0.9868, "step": 60400 }, { "epoch": 0.38594227157149613, "grad_norm": 0.8535771369934082, "learning_rate": 9.109647850435931e-05, "loss": 0.9278, "step": 60410 }, { "epoch": 0.38600615872123484, "grad_norm": 0.9913718104362488, "learning_rate": 9.10936202785667e-05, "loss": 0.9525, "step": 60420 }, { "epoch": 0.3860700458709735, "grad_norm": 0.9371497631072998, "learning_rate": 9.109076163892577e-05, "loss": 0.9669, "step": 60430 }, { "epoch": 0.3861339330207122, "grad_norm": 0.6546643972396851, "learning_rate": 9.108790258546533e-05, "loss": 0.6787, "step": 60440 }, { "epoch": 0.3861978201704509, "grad_norm": 0.8154623508453369, "learning_rate": 9.108504311821416e-05, "loss": 1.0956, "step": 60450 }, { "epoch": 0.3862617073201896, "grad_norm": 0.5797396898269653, "learning_rate": 9.108218323720104e-05, "loss": 1.2229, "step": 60460 }, { "epoch": 0.3863255944699283, "grad_norm": 1.2264608144760132, "learning_rate": 9.107932294245483e-05, "loss": 0.9712, "step": 60470 }, { "epoch": 0.386389481619667, "grad_norm": 0.9331986904144287, "learning_rate": 9.107646223400428e-05, "loss": 0.8631, "step": 60480 }, { "epoch": 0.3864533687694057, "grad_norm": 1.17788827419281, "learning_rate": 9.107360111187821e-05, "loss": 0.7527, "step": 60490 }, { "epoch": 0.38651725591914443, "grad_norm": 0.9666171073913574, "learning_rate": 9.107073957610546e-05, "loss": 0.745, "step": 60500 }, { "epoch": 0.38658114306888314, "grad_norm": 0.7744701504707336, "learning_rate": 9.106787762671483e-05, "loss": 0.9245, "step": 60510 }, { "epoch": 0.38664503021862184, "grad_norm": 0.7567153573036194, "learning_rate": 9.106501526373514e-05, "loss": 0.8483, "step": 60520 }, { "epoch": 0.38670891736836055, "grad_norm": 1.0141370296478271, "learning_rate": 9.106215248719522e-05, "loss": 0.8139, "step": 60530 }, { "epoch": 0.38677280451809926, "grad_norm": 0.924473762512207, "learning_rate": 9.10592892971239e-05, "loss": 1.1207, "step": 60540 }, { "epoch": 0.3868366916678379, "grad_norm": 0.6461699604988098, "learning_rate": 9.105642569355002e-05, "loss": 1.1942, "step": 60550 }, { "epoch": 0.3869005788175766, "grad_norm": 0.7070831060409546, "learning_rate": 9.105356167650241e-05, "loss": 0.7269, "step": 60560 }, { "epoch": 0.3869644659673153, "grad_norm": 1.24761962890625, "learning_rate": 9.105069724600992e-05, "loss": 0.9219, "step": 60570 }, { "epoch": 0.387028353117054, "grad_norm": 0.9694204330444336, "learning_rate": 9.104783240210137e-05, "loss": 1.1463, "step": 60580 }, { "epoch": 0.38709224026679273, "grad_norm": 0.7237581014633179, "learning_rate": 9.104496714480567e-05, "loss": 1.1098, "step": 60590 }, { "epoch": 0.38715612741653144, "grad_norm": 0.9114017486572266, "learning_rate": 9.104210147415163e-05, "loss": 1.0888, "step": 60600 }, { "epoch": 0.38722001456627014, "grad_norm": 0.5623325705528259, "learning_rate": 9.103923539016813e-05, "loss": 0.9529, "step": 60610 }, { "epoch": 0.38728390171600885, "grad_norm": 0.6232447624206543, "learning_rate": 9.1036368892884e-05, "loss": 1.0587, "step": 60620 }, { "epoch": 0.38734778886574756, "grad_norm": 0.9023538827896118, "learning_rate": 9.103350198232816e-05, "loss": 1.0181, "step": 60630 }, { "epoch": 0.38741167601548626, "grad_norm": 1.999245047569275, "learning_rate": 9.103063465852945e-05, "loss": 0.9449, "step": 60640 }, { "epoch": 0.38747556316522497, "grad_norm": 1.0726778507232666, "learning_rate": 9.102776692151675e-05, "loss": 0.8554, "step": 60650 }, { "epoch": 0.3875394503149637, "grad_norm": 0.9312451481819153, "learning_rate": 9.102489877131894e-05, "loss": 0.8106, "step": 60660 }, { "epoch": 0.3876033374647023, "grad_norm": 0.7528103590011597, "learning_rate": 9.102203020796491e-05, "loss": 0.9015, "step": 60670 }, { "epoch": 0.38766722461444103, "grad_norm": 0.6276060342788696, "learning_rate": 9.101916123148356e-05, "loss": 0.8222, "step": 60680 }, { "epoch": 0.38773111176417974, "grad_norm": 0.818074107170105, "learning_rate": 9.101629184190375e-05, "loss": 1.1241, "step": 60690 }, { "epoch": 0.38779499891391844, "grad_norm": 0.8359874486923218, "learning_rate": 9.10134220392544e-05, "loss": 0.9222, "step": 60700 }, { "epoch": 0.38785888606365715, "grad_norm": 0.846093475818634, "learning_rate": 9.101055182356442e-05, "loss": 0.9757, "step": 60710 }, { "epoch": 0.38792277321339586, "grad_norm": 0.7747712731361389, "learning_rate": 9.100768119486269e-05, "loss": 0.7789, "step": 60720 }, { "epoch": 0.38798666036313456, "grad_norm": 1.336980938911438, "learning_rate": 9.100481015317814e-05, "loss": 1.1395, "step": 60730 }, { "epoch": 0.38805054751287327, "grad_norm": 1.1585602760314941, "learning_rate": 9.100193869853968e-05, "loss": 1.0321, "step": 60740 }, { "epoch": 0.388114434662612, "grad_norm": 0.9213445782661438, "learning_rate": 9.099906683097623e-05, "loss": 0.9182, "step": 60750 }, { "epoch": 0.3881783218123507, "grad_norm": 0.7520207166671753, "learning_rate": 9.09961945505167e-05, "loss": 0.6615, "step": 60760 }, { "epoch": 0.3882422089620894, "grad_norm": 1.0059177875518799, "learning_rate": 9.099332185719003e-05, "loss": 0.7059, "step": 60770 }, { "epoch": 0.3883060961118281, "grad_norm": 1.6132454872131348, "learning_rate": 9.099044875102513e-05, "loss": 1.1878, "step": 60780 }, { "epoch": 0.38836998326156674, "grad_norm": 0.8192178010940552, "learning_rate": 9.098757523205097e-05, "loss": 1.0932, "step": 60790 }, { "epoch": 0.38843387041130545, "grad_norm": 0.9005227088928223, "learning_rate": 9.098470130029645e-05, "loss": 0.762, "step": 60800 }, { "epoch": 0.38849775756104415, "grad_norm": 0.7836887240409851, "learning_rate": 9.098182695579054e-05, "loss": 0.8244, "step": 60810 }, { "epoch": 0.38856164471078286, "grad_norm": 0.7896131277084351, "learning_rate": 9.097895219856218e-05, "loss": 0.7864, "step": 60820 }, { "epoch": 0.38862553186052157, "grad_norm": 1.6993827819824219, "learning_rate": 9.09760770286403e-05, "loss": 0.7552, "step": 60830 }, { "epoch": 0.3886894190102603, "grad_norm": 0.8872599601745605, "learning_rate": 9.09732014460539e-05, "loss": 1.1259, "step": 60840 }, { "epoch": 0.388753306159999, "grad_norm": 0.8446595072746277, "learning_rate": 9.097032545083191e-05, "loss": 0.7728, "step": 60850 }, { "epoch": 0.3888171933097377, "grad_norm": 0.7190898656845093, "learning_rate": 9.09674490430033e-05, "loss": 1.0357, "step": 60860 }, { "epoch": 0.3888810804594764, "grad_norm": 0.8590859770774841, "learning_rate": 9.096457222259702e-05, "loss": 0.7801, "step": 60870 }, { "epoch": 0.3889449676092151, "grad_norm": 0.994317889213562, "learning_rate": 9.096169498964206e-05, "loss": 0.9578, "step": 60880 }, { "epoch": 0.3890088547589538, "grad_norm": 1.0959383249282837, "learning_rate": 9.095881734416742e-05, "loss": 0.7354, "step": 60890 }, { "epoch": 0.3890727419086925, "grad_norm": 1.1300466060638428, "learning_rate": 9.095593928620203e-05, "loss": 1.2792, "step": 60900 }, { "epoch": 0.38913662905843116, "grad_norm": 0.9118770360946655, "learning_rate": 9.095306081577491e-05, "loss": 0.8323, "step": 60910 }, { "epoch": 0.38920051620816987, "grad_norm": 0.5770663022994995, "learning_rate": 9.095018193291504e-05, "loss": 0.7813, "step": 60920 }, { "epoch": 0.3892644033579086, "grad_norm": 1.2142269611358643, "learning_rate": 9.094730263765141e-05, "loss": 0.6744, "step": 60930 }, { "epoch": 0.3893282905076473, "grad_norm": 0.6319569945335388, "learning_rate": 9.094442293001301e-05, "loss": 0.7512, "step": 60940 }, { "epoch": 0.389392177657386, "grad_norm": 0.9332210421562195, "learning_rate": 9.094154281002884e-05, "loss": 0.9045, "step": 60950 }, { "epoch": 0.3894560648071247, "grad_norm": 0.786271870136261, "learning_rate": 9.093866227772794e-05, "loss": 1.1151, "step": 60960 }, { "epoch": 0.3895199519568634, "grad_norm": 0.8566588163375854, "learning_rate": 9.093578133313928e-05, "loss": 0.7992, "step": 60970 }, { "epoch": 0.3895838391066021, "grad_norm": 0.7604480385780334, "learning_rate": 9.093289997629188e-05, "loss": 0.924, "step": 60980 }, { "epoch": 0.3896477262563408, "grad_norm": 1.0149980783462524, "learning_rate": 9.093001820721479e-05, "loss": 1.0535, "step": 60990 }, { "epoch": 0.3897116134060795, "grad_norm": 1.085911512374878, "learning_rate": 9.092713602593699e-05, "loss": 0.7629, "step": 61000 }, { "epoch": 0.3897755005558182, "grad_norm": 1.1118038892745972, "learning_rate": 9.092425343248753e-05, "loss": 0.8315, "step": 61010 }, { "epoch": 0.38983938770555693, "grad_norm": 0.49953410029411316, "learning_rate": 9.092137042689542e-05, "loss": 0.8272, "step": 61020 }, { "epoch": 0.3899032748552956, "grad_norm": 0.703426718711853, "learning_rate": 9.091848700918973e-05, "loss": 0.8759, "step": 61030 }, { "epoch": 0.3899671620050343, "grad_norm": 1.1554392576217651, "learning_rate": 9.091560317939946e-05, "loss": 0.9506, "step": 61040 }, { "epoch": 0.390031049154773, "grad_norm": 0.745389997959137, "learning_rate": 9.091271893755367e-05, "loss": 0.7726, "step": 61050 }, { "epoch": 0.3900949363045117, "grad_norm": 0.6152491569519043, "learning_rate": 9.090983428368141e-05, "loss": 0.896, "step": 61060 }, { "epoch": 0.3901588234542504, "grad_norm": 2.3798322677612305, "learning_rate": 9.09069492178117e-05, "loss": 0.9652, "step": 61070 }, { "epoch": 0.3902227106039891, "grad_norm": 0.8589335680007935, "learning_rate": 9.090435230629522e-05, "loss": 0.772, "step": 61080 }, { "epoch": 0.3902865977537278, "grad_norm": 0.7509768009185791, "learning_rate": 9.090146645771047e-05, "loss": 0.9196, "step": 61090 }, { "epoch": 0.3903504849034665, "grad_norm": 0.9738487005233765, "learning_rate": 9.089858019721258e-05, "loss": 0.8836, "step": 61100 }, { "epoch": 0.3904143720532052, "grad_norm": 2.166499137878418, "learning_rate": 9.089569352483061e-05, "loss": 1.2521, "step": 61110 }, { "epoch": 0.39047825920294393, "grad_norm": 0.9337096214294434, "learning_rate": 9.089280644059361e-05, "loss": 0.8933, "step": 61120 }, { "epoch": 0.39054214635268264, "grad_norm": 1.1011388301849365, "learning_rate": 9.088991894453069e-05, "loss": 0.7827, "step": 61130 }, { "epoch": 0.39060603350242135, "grad_norm": 1.5726940631866455, "learning_rate": 9.08870310366709e-05, "loss": 1.0105, "step": 61140 }, { "epoch": 0.39066992065216005, "grad_norm": 0.6980756521224976, "learning_rate": 9.088414271704334e-05, "loss": 1.1352, "step": 61150 }, { "epoch": 0.3907338078018987, "grad_norm": 0.9901998043060303, "learning_rate": 9.088125398567708e-05, "loss": 0.8634, "step": 61160 }, { "epoch": 0.3907976949516374, "grad_norm": 0.7848410606384277, "learning_rate": 9.087836484260125e-05, "loss": 0.968, "step": 61170 }, { "epoch": 0.3908615821013761, "grad_norm": 2.4346492290496826, "learning_rate": 9.08754752878449e-05, "loss": 0.8797, "step": 61180 }, { "epoch": 0.3909254692511148, "grad_norm": 0.5621653199195862, "learning_rate": 9.087258532143716e-05, "loss": 0.8708, "step": 61190 }, { "epoch": 0.3909893564008535, "grad_norm": 0.6077272891998291, "learning_rate": 9.086969494340714e-05, "loss": 0.9137, "step": 61200 }, { "epoch": 0.39105324355059223, "grad_norm": 1.679137945175171, "learning_rate": 9.08668041537839e-05, "loss": 1.0507, "step": 61210 }, { "epoch": 0.39111713070033094, "grad_norm": 0.7337985634803772, "learning_rate": 9.086391295259662e-05, "loss": 0.7978, "step": 61220 }, { "epoch": 0.39118101785006965, "grad_norm": 0.8496336340904236, "learning_rate": 9.086102133987436e-05, "loss": 1.0827, "step": 61230 }, { "epoch": 0.39124490499980835, "grad_norm": 1.5202635526657104, "learning_rate": 9.085812931564627e-05, "loss": 0.7946, "step": 61240 }, { "epoch": 0.39130879214954706, "grad_norm": 1.236046314239502, "learning_rate": 9.085523687994148e-05, "loss": 0.6731, "step": 61250 }, { "epoch": 0.39137267929928576, "grad_norm": 0.6897780895233154, "learning_rate": 9.085234403278912e-05, "loss": 0.9761, "step": 61260 }, { "epoch": 0.39143656644902447, "grad_norm": 2.731182098388672, "learning_rate": 9.08494507742183e-05, "loss": 0.6203, "step": 61270 }, { "epoch": 0.3915004535987631, "grad_norm": 1.2654629945755005, "learning_rate": 9.084655710425817e-05, "loss": 0.9412, "step": 61280 }, { "epoch": 0.3915643407485018, "grad_norm": 0.9175102114677429, "learning_rate": 9.084366302293787e-05, "loss": 0.7672, "step": 61290 }, { "epoch": 0.39162822789824053, "grad_norm": 0.8177767395973206, "learning_rate": 9.084076853028656e-05, "loss": 0.9016, "step": 61300 }, { "epoch": 0.39169211504797924, "grad_norm": 0.6506124138832092, "learning_rate": 9.083787362633336e-05, "loss": 0.7544, "step": 61310 }, { "epoch": 0.39175600219771795, "grad_norm": 0.7509700655937195, "learning_rate": 9.083497831110745e-05, "loss": 0.6952, "step": 61320 }, { "epoch": 0.39181988934745665, "grad_norm": 0.8444516658782959, "learning_rate": 9.0832082584638e-05, "loss": 0.7465, "step": 61330 }, { "epoch": 0.39188377649719536, "grad_norm": 0.7589380145072937, "learning_rate": 9.082918644695413e-05, "loss": 0.7664, "step": 61340 }, { "epoch": 0.39194766364693406, "grad_norm": 4.895397186279297, "learning_rate": 9.082628989808504e-05, "loss": 1.0256, "step": 61350 }, { "epoch": 0.39201155079667277, "grad_norm": 0.7632289528846741, "learning_rate": 9.082339293805988e-05, "loss": 1.2474, "step": 61360 }, { "epoch": 0.3920754379464115, "grad_norm": 0.765465259552002, "learning_rate": 9.082049556690786e-05, "loss": 0.9572, "step": 61370 }, { "epoch": 0.3921393250961502, "grad_norm": 0.6176701188087463, "learning_rate": 9.081759778465811e-05, "loss": 0.8701, "step": 61380 }, { "epoch": 0.3922032122458889, "grad_norm": 1.1706359386444092, "learning_rate": 9.081469959133986e-05, "loss": 0.8748, "step": 61390 }, { "epoch": 0.39226709939562754, "grad_norm": 1.032160758972168, "learning_rate": 9.081180098698225e-05, "loss": 0.8505, "step": 61400 }, { "epoch": 0.39233098654536624, "grad_norm": 1.0283243656158447, "learning_rate": 9.080890197161452e-05, "loss": 0.7096, "step": 61410 }, { "epoch": 0.39239487369510495, "grad_norm": 1.100449800491333, "learning_rate": 9.080600254526583e-05, "loss": 0.9363, "step": 61420 }, { "epoch": 0.39245876084484366, "grad_norm": 1.9551182985305786, "learning_rate": 9.080310270796539e-05, "loss": 0.795, "step": 61430 }, { "epoch": 0.39252264799458236, "grad_norm": 1.056577205657959, "learning_rate": 9.080020245974241e-05, "loss": 0.8075, "step": 61440 }, { "epoch": 0.39258653514432107, "grad_norm": 0.6849813461303711, "learning_rate": 9.07973018006261e-05, "loss": 0.9542, "step": 61450 }, { "epoch": 0.3926504222940598, "grad_norm": 0.8313121199607849, "learning_rate": 9.079440073064567e-05, "loss": 1.0857, "step": 61460 }, { "epoch": 0.3927143094437985, "grad_norm": 0.7464626431465149, "learning_rate": 9.079149924983031e-05, "loss": 0.6962, "step": 61470 }, { "epoch": 0.3927781965935372, "grad_norm": 1.59227454662323, "learning_rate": 9.078859735820928e-05, "loss": 0.8309, "step": 61480 }, { "epoch": 0.3928420837432759, "grad_norm": 0.6378403306007385, "learning_rate": 9.078569505581178e-05, "loss": 0.7235, "step": 61490 }, { "epoch": 0.3929059708930146, "grad_norm": 0.43592649698257446, "learning_rate": 9.078279234266705e-05, "loss": 0.8301, "step": 61500 }, { "epoch": 0.3929698580427533, "grad_norm": 0.9196266531944275, "learning_rate": 9.077988921880431e-05, "loss": 0.8455, "step": 61510 }, { "epoch": 0.39303374519249196, "grad_norm": 0.7270370721817017, "learning_rate": 9.077698568425283e-05, "loss": 0.8118, "step": 61520 }, { "epoch": 0.39309763234223066, "grad_norm": 0.693191647529602, "learning_rate": 9.07740817390418e-05, "loss": 0.9402, "step": 61530 }, { "epoch": 0.39316151949196937, "grad_norm": 0.7091450691223145, "learning_rate": 9.077117738320051e-05, "loss": 0.7799, "step": 61540 }, { "epoch": 0.3932254066417081, "grad_norm": 1.02108633518219, "learning_rate": 9.07682726167582e-05, "loss": 0.9665, "step": 61550 }, { "epoch": 0.3932892937914468, "grad_norm": 1.1987274885177612, "learning_rate": 9.07653674397441e-05, "loss": 0.9019, "step": 61560 }, { "epoch": 0.3933531809411855, "grad_norm": 0.7170557379722595, "learning_rate": 9.076246185218747e-05, "loss": 0.9895, "step": 61570 }, { "epoch": 0.3934170680909242, "grad_norm": 1.2851723432540894, "learning_rate": 9.07595558541176e-05, "loss": 0.8972, "step": 61580 }, { "epoch": 0.3934809552406629, "grad_norm": 0.7113538384437561, "learning_rate": 9.075664944556374e-05, "loss": 0.8101, "step": 61590 }, { "epoch": 0.3935448423904016, "grad_norm": 1.113052487373352, "learning_rate": 9.075374262655516e-05, "loss": 0.8718, "step": 61600 }, { "epoch": 0.3936087295401403, "grad_norm": 0.9161044955253601, "learning_rate": 9.075083539712113e-05, "loss": 0.8209, "step": 61610 }, { "epoch": 0.393672616689879, "grad_norm": 0.9524838328361511, "learning_rate": 9.074792775729096e-05, "loss": 0.9234, "step": 61620 }, { "epoch": 0.3937365038396177, "grad_norm": 1.2486933469772339, "learning_rate": 9.074501970709385e-05, "loss": 0.7753, "step": 61630 }, { "epoch": 0.3938003909893564, "grad_norm": 0.8280370831489563, "learning_rate": 9.07421112465592e-05, "loss": 1.0375, "step": 61640 }, { "epoch": 0.3938642781390951, "grad_norm": 0.9013057947158813, "learning_rate": 9.07392023757162e-05, "loss": 0.9043, "step": 61650 }, { "epoch": 0.3939281652888338, "grad_norm": 0.9092079401016235, "learning_rate": 9.073629309459422e-05, "loss": 0.9026, "step": 61660 }, { "epoch": 0.3939920524385725, "grad_norm": 1.4664134979248047, "learning_rate": 9.07333834032225e-05, "loss": 0.6136, "step": 61670 }, { "epoch": 0.3940559395883112, "grad_norm": 1.106016755104065, "learning_rate": 9.07304733016304e-05, "loss": 1.0314, "step": 61680 }, { "epoch": 0.3941198267380499, "grad_norm": 0.8790785670280457, "learning_rate": 9.072756278984717e-05, "loss": 1.0497, "step": 61690 }, { "epoch": 0.3941837138877886, "grad_norm": 1.431808590888977, "learning_rate": 9.072465186790215e-05, "loss": 0.9975, "step": 61700 }, { "epoch": 0.3942476010375273, "grad_norm": 0.8433964252471924, "learning_rate": 9.072174053582468e-05, "loss": 0.6958, "step": 61710 }, { "epoch": 0.394311488187266, "grad_norm": 0.829806923866272, "learning_rate": 9.071882879364402e-05, "loss": 1.0986, "step": 61720 }, { "epoch": 0.39437537533700473, "grad_norm": 0.8924597501754761, "learning_rate": 9.071591664138954e-05, "loss": 0.9314, "step": 61730 }, { "epoch": 0.39443926248674344, "grad_norm": 0.7619827389717102, "learning_rate": 9.071300407909056e-05, "loss": 0.9549, "step": 61740 }, { "epoch": 0.39450314963648214, "grad_norm": 0.6050899028778076, "learning_rate": 9.07100911067764e-05, "loss": 0.9636, "step": 61750 }, { "epoch": 0.3945670367862208, "grad_norm": 1.1481192111968994, "learning_rate": 9.070717772447641e-05, "loss": 0.743, "step": 61760 }, { "epoch": 0.3946309239359595, "grad_norm": 1.505147099494934, "learning_rate": 9.070426393221993e-05, "loss": 0.7202, "step": 61770 }, { "epoch": 0.3946948110856982, "grad_norm": 1.0512402057647705, "learning_rate": 9.070134973003628e-05, "loss": 0.8743, "step": 61780 }, { "epoch": 0.3947586982354369, "grad_norm": 0.7054274082183838, "learning_rate": 9.069843511795484e-05, "loss": 0.9366, "step": 61790 }, { "epoch": 0.3948225853851756, "grad_norm": 0.6536909937858582, "learning_rate": 9.069552009600494e-05, "loss": 0.7258, "step": 61800 }, { "epoch": 0.3948864725349143, "grad_norm": 0.7718044519424438, "learning_rate": 9.069260466421596e-05, "loss": 1.0622, "step": 61810 }, { "epoch": 0.39495035968465303, "grad_norm": 0.991255521774292, "learning_rate": 9.068968882261723e-05, "loss": 1.0272, "step": 61820 }, { "epoch": 0.39501424683439174, "grad_norm": 5.583859443664551, "learning_rate": 9.068677257123815e-05, "loss": 0.8138, "step": 61830 }, { "epoch": 0.39507813398413044, "grad_norm": 3.004866123199463, "learning_rate": 9.068385591010805e-05, "loss": 1.1612, "step": 61840 }, { "epoch": 0.39514202113386915, "grad_norm": 0.7518250346183777, "learning_rate": 9.068093883925633e-05, "loss": 0.8184, "step": 61850 }, { "epoch": 0.39520590828360785, "grad_norm": 1.5399583578109741, "learning_rate": 9.067802135871237e-05, "loss": 0.8756, "step": 61860 }, { "epoch": 0.39526979543334656, "grad_norm": 2.1497974395751953, "learning_rate": 9.067510346850554e-05, "loss": 1.1971, "step": 61870 }, { "epoch": 0.3953336825830852, "grad_norm": 0.8201958537101746, "learning_rate": 9.067218516866523e-05, "loss": 1.0288, "step": 61880 }, { "epoch": 0.3953975697328239, "grad_norm": 1.203514575958252, "learning_rate": 9.066926645922084e-05, "loss": 1.0717, "step": 61890 }, { "epoch": 0.3954614568825626, "grad_norm": 0.8252068161964417, "learning_rate": 9.066634734020174e-05, "loss": 0.8844, "step": 61900 }, { "epoch": 0.39552534403230133, "grad_norm": 0.7639890313148499, "learning_rate": 9.066342781163733e-05, "loss": 0.907, "step": 61910 }, { "epoch": 0.39558923118204004, "grad_norm": 0.8897015452384949, "learning_rate": 9.066050787355704e-05, "loss": 0.7727, "step": 61920 }, { "epoch": 0.39565311833177874, "grad_norm": 0.7301774024963379, "learning_rate": 9.065758752599026e-05, "loss": 0.9699, "step": 61930 }, { "epoch": 0.39571700548151745, "grad_norm": 1.1246193647384644, "learning_rate": 9.065466676896639e-05, "loss": 0.7621, "step": 61940 }, { "epoch": 0.39578089263125615, "grad_norm": 0.7929351329803467, "learning_rate": 9.065174560251487e-05, "loss": 0.8905, "step": 61950 }, { "epoch": 0.39584477978099486, "grad_norm": 1.0358200073242188, "learning_rate": 9.064882402666508e-05, "loss": 0.7801, "step": 61960 }, { "epoch": 0.39590866693073357, "grad_norm": 0.7412776947021484, "learning_rate": 9.064590204144647e-05, "loss": 0.9169, "step": 61970 }, { "epoch": 0.39597255408047227, "grad_norm": 0.7912229895591736, "learning_rate": 9.064297964688848e-05, "loss": 0.7336, "step": 61980 }, { "epoch": 0.396036441230211, "grad_norm": 0.7048013210296631, "learning_rate": 9.064005684302051e-05, "loss": 0.7669, "step": 61990 }, { "epoch": 0.3961003283799497, "grad_norm": 0.594420850276947, "learning_rate": 9.063713362987201e-05, "loss": 0.858, "step": 62000 }, { "epoch": 0.39616421552968833, "grad_norm": 0.6770893931388855, "learning_rate": 9.063421000747243e-05, "loss": 0.931, "step": 62010 }, { "epoch": 0.39622810267942704, "grad_norm": 0.9604712128639221, "learning_rate": 9.06312859758512e-05, "loss": 0.8397, "step": 62020 }, { "epoch": 0.39629198982916575, "grad_norm": 0.693006157875061, "learning_rate": 9.062836153503775e-05, "loss": 0.9519, "step": 62030 }, { "epoch": 0.39635587697890445, "grad_norm": 0.6312511563301086, "learning_rate": 9.062543668506156e-05, "loss": 0.9113, "step": 62040 }, { "epoch": 0.39641976412864316, "grad_norm": 0.7017596364021301, "learning_rate": 9.062251142595208e-05, "loss": 0.6917, "step": 62050 }, { "epoch": 0.39648365127838187, "grad_norm": 0.5928127765655518, "learning_rate": 9.061958575773876e-05, "loss": 0.9722, "step": 62060 }, { "epoch": 0.39654753842812057, "grad_norm": 1.742937445640564, "learning_rate": 9.06166596804511e-05, "loss": 0.9624, "step": 62070 }, { "epoch": 0.3966114255778593, "grad_norm": 0.7170588374137878, "learning_rate": 9.06137331941185e-05, "loss": 1.2726, "step": 62080 }, { "epoch": 0.396675312727598, "grad_norm": 1.1127740144729614, "learning_rate": 9.06108062987705e-05, "loss": 1.0994, "step": 62090 }, { "epoch": 0.3967391998773367, "grad_norm": 1.1925806999206543, "learning_rate": 9.060787899443652e-05, "loss": 0.8594, "step": 62100 }, { "epoch": 0.3968030870270754, "grad_norm": 0.9429210424423218, "learning_rate": 9.060495128114607e-05, "loss": 0.8756, "step": 62110 }, { "epoch": 0.3968669741768141, "grad_norm": 0.5963910222053528, "learning_rate": 9.060202315892866e-05, "loss": 0.9714, "step": 62120 }, { "epoch": 0.39693086132655275, "grad_norm": 1.1170175075531006, "learning_rate": 9.059909462781373e-05, "loss": 0.8631, "step": 62130 }, { "epoch": 0.39699474847629146, "grad_norm": 0.8226674199104309, "learning_rate": 9.05961656878308e-05, "loss": 0.8723, "step": 62140 }, { "epoch": 0.39705863562603017, "grad_norm": 0.8132166862487793, "learning_rate": 9.059323633900936e-05, "loss": 0.8302, "step": 62150 }, { "epoch": 0.39712252277576887, "grad_norm": 1.661969542503357, "learning_rate": 9.059030658137892e-05, "loss": 0.6955, "step": 62160 }, { "epoch": 0.3971864099255076, "grad_norm": 1.1347659826278687, "learning_rate": 9.058737641496896e-05, "loss": 0.8491, "step": 62170 }, { "epoch": 0.3972502970752463, "grad_norm": 0.8600234985351562, "learning_rate": 9.058444583980901e-05, "loss": 0.8166, "step": 62180 }, { "epoch": 0.397314184224985, "grad_norm": 1.037380337715149, "learning_rate": 9.058151485592858e-05, "loss": 1.0938, "step": 62190 }, { "epoch": 0.3973780713747237, "grad_norm": 0.8460977673530579, "learning_rate": 9.057858346335719e-05, "loss": 0.7327, "step": 62200 }, { "epoch": 0.3974419585244624, "grad_norm": 1.0163371562957764, "learning_rate": 9.057565166212436e-05, "loss": 0.8889, "step": 62210 }, { "epoch": 0.3975058456742011, "grad_norm": 0.8460572361946106, "learning_rate": 9.057271945225962e-05, "loss": 0.9403, "step": 62220 }, { "epoch": 0.3975697328239398, "grad_norm": 0.8837966322898865, "learning_rate": 9.056978683379249e-05, "loss": 0.8961, "step": 62230 }, { "epoch": 0.3976336199736785, "grad_norm": 2.452587127685547, "learning_rate": 9.056685380675251e-05, "loss": 0.7814, "step": 62240 }, { "epoch": 0.39769750712341717, "grad_norm": 1.0550041198730469, "learning_rate": 9.056392037116922e-05, "loss": 0.944, "step": 62250 }, { "epoch": 0.3977613942731559, "grad_norm": 0.6195732951164246, "learning_rate": 9.056098652707215e-05, "loss": 0.8217, "step": 62260 }, { "epoch": 0.3978252814228946, "grad_norm": 0.9907122254371643, "learning_rate": 9.055805227449086e-05, "loss": 0.9645, "step": 62270 }, { "epoch": 0.3978891685726333, "grad_norm": 0.8403761386871338, "learning_rate": 9.05551176134549e-05, "loss": 0.8914, "step": 62280 }, { "epoch": 0.397953055722372, "grad_norm": 0.880027174949646, "learning_rate": 9.055218254399382e-05, "loss": 0.8041, "step": 62290 }, { "epoch": 0.3980169428721107, "grad_norm": 1.1135274171829224, "learning_rate": 9.054924706613716e-05, "loss": 0.9497, "step": 62300 }, { "epoch": 0.3980808300218494, "grad_norm": 0.6633336544036865, "learning_rate": 9.054631117991453e-05, "loss": 0.9701, "step": 62310 }, { "epoch": 0.3981447171715881, "grad_norm": 0.9444105625152588, "learning_rate": 9.054337488535546e-05, "loss": 0.7332, "step": 62320 }, { "epoch": 0.3982086043213268, "grad_norm": 1.4530045986175537, "learning_rate": 9.054043818248952e-05, "loss": 0.7233, "step": 62330 }, { "epoch": 0.3982724914710655, "grad_norm": 0.8733393549919128, "learning_rate": 9.053750107134631e-05, "loss": 0.9144, "step": 62340 }, { "epoch": 0.39833637862080423, "grad_norm": 0.6644203662872314, "learning_rate": 9.053456355195537e-05, "loss": 0.874, "step": 62350 }, { "epoch": 0.39840026577054294, "grad_norm": 0.9878085851669312, "learning_rate": 9.053162562434633e-05, "loss": 1.2423, "step": 62360 }, { "epoch": 0.3984641529202816, "grad_norm": 1.3864879608154297, "learning_rate": 9.052868728854876e-05, "loss": 0.9242, "step": 62370 }, { "epoch": 0.3985280400700203, "grad_norm": 0.777885377407074, "learning_rate": 9.052574854459223e-05, "loss": 0.9149, "step": 62380 }, { "epoch": 0.398591927219759, "grad_norm": 0.8761781454086304, "learning_rate": 9.052280939250636e-05, "loss": 1.0996, "step": 62390 }, { "epoch": 0.3986558143694977, "grad_norm": 0.5955054759979248, "learning_rate": 9.051986983232073e-05, "loss": 0.8387, "step": 62400 }, { "epoch": 0.3987197015192364, "grad_norm": 0.9722325205802917, "learning_rate": 9.051692986406496e-05, "loss": 1.0697, "step": 62410 }, { "epoch": 0.3987835886689751, "grad_norm": 0.6643452048301697, "learning_rate": 9.051398948776868e-05, "loss": 1.2275, "step": 62420 }, { "epoch": 0.3988474758187138, "grad_norm": 1.9424246549606323, "learning_rate": 9.051104870346146e-05, "loss": 0.8924, "step": 62430 }, { "epoch": 0.39891136296845253, "grad_norm": 0.771974503993988, "learning_rate": 9.050810751117292e-05, "loss": 0.7818, "step": 62440 }, { "epoch": 0.39897525011819124, "grad_norm": 1.0370486974716187, "learning_rate": 9.05051659109327e-05, "loss": 0.9449, "step": 62450 }, { "epoch": 0.39903913726792994, "grad_norm": 1.289140224456787, "learning_rate": 9.050222390277041e-05, "loss": 0.7505, "step": 62460 }, { "epoch": 0.39910302441766865, "grad_norm": 0.7696613669395447, "learning_rate": 9.049928148671569e-05, "loss": 0.9424, "step": 62470 }, { "epoch": 0.39916691156740736, "grad_norm": 2.606376886367798, "learning_rate": 9.049633866279819e-05, "loss": 0.9175, "step": 62480 }, { "epoch": 0.399230798717146, "grad_norm": 0.9909952282905579, "learning_rate": 9.049339543104751e-05, "loss": 0.6305, "step": 62490 }, { "epoch": 0.3992946858668847, "grad_norm": 0.9084514379501343, "learning_rate": 9.04904517914933e-05, "loss": 0.8609, "step": 62500 }, { "epoch": 0.3993585730166234, "grad_norm": 1.2386984825134277, "learning_rate": 9.048750774416521e-05, "loss": 0.863, "step": 62510 }, { "epoch": 0.3994224601663621, "grad_norm": 1.0682573318481445, "learning_rate": 9.04845632890929e-05, "loss": 0.9319, "step": 62520 }, { "epoch": 0.39948634731610083, "grad_norm": 0.7610236406326294, "learning_rate": 9.048161842630602e-05, "loss": 0.7901, "step": 62530 }, { "epoch": 0.39955023446583954, "grad_norm": 0.8096383810043335, "learning_rate": 9.04786731558342e-05, "loss": 0.8542, "step": 62540 }, { "epoch": 0.39961412161557824, "grad_norm": 0.6308041214942932, "learning_rate": 9.047572747770713e-05, "loss": 0.9005, "step": 62550 }, { "epoch": 0.39967800876531695, "grad_norm": 1.0608285665512085, "learning_rate": 9.047278139195447e-05, "loss": 0.9082, "step": 62560 }, { "epoch": 0.39974189591505566, "grad_norm": 1.0790623426437378, "learning_rate": 9.04698348986059e-05, "loss": 0.9912, "step": 62570 }, { "epoch": 0.39980578306479436, "grad_norm": 0.5858973860740662, "learning_rate": 9.046688799769107e-05, "loss": 0.7241, "step": 62580 }, { "epoch": 0.39986967021453307, "grad_norm": 0.7396795153617859, "learning_rate": 9.046394068923967e-05, "loss": 0.8767, "step": 62590 }, { "epoch": 0.3999335573642718, "grad_norm": 0.5871313214302063, "learning_rate": 9.046099297328138e-05, "loss": 0.6491, "step": 62600 }, { "epoch": 0.3999974445140104, "grad_norm": 1.0462760925292969, "learning_rate": 9.045804484984588e-05, "loss": 0.9854, "step": 62610 }, { "epoch": 0.40006133166374913, "grad_norm": 1.0738905668258667, "learning_rate": 9.045509631896287e-05, "loss": 0.8662, "step": 62620 }, { "epoch": 0.40012521881348784, "grad_norm": 0.7057567834854126, "learning_rate": 9.045214738066206e-05, "loss": 0.736, "step": 62630 }, { "epoch": 0.40018910596322654, "grad_norm": 0.9611753225326538, "learning_rate": 9.044919803497312e-05, "loss": 0.794, "step": 62640 }, { "epoch": 0.40025299311296525, "grad_norm": 0.9139066934585571, "learning_rate": 9.044624828192573e-05, "loss": 0.7416, "step": 62650 }, { "epoch": 0.40031688026270396, "grad_norm": 0.7299910187721252, "learning_rate": 9.044329812154966e-05, "loss": 1.1855, "step": 62660 }, { "epoch": 0.40038076741244266, "grad_norm": 1.0329594612121582, "learning_rate": 9.04403475538746e-05, "loss": 0.7538, "step": 62670 }, { "epoch": 0.40044465456218137, "grad_norm": 0.8256815671920776, "learning_rate": 9.043739657893025e-05, "loss": 0.8794, "step": 62680 }, { "epoch": 0.4005085417119201, "grad_norm": 0.7287086248397827, "learning_rate": 9.043444519674631e-05, "loss": 1.0395, "step": 62690 }, { "epoch": 0.4005724288616588, "grad_norm": 0.7934675216674805, "learning_rate": 9.043149340735253e-05, "loss": 0.7567, "step": 62700 }, { "epoch": 0.4006363160113975, "grad_norm": 0.5604273676872253, "learning_rate": 9.042854121077865e-05, "loss": 1.0449, "step": 62710 }, { "epoch": 0.4007002031611362, "grad_norm": 1.5315760374069214, "learning_rate": 9.042558860705436e-05, "loss": 1.1271, "step": 62720 }, { "epoch": 0.40076409031087484, "grad_norm": 1.0402827262878418, "learning_rate": 9.042263559620945e-05, "loss": 0.8949, "step": 62730 }, { "epoch": 0.40082797746061355, "grad_norm": 0.6324530243873596, "learning_rate": 9.041968217827363e-05, "loss": 0.7596, "step": 62740 }, { "epoch": 0.40089186461035226, "grad_norm": 0.6953981518745422, "learning_rate": 9.041672835327661e-05, "loss": 0.6683, "step": 62750 }, { "epoch": 0.40095575176009096, "grad_norm": 0.8645387887954712, "learning_rate": 9.04137741212482e-05, "loss": 1.1533, "step": 62760 }, { "epoch": 0.40101963890982967, "grad_norm": 0.8752760291099548, "learning_rate": 9.04108194822181e-05, "loss": 0.9688, "step": 62770 }, { "epoch": 0.4010835260595684, "grad_norm": 1.0620766878128052, "learning_rate": 9.040786443621609e-05, "loss": 0.9483, "step": 62780 }, { "epoch": 0.4011474132093071, "grad_norm": 0.9805885553359985, "learning_rate": 9.040490898327194e-05, "loss": 0.9889, "step": 62790 }, { "epoch": 0.4012113003590458, "grad_norm": 1.2980453968048096, "learning_rate": 9.04019531234154e-05, "loss": 1.0088, "step": 62800 }, { "epoch": 0.4012751875087845, "grad_norm": 0.6901305913925171, "learning_rate": 9.039899685667624e-05, "loss": 0.841, "step": 62810 }, { "epoch": 0.4013390746585232, "grad_norm": 0.6811827421188354, "learning_rate": 9.039604018308423e-05, "loss": 0.7313, "step": 62820 }, { "epoch": 0.4014029618082619, "grad_norm": 1.0031507015228271, "learning_rate": 9.039308310266914e-05, "loss": 0.9193, "step": 62830 }, { "epoch": 0.4014668489580006, "grad_norm": 0.66957688331604, "learning_rate": 9.039012561546076e-05, "loss": 0.9917, "step": 62840 }, { "epoch": 0.4015307361077393, "grad_norm": 1.3045806884765625, "learning_rate": 9.038716772148888e-05, "loss": 0.8695, "step": 62850 }, { "epoch": 0.40159462325747797, "grad_norm": 0.8219857811927795, "learning_rate": 9.038420942078327e-05, "loss": 0.913, "step": 62860 }, { "epoch": 0.4016585104072167, "grad_norm": 1.7274596691131592, "learning_rate": 9.038125071337374e-05, "loss": 0.9524, "step": 62870 }, { "epoch": 0.4017223975569554, "grad_norm": 1.0028507709503174, "learning_rate": 9.037829159929008e-05, "loss": 0.8358, "step": 62880 }, { "epoch": 0.4017862847066941, "grad_norm": 1.3326009511947632, "learning_rate": 9.03753320785621e-05, "loss": 1.1543, "step": 62890 }, { "epoch": 0.4018501718564328, "grad_norm": 0.4937954246997833, "learning_rate": 9.037237215121958e-05, "loss": 1.0826, "step": 62900 }, { "epoch": 0.4019140590061715, "grad_norm": 1.1819350719451904, "learning_rate": 9.036941181729236e-05, "loss": 0.8164, "step": 62910 }, { "epoch": 0.4019779461559102, "grad_norm": 0.710355281829834, "learning_rate": 9.036645107681023e-05, "loss": 0.8995, "step": 62920 }, { "epoch": 0.4020418333056489, "grad_norm": 0.6797157526016235, "learning_rate": 9.036348992980301e-05, "loss": 0.9323, "step": 62930 }, { "epoch": 0.4021057204553876, "grad_norm": 0.6142218112945557, "learning_rate": 9.036052837630054e-05, "loss": 1.0316, "step": 62940 }, { "epoch": 0.4021696076051263, "grad_norm": 0.5623320937156677, "learning_rate": 9.035756641633264e-05, "loss": 1.0354, "step": 62950 }, { "epoch": 0.40223349475486503, "grad_norm": 0.6780490279197693, "learning_rate": 9.03546040499291e-05, "loss": 0.8209, "step": 62960 }, { "epoch": 0.40229738190460373, "grad_norm": 0.8299171328544617, "learning_rate": 9.035164127711981e-05, "loss": 0.9596, "step": 62970 }, { "epoch": 0.4023612690543424, "grad_norm": 0.6555722951889038, "learning_rate": 9.03486780979346e-05, "loss": 1.0825, "step": 62980 }, { "epoch": 0.4024251562040811, "grad_norm": 1.324913501739502, "learning_rate": 9.034571451240325e-05, "loss": 0.9062, "step": 62990 }, { "epoch": 0.4024890433538198, "grad_norm": 1.155165672302246, "learning_rate": 9.034275052055568e-05, "loss": 0.9358, "step": 63000 }, { "epoch": 0.4025529305035585, "grad_norm": 0.9214060306549072, "learning_rate": 9.03397861224217e-05, "loss": 0.6413, "step": 63010 }, { "epoch": 0.4026168176532972, "grad_norm": 0.9040579199790955, "learning_rate": 9.033682131803119e-05, "loss": 0.9746, "step": 63020 }, { "epoch": 0.4026807048030359, "grad_norm": 0.9403018355369568, "learning_rate": 9.033385610741398e-05, "loss": 0.8279, "step": 63030 }, { "epoch": 0.4027445919527746, "grad_norm": 0.9676703810691833, "learning_rate": 9.033089049059996e-05, "loss": 1.2033, "step": 63040 }, { "epoch": 0.40280847910251333, "grad_norm": 3.454418420791626, "learning_rate": 9.032792446761896e-05, "loss": 0.8704, "step": 63050 }, { "epoch": 0.40287236625225203, "grad_norm": 3.037147283554077, "learning_rate": 9.032495803850088e-05, "loss": 0.8457, "step": 63060 }, { "epoch": 0.40293625340199074, "grad_norm": 0.6617047190666199, "learning_rate": 9.032199120327558e-05, "loss": 0.8883, "step": 63070 }, { "epoch": 0.40300014055172945, "grad_norm": 0.987816333770752, "learning_rate": 9.031902396197296e-05, "loss": 0.791, "step": 63080 }, { "epoch": 0.40306402770146815, "grad_norm": 0.7392496466636658, "learning_rate": 9.031605631462288e-05, "loss": 0.8478, "step": 63090 }, { "epoch": 0.4031279148512068, "grad_norm": 1.0428085327148438, "learning_rate": 9.031308826125524e-05, "loss": 0.9056, "step": 63100 }, { "epoch": 0.4031918020009455, "grad_norm": 1.2448642253875732, "learning_rate": 9.031011980189992e-05, "loss": 0.8957, "step": 63110 }, { "epoch": 0.4032556891506842, "grad_norm": 0.9294689893722534, "learning_rate": 9.030715093658681e-05, "loss": 0.6793, "step": 63120 }, { "epoch": 0.4033195763004229, "grad_norm": 1.0956008434295654, "learning_rate": 9.030418166534585e-05, "loss": 1.106, "step": 63130 }, { "epoch": 0.40338346345016163, "grad_norm": 0.8035675883293152, "learning_rate": 9.030121198820688e-05, "loss": 0.8668, "step": 63140 }, { "epoch": 0.40344735059990033, "grad_norm": 1.7835280895233154, "learning_rate": 9.029824190519986e-05, "loss": 1.0177, "step": 63150 }, { "epoch": 0.40351123774963904, "grad_norm": 1.057437777519226, "learning_rate": 9.029527141635467e-05, "loss": 0.8812, "step": 63160 }, { "epoch": 0.40357512489937775, "grad_norm": 0.8696292638778687, "learning_rate": 9.029230052170123e-05, "loss": 0.8662, "step": 63170 }, { "epoch": 0.40363901204911645, "grad_norm": 0.9994892477989197, "learning_rate": 9.02893292212695e-05, "loss": 1.1962, "step": 63180 }, { "epoch": 0.40370289919885516, "grad_norm": 1.2672826051712036, "learning_rate": 9.028635751508933e-05, "loss": 0.95, "step": 63190 }, { "epoch": 0.40376678634859386, "grad_norm": 0.6766789555549622, "learning_rate": 9.02833854031907e-05, "loss": 0.7107, "step": 63200 }, { "epoch": 0.40383067349833257, "grad_norm": 1.4297183752059937, "learning_rate": 9.028041288560354e-05, "loss": 0.9061, "step": 63210 }, { "epoch": 0.4038945606480712, "grad_norm": 0.7802338004112244, "learning_rate": 9.027743996235775e-05, "loss": 0.858, "step": 63220 }, { "epoch": 0.4039584477978099, "grad_norm": 1.5008245706558228, "learning_rate": 9.027446663348333e-05, "loss": 1.1954, "step": 63230 }, { "epoch": 0.40402233494754863, "grad_norm": 0.9021018743515015, "learning_rate": 9.027149289901016e-05, "loss": 0.8044, "step": 63240 }, { "epoch": 0.40408622209728734, "grad_norm": 0.7308499217033386, "learning_rate": 9.026851875896822e-05, "loss": 1.0717, "step": 63250 }, { "epoch": 0.40415010924702605, "grad_norm": 0.8657183051109314, "learning_rate": 9.026554421338748e-05, "loss": 1.0214, "step": 63260 }, { "epoch": 0.40421399639676475, "grad_norm": 0.9111654162406921, "learning_rate": 9.026256926229786e-05, "loss": 0.863, "step": 63270 }, { "epoch": 0.40427788354650346, "grad_norm": 0.9648974537849426, "learning_rate": 9.025959390572933e-05, "loss": 0.7586, "step": 63280 }, { "epoch": 0.40434177069624216, "grad_norm": 0.8858680725097656, "learning_rate": 9.025661814371187e-05, "loss": 1.1628, "step": 63290 }, { "epoch": 0.40440565784598087, "grad_norm": 0.7507526874542236, "learning_rate": 9.025364197627543e-05, "loss": 0.7555, "step": 63300 }, { "epoch": 0.4044695449957196, "grad_norm": 0.7680438160896301, "learning_rate": 9.025066540345e-05, "loss": 1.0276, "step": 63310 }, { "epoch": 0.4045334321454583, "grad_norm": 1.0163004398345947, "learning_rate": 9.024768842526554e-05, "loss": 0.9563, "step": 63320 }, { "epoch": 0.404597319295197, "grad_norm": 0.7688309550285339, "learning_rate": 9.024471104175203e-05, "loss": 0.9156, "step": 63330 }, { "epoch": 0.40466120644493564, "grad_norm": 1.04693603515625, "learning_rate": 9.024173325293949e-05, "loss": 0.8006, "step": 63340 }, { "epoch": 0.40472509359467435, "grad_norm": 0.6474732160568237, "learning_rate": 9.023875505885786e-05, "loss": 0.5947, "step": 63350 }, { "epoch": 0.40478898074441305, "grad_norm": 0.6486718058586121, "learning_rate": 9.023577645953718e-05, "loss": 0.8847, "step": 63360 }, { "epoch": 0.40485286789415176, "grad_norm": 0.8913542628288269, "learning_rate": 9.023279745500738e-05, "loss": 1.0172, "step": 63370 }, { "epoch": 0.40491675504389046, "grad_norm": 0.629562497138977, "learning_rate": 9.022981804529853e-05, "loss": 0.9124, "step": 63380 }, { "epoch": 0.40498064219362917, "grad_norm": 0.6215500831604004, "learning_rate": 9.022683823044061e-05, "loss": 0.7679, "step": 63390 }, { "epoch": 0.4050445293433679, "grad_norm": 1.2027158737182617, "learning_rate": 9.022385801046363e-05, "loss": 0.9516, "step": 63400 }, { "epoch": 0.4051084164931066, "grad_norm": 1.0377594232559204, "learning_rate": 9.02208773853976e-05, "loss": 0.791, "step": 63410 }, { "epoch": 0.4051723036428453, "grad_norm": 0.5519084334373474, "learning_rate": 9.021789635527252e-05, "loss": 1.0029, "step": 63420 }, { "epoch": 0.405236190792584, "grad_norm": 0.8896793723106384, "learning_rate": 9.021491492011844e-05, "loss": 0.9838, "step": 63430 }, { "epoch": 0.4053000779423227, "grad_norm": 0.8860163688659668, "learning_rate": 9.021193307996538e-05, "loss": 1.2256, "step": 63440 }, { "epoch": 0.4053639650920614, "grad_norm": 1.1644326448440552, "learning_rate": 9.020895083484337e-05, "loss": 0.8291, "step": 63450 }, { "epoch": 0.40542785224180006, "grad_norm": 0.8265649676322937, "learning_rate": 9.020596818478244e-05, "loss": 1.0556, "step": 63460 }, { "epoch": 0.40549173939153876, "grad_norm": 1.3576620817184448, "learning_rate": 9.020298512981262e-05, "loss": 0.9018, "step": 63470 }, { "epoch": 0.40555562654127747, "grad_norm": 0.8418384194374084, "learning_rate": 9.020000166996397e-05, "loss": 0.949, "step": 63480 }, { "epoch": 0.4056195136910162, "grad_norm": 0.9804365634918213, "learning_rate": 9.01970178052665e-05, "loss": 1.0751, "step": 63490 }, { "epoch": 0.4056834008407549, "grad_norm": 1.0201619863510132, "learning_rate": 9.01940335357503e-05, "loss": 0.9828, "step": 63500 }, { "epoch": 0.4057472879904936, "grad_norm": 0.6420082449913025, "learning_rate": 9.019104886144543e-05, "loss": 0.7166, "step": 63510 }, { "epoch": 0.4058111751402323, "grad_norm": 0.6462534070014954, "learning_rate": 9.01880637823819e-05, "loss": 0.883, "step": 63520 }, { "epoch": 0.405875062289971, "grad_norm": 0.44123539328575134, "learning_rate": 9.018507829858981e-05, "loss": 0.8291, "step": 63530 }, { "epoch": 0.4059389494397097, "grad_norm": 0.8670223951339722, "learning_rate": 9.018209241009921e-05, "loss": 1.0204, "step": 63540 }, { "epoch": 0.4060028365894484, "grad_norm": 1.4019068479537964, "learning_rate": 9.017910611694018e-05, "loss": 0.6407, "step": 63550 }, { "epoch": 0.4060667237391871, "grad_norm": 0.8712426424026489, "learning_rate": 9.01761194191428e-05, "loss": 0.7192, "step": 63560 }, { "epoch": 0.4061306108889258, "grad_norm": 1.283201813697815, "learning_rate": 9.017313231673714e-05, "loss": 0.9573, "step": 63570 }, { "epoch": 0.4061944980386645, "grad_norm": 0.5912864208221436, "learning_rate": 9.017014480975327e-05, "loss": 0.6543, "step": 63580 }, { "epoch": 0.4062583851884032, "grad_norm": 0.8346386551856995, "learning_rate": 9.01671568982213e-05, "loss": 0.9464, "step": 63590 }, { "epoch": 0.4063222723381419, "grad_norm": 0.7532115578651428, "learning_rate": 9.016416858217131e-05, "loss": 0.7063, "step": 63600 }, { "epoch": 0.4063861594878806, "grad_norm": 0.8477169275283813, "learning_rate": 9.016117986163339e-05, "loss": 1.3564, "step": 63610 }, { "epoch": 0.4064500466376193, "grad_norm": 0.8860613703727722, "learning_rate": 9.015819073663765e-05, "loss": 0.9798, "step": 63620 }, { "epoch": 0.406513933787358, "grad_norm": 1.1103487014770508, "learning_rate": 9.015520120721419e-05, "loss": 1.222, "step": 63630 }, { "epoch": 0.4065778209370967, "grad_norm": 0.9043728709220886, "learning_rate": 9.015221127339311e-05, "loss": 1.0145, "step": 63640 }, { "epoch": 0.4066417080868354, "grad_norm": 1.244024634361267, "learning_rate": 9.01492209352045e-05, "loss": 0.8443, "step": 63650 }, { "epoch": 0.4067055952365741, "grad_norm": 0.982548713684082, "learning_rate": 9.014623019267853e-05, "loss": 0.9263, "step": 63660 }, { "epoch": 0.40676948238631283, "grad_norm": 0.7706730961799622, "learning_rate": 9.01432390458453e-05, "loss": 1.0515, "step": 63670 }, { "epoch": 0.40683336953605154, "grad_norm": 1.0974234342575073, "learning_rate": 9.014024749473491e-05, "loss": 0.8391, "step": 63680 }, { "epoch": 0.40689725668579024, "grad_norm": 0.8479081392288208, "learning_rate": 9.01372555393775e-05, "loss": 1.1546, "step": 63690 }, { "epoch": 0.40696114383552895, "grad_norm": 0.672386884689331, "learning_rate": 9.01342631798032e-05, "loss": 0.7925, "step": 63700 }, { "epoch": 0.4070250309852676, "grad_norm": 0.6552578806877136, "learning_rate": 9.013127041604217e-05, "loss": 0.8039, "step": 63710 }, { "epoch": 0.4070889181350063, "grad_norm": 0.7471428513526917, "learning_rate": 9.01282772481245e-05, "loss": 0.8832, "step": 63720 }, { "epoch": 0.407152805284745, "grad_norm": 0.871847927570343, "learning_rate": 9.012528367608037e-05, "loss": 0.87, "step": 63730 }, { "epoch": 0.4072166924344837, "grad_norm": 0.8684267401695251, "learning_rate": 9.012228969993992e-05, "loss": 1.0718, "step": 63740 }, { "epoch": 0.4072805795842224, "grad_norm": 1.1163896322250366, "learning_rate": 9.01192953197333e-05, "loss": 0.8171, "step": 63750 }, { "epoch": 0.40734446673396113, "grad_norm": 0.8885432481765747, "learning_rate": 9.011630053549069e-05, "loss": 0.7901, "step": 63760 }, { "epoch": 0.40740835388369984, "grad_norm": 0.9396836161613464, "learning_rate": 9.011330534724221e-05, "loss": 0.8013, "step": 63770 }, { "epoch": 0.40747224103343854, "grad_norm": 1.0267233848571777, "learning_rate": 9.011030975501804e-05, "loss": 0.8062, "step": 63780 }, { "epoch": 0.40753612818317725, "grad_norm": 0.8495022654533386, "learning_rate": 9.010731375884835e-05, "loss": 0.7481, "step": 63790 }, { "epoch": 0.40760001533291595, "grad_norm": 0.9368879199028015, "learning_rate": 9.010431735876332e-05, "loss": 1.0601, "step": 63800 }, { "epoch": 0.40766390248265466, "grad_norm": 0.8383790254592896, "learning_rate": 9.010132055479313e-05, "loss": 0.7068, "step": 63810 }, { "epoch": 0.40772778963239337, "grad_norm": 0.7061938643455505, "learning_rate": 9.009832334696792e-05, "loss": 1.0569, "step": 63820 }, { "epoch": 0.407791676782132, "grad_norm": 0.8540278673171997, "learning_rate": 9.009532573531793e-05, "loss": 1.0359, "step": 63830 }, { "epoch": 0.4078555639318707, "grad_norm": 1.0595225095748901, "learning_rate": 9.009232771987331e-05, "loss": 0.8767, "step": 63840 }, { "epoch": 0.40791945108160943, "grad_norm": 0.6768961548805237, "learning_rate": 9.008932930066428e-05, "loss": 1.0288, "step": 63850 }, { "epoch": 0.40798333823134814, "grad_norm": 0.642488420009613, "learning_rate": 9.0086330477721e-05, "loss": 0.9504, "step": 63860 }, { "epoch": 0.40804722538108684, "grad_norm": 0.9785758852958679, "learning_rate": 9.008333125107371e-05, "loss": 0.9287, "step": 63870 }, { "epoch": 0.40811111253082555, "grad_norm": 0.949464738368988, "learning_rate": 9.008033162075259e-05, "loss": 0.9448, "step": 63880 }, { "epoch": 0.40817499968056425, "grad_norm": 1.017958164215088, "learning_rate": 9.007733158678787e-05, "loss": 0.8016, "step": 63890 }, { "epoch": 0.40823888683030296, "grad_norm": 3.1285202503204346, "learning_rate": 9.007433114920972e-05, "loss": 0.9056, "step": 63900 }, { "epoch": 0.40830277398004167, "grad_norm": 0.8148319721221924, "learning_rate": 9.00713303080484e-05, "loss": 1.0595, "step": 63910 }, { "epoch": 0.4083666611297804, "grad_norm": 0.773764967918396, "learning_rate": 9.006832906333411e-05, "loss": 0.8172, "step": 63920 }, { "epoch": 0.4084305482795191, "grad_norm": 0.8310877084732056, "learning_rate": 9.00653274150971e-05, "loss": 0.9826, "step": 63930 }, { "epoch": 0.4084944354292578, "grad_norm": 0.904560923576355, "learning_rate": 9.006232536336756e-05, "loss": 1.1305, "step": 63940 }, { "epoch": 0.40855832257899644, "grad_norm": 0.9716793298721313, "learning_rate": 9.005932290817576e-05, "loss": 0.7498, "step": 63950 }, { "epoch": 0.40862220972873514, "grad_norm": 1.2290844917297363, "learning_rate": 9.005632004955192e-05, "loss": 0.8095, "step": 63960 }, { "epoch": 0.40868609687847385, "grad_norm": 0.5113322734832764, "learning_rate": 9.005331678752629e-05, "loss": 0.9543, "step": 63970 }, { "epoch": 0.40874998402821255, "grad_norm": 0.6806952357292175, "learning_rate": 9.00503131221291e-05, "loss": 1.0935, "step": 63980 }, { "epoch": 0.40881387117795126, "grad_norm": 1.0288503170013428, "learning_rate": 9.00473090533906e-05, "loss": 0.8507, "step": 63990 }, { "epoch": 0.40887775832768997, "grad_norm": 0.6882049441337585, "learning_rate": 9.004430458134107e-05, "loss": 1.0034, "step": 64000 }, { "epoch": 0.4089416454774287, "grad_norm": 1.0259922742843628, "learning_rate": 9.004129970601074e-05, "loss": 1.11, "step": 64010 }, { "epoch": 0.4090055326271674, "grad_norm": 0.7232376337051392, "learning_rate": 9.003829442742989e-05, "loss": 1.0349, "step": 64020 }, { "epoch": 0.4090694197769061, "grad_norm": 0.7843562960624695, "learning_rate": 9.003528874562875e-05, "loss": 0.8303, "step": 64030 }, { "epoch": 0.4091333069266448, "grad_norm": 0.7340204119682312, "learning_rate": 9.003228266063765e-05, "loss": 0.8455, "step": 64040 }, { "epoch": 0.4091971940763835, "grad_norm": 0.618037760257721, "learning_rate": 9.00292761724868e-05, "loss": 0.8546, "step": 64050 }, { "epoch": 0.4092610812261222, "grad_norm": 0.4729726314544678, "learning_rate": 9.002626928120654e-05, "loss": 0.8056, "step": 64060 }, { "epoch": 0.40932496837586085, "grad_norm": 4.078080654144287, "learning_rate": 9.002326198682712e-05, "loss": 1.038, "step": 64070 }, { "epoch": 0.40938885552559956, "grad_norm": 0.8431742787361145, "learning_rate": 9.002025428937879e-05, "loss": 0.9887, "step": 64080 }, { "epoch": 0.40945274267533827, "grad_norm": 1.1777523756027222, "learning_rate": 9.00172461888919e-05, "loss": 0.729, "step": 64090 }, { "epoch": 0.40951662982507697, "grad_norm": 0.9393613338470459, "learning_rate": 9.001423768539672e-05, "loss": 0.9377, "step": 64100 }, { "epoch": 0.4095805169748157, "grad_norm": 0.6365240812301636, "learning_rate": 9.001122877892356e-05, "loss": 0.6823, "step": 64110 }, { "epoch": 0.4096444041245544, "grad_norm": 0.9078708291053772, "learning_rate": 9.00082194695027e-05, "loss": 0.8199, "step": 64120 }, { "epoch": 0.4097082912742931, "grad_norm": 2.3477323055267334, "learning_rate": 9.000520975716445e-05, "loss": 0.955, "step": 64130 }, { "epoch": 0.4097721784240318, "grad_norm": 0.5948389768600464, "learning_rate": 9.000219964193914e-05, "loss": 0.8302, "step": 64140 }, { "epoch": 0.4098360655737705, "grad_norm": 0.6397002935409546, "learning_rate": 8.999918912385708e-05, "loss": 0.8583, "step": 64150 }, { "epoch": 0.4098999527235092, "grad_norm": 1.9273061752319336, "learning_rate": 8.999617820294857e-05, "loss": 0.9524, "step": 64160 }, { "epoch": 0.4099638398732479, "grad_norm": 0.8526675701141357, "learning_rate": 8.999316687924395e-05, "loss": 0.9583, "step": 64170 }, { "epoch": 0.4100277270229866, "grad_norm": 0.9683032035827637, "learning_rate": 8.999015515277352e-05, "loss": 1.1449, "step": 64180 }, { "epoch": 0.41009161417272527, "grad_norm": 0.6847209930419922, "learning_rate": 8.998714302356766e-05, "loss": 0.8517, "step": 64190 }, { "epoch": 0.410155501322464, "grad_norm": 0.6209728717803955, "learning_rate": 8.998413049165666e-05, "loss": 0.8522, "step": 64200 }, { "epoch": 0.4102193884722027, "grad_norm": 0.6670452952384949, "learning_rate": 8.998111755707088e-05, "loss": 0.9287, "step": 64210 }, { "epoch": 0.4102832756219414, "grad_norm": 1.0811097621917725, "learning_rate": 8.997810421984065e-05, "loss": 0.9053, "step": 64220 }, { "epoch": 0.4103471627716801, "grad_norm": 0.8516783118247986, "learning_rate": 8.997509047999634e-05, "loss": 0.9318, "step": 64230 }, { "epoch": 0.4104110499214188, "grad_norm": 0.880136251449585, "learning_rate": 8.997207633756828e-05, "loss": 0.8013, "step": 64240 }, { "epoch": 0.4104749370711575, "grad_norm": 0.804787278175354, "learning_rate": 8.996906179258681e-05, "loss": 1.0225, "step": 64250 }, { "epoch": 0.4105388242208962, "grad_norm": 0.7522739171981812, "learning_rate": 8.996604684508234e-05, "loss": 0.9695, "step": 64260 }, { "epoch": 0.4106027113706349, "grad_norm": 0.702925443649292, "learning_rate": 8.996303149508518e-05, "loss": 0.7292, "step": 64270 }, { "epoch": 0.4106665985203736, "grad_norm": 1.3208075761795044, "learning_rate": 8.996001574262574e-05, "loss": 0.7754, "step": 64280 }, { "epoch": 0.41073048567011233, "grad_norm": 0.8626548051834106, "learning_rate": 8.995699958773435e-05, "loss": 0.8081, "step": 64290 }, { "epoch": 0.41079437281985104, "grad_norm": 0.8688634037971497, "learning_rate": 8.995398303044142e-05, "loss": 0.8649, "step": 64300 }, { "epoch": 0.4108582599695897, "grad_norm": 0.9219092130661011, "learning_rate": 8.995096607077731e-05, "loss": 0.8441, "step": 64310 }, { "epoch": 0.4109221471193284, "grad_norm": 1.0303871631622314, "learning_rate": 8.994794870877241e-05, "loss": 0.9238, "step": 64320 }, { "epoch": 0.4109860342690671, "grad_norm": 0.938761830329895, "learning_rate": 8.994493094445711e-05, "loss": 0.6409, "step": 64330 }, { "epoch": 0.4110499214188058, "grad_norm": 0.8566682934761047, "learning_rate": 8.99419127778618e-05, "loss": 0.9025, "step": 64340 }, { "epoch": 0.4111138085685445, "grad_norm": 0.8266015648841858, "learning_rate": 8.993889420901687e-05, "loss": 0.811, "step": 64350 }, { "epoch": 0.4111776957182832, "grad_norm": 0.9890789985656738, "learning_rate": 8.993587523795271e-05, "loss": 0.921, "step": 64360 }, { "epoch": 0.4112415828680219, "grad_norm": 0.8247410655021667, "learning_rate": 8.993285586469976e-05, "loss": 1.0017, "step": 64370 }, { "epoch": 0.41130547001776063, "grad_norm": 1.1178898811340332, "learning_rate": 8.992983608928839e-05, "loss": 0.9229, "step": 64380 }, { "epoch": 0.41136935716749934, "grad_norm": 0.6633570194244385, "learning_rate": 8.992681591174903e-05, "loss": 0.7906, "step": 64390 }, { "epoch": 0.41143324431723804, "grad_norm": 0.9946048855781555, "learning_rate": 8.99237953321121e-05, "loss": 0.8584, "step": 64400 }, { "epoch": 0.41149713146697675, "grad_norm": 1.1637663841247559, "learning_rate": 8.992077435040799e-05, "loss": 0.7197, "step": 64410 }, { "epoch": 0.41156101861671546, "grad_norm": 0.9917969703674316, "learning_rate": 8.991775296666717e-05, "loss": 0.9235, "step": 64420 }, { "epoch": 0.4116249057664541, "grad_norm": 0.6718196868896484, "learning_rate": 8.991473118092003e-05, "loss": 0.891, "step": 64430 }, { "epoch": 0.4116887929161928, "grad_norm": 0.6841692924499512, "learning_rate": 8.991170899319702e-05, "loss": 1.2224, "step": 64440 }, { "epoch": 0.4117526800659315, "grad_norm": 0.8956950306892395, "learning_rate": 8.990868640352857e-05, "loss": 0.7681, "step": 64450 }, { "epoch": 0.4118165672156702, "grad_norm": 0.8539284467697144, "learning_rate": 8.990566341194513e-05, "loss": 0.9718, "step": 64460 }, { "epoch": 0.41188045436540893, "grad_norm": 1.2791920900344849, "learning_rate": 8.990294237590787e-05, "loss": 0.885, "step": 64470 }, { "epoch": 0.41194434151514764, "grad_norm": 0.7224549651145935, "learning_rate": 8.989991862076981e-05, "loss": 0.8548, "step": 64480 }, { "epoch": 0.41200822866488634, "grad_norm": 0.8494675159454346, "learning_rate": 8.989689446380503e-05, "loss": 1.2577, "step": 64490 }, { "epoch": 0.41207211581462505, "grad_norm": 1.3127714395523071, "learning_rate": 8.989386990504402e-05, "loss": 0.9295, "step": 64500 }, { "epoch": 0.41213600296436376, "grad_norm": 1.0279886722564697, "learning_rate": 8.989084494451725e-05, "loss": 1.096, "step": 64510 }, { "epoch": 0.41219989011410246, "grad_norm": 1.2837331295013428, "learning_rate": 8.988781958225515e-05, "loss": 0.744, "step": 64520 }, { "epoch": 0.41226377726384117, "grad_norm": 0.7983502745628357, "learning_rate": 8.988479381828817e-05, "loss": 0.7822, "step": 64530 }, { "epoch": 0.4123276644135799, "grad_norm": 1.4588627815246582, "learning_rate": 8.988176765264684e-05, "loss": 0.9902, "step": 64540 }, { "epoch": 0.4123915515633186, "grad_norm": 0.8244463205337524, "learning_rate": 8.98787410853616e-05, "loss": 0.7243, "step": 64550 }, { "epoch": 0.41245543871305723, "grad_norm": 0.6158170104026794, "learning_rate": 8.987571411646292e-05, "loss": 0.9385, "step": 64560 }, { "epoch": 0.41251932586279594, "grad_norm": 1.447361707687378, "learning_rate": 8.987268674598133e-05, "loss": 0.8133, "step": 64570 }, { "epoch": 0.41258321301253464, "grad_norm": 0.8146925568580627, "learning_rate": 8.986965897394728e-05, "loss": 0.8676, "step": 64580 }, { "epoch": 0.41264710016227335, "grad_norm": 0.798591136932373, "learning_rate": 8.986663080039126e-05, "loss": 0.9362, "step": 64590 }, { "epoch": 0.41271098731201206, "grad_norm": 0.6926212906837463, "learning_rate": 8.986360222534377e-05, "loss": 0.7352, "step": 64600 }, { "epoch": 0.41277487446175076, "grad_norm": 0.8038867115974426, "learning_rate": 8.986057324883535e-05, "loss": 1.2618, "step": 64610 }, { "epoch": 0.41283876161148947, "grad_norm": 1.9613544940948486, "learning_rate": 8.985754387089647e-05, "loss": 0.7561, "step": 64620 }, { "epoch": 0.4129026487612282, "grad_norm": 0.8577353358268738, "learning_rate": 8.985451409155762e-05, "loss": 0.8224, "step": 64630 }, { "epoch": 0.4129665359109669, "grad_norm": 0.5765991806983948, "learning_rate": 8.985148391084934e-05, "loss": 0.8005, "step": 64640 }, { "epoch": 0.4130304230607056, "grad_norm": 0.4820258319377899, "learning_rate": 8.984845332880213e-05, "loss": 0.8121, "step": 64650 }, { "epoch": 0.4130943102104443, "grad_norm": 1.0253220796585083, "learning_rate": 8.984542234544656e-05, "loss": 0.667, "step": 64660 }, { "epoch": 0.413158197360183, "grad_norm": 0.6785098910331726, "learning_rate": 8.984239096081308e-05, "loss": 0.9457, "step": 64670 }, { "epoch": 0.41322208450992165, "grad_norm": 0.9571899771690369, "learning_rate": 8.983935917493227e-05, "loss": 1.1104, "step": 64680 }, { "epoch": 0.41328597165966036, "grad_norm": 0.9146550893783569, "learning_rate": 8.983632698783463e-05, "loss": 1.0036, "step": 64690 }, { "epoch": 0.41334985880939906, "grad_norm": 1.2323453426361084, "learning_rate": 8.983329439955075e-05, "loss": 0.8704, "step": 64700 }, { "epoch": 0.41341374595913777, "grad_norm": 1.2397221326828003, "learning_rate": 8.98302614101111e-05, "loss": 0.9207, "step": 64710 }, { "epoch": 0.4134776331088765, "grad_norm": 0.7488781213760376, "learning_rate": 8.982722801954627e-05, "loss": 0.7842, "step": 64720 }, { "epoch": 0.4135415202586152, "grad_norm": 2.860487461090088, "learning_rate": 8.98241942278868e-05, "loss": 0.7993, "step": 64730 }, { "epoch": 0.4136054074083539, "grad_norm": 0.9510349035263062, "learning_rate": 8.982116003516324e-05, "loss": 0.9748, "step": 64740 }, { "epoch": 0.4136692945580926, "grad_norm": 0.7281776666641235, "learning_rate": 8.981812544140615e-05, "loss": 0.8138, "step": 64750 }, { "epoch": 0.4137331817078313, "grad_norm": 1.0613374710083008, "learning_rate": 8.981509044664608e-05, "loss": 0.7931, "step": 64760 }, { "epoch": 0.41379706885757, "grad_norm": 0.5681692361831665, "learning_rate": 8.981205505091363e-05, "loss": 0.7882, "step": 64770 }, { "epoch": 0.4138609560073087, "grad_norm": 0.9813446998596191, "learning_rate": 8.980901925423932e-05, "loss": 0.8314, "step": 64780 }, { "epoch": 0.4139248431570474, "grad_norm": 0.9293444156646729, "learning_rate": 8.980598305665375e-05, "loss": 0.6781, "step": 64790 }, { "epoch": 0.41398873030678607, "grad_norm": 0.8885220289230347, "learning_rate": 8.98029464581875e-05, "loss": 0.8954, "step": 64800 }, { "epoch": 0.4140526174565248, "grad_norm": 0.7418517470359802, "learning_rate": 8.979990945887114e-05, "loss": 0.8223, "step": 64810 }, { "epoch": 0.4141165046062635, "grad_norm": 0.9399496912956238, "learning_rate": 8.979687205873526e-05, "loss": 0.9294, "step": 64820 }, { "epoch": 0.4141803917560022, "grad_norm": 0.7863808274269104, "learning_rate": 8.979383425781046e-05, "loss": 0.7421, "step": 64830 }, { "epoch": 0.4142442789057409, "grad_norm": 0.8157973289489746, "learning_rate": 8.97907960561273e-05, "loss": 0.7436, "step": 64840 }, { "epoch": 0.4143081660554796, "grad_norm": 0.8442412614822388, "learning_rate": 8.978775745371642e-05, "loss": 0.8952, "step": 64850 }, { "epoch": 0.4143720532052183, "grad_norm": 1.1635627746582031, "learning_rate": 8.978471845060838e-05, "loss": 0.9926, "step": 64860 }, { "epoch": 0.414435940354957, "grad_norm": 0.975397527217865, "learning_rate": 8.978167904683383e-05, "loss": 0.814, "step": 64870 }, { "epoch": 0.4144998275046957, "grad_norm": 0.6589390635490417, "learning_rate": 8.977863924242335e-05, "loss": 0.8215, "step": 64880 }, { "epoch": 0.4145637146544344, "grad_norm": 1.0602787733078003, "learning_rate": 8.977559903740756e-05, "loss": 0.7723, "step": 64890 }, { "epoch": 0.41462760180417313, "grad_norm": 0.7909052968025208, "learning_rate": 8.977255843181707e-05, "loss": 0.9256, "step": 64900 }, { "epoch": 0.41469148895391184, "grad_norm": 3.674098491668701, "learning_rate": 8.976951742568249e-05, "loss": 0.822, "step": 64910 }, { "epoch": 0.4147553761036505, "grad_norm": 1.0264021158218384, "learning_rate": 8.97664760190345e-05, "loss": 0.9519, "step": 64920 }, { "epoch": 0.4148192632533892, "grad_norm": 0.8971536755561829, "learning_rate": 8.976343421190367e-05, "loss": 0.8374, "step": 64930 }, { "epoch": 0.4148831504031279, "grad_norm": 0.4284789562225342, "learning_rate": 8.976039200432067e-05, "loss": 0.7519, "step": 64940 }, { "epoch": 0.4149470375528666, "grad_norm": 0.5948058366775513, "learning_rate": 8.975734939631612e-05, "loss": 0.847, "step": 64950 }, { "epoch": 0.4150109247026053, "grad_norm": 1.361910104751587, "learning_rate": 8.975430638792066e-05, "loss": 0.8689, "step": 64960 }, { "epoch": 0.415074811852344, "grad_norm": 1.295660376548767, "learning_rate": 8.975126297916495e-05, "loss": 0.8311, "step": 64970 }, { "epoch": 0.4151386990020827, "grad_norm": 1.1786941289901733, "learning_rate": 8.974821917007962e-05, "loss": 1.0239, "step": 64980 }, { "epoch": 0.41520258615182143, "grad_norm": 1.0737582445144653, "learning_rate": 8.974517496069536e-05, "loss": 0.9261, "step": 64990 }, { "epoch": 0.41526647330156014, "grad_norm": 0.853832483291626, "learning_rate": 8.97421303510428e-05, "loss": 0.9344, "step": 65000 }, { "epoch": 0.41533036045129884, "grad_norm": 2.251110553741455, "learning_rate": 8.973908534115259e-05, "loss": 1.0998, "step": 65010 }, { "epoch": 0.41539424760103755, "grad_norm": 0.8067615628242493, "learning_rate": 8.973603993105542e-05, "loss": 0.9716, "step": 65020 }, { "epoch": 0.41545813475077625, "grad_norm": 0.5543524622917175, "learning_rate": 8.973299412078194e-05, "loss": 0.7536, "step": 65030 }, { "epoch": 0.4155220219005149, "grad_norm": 1.8256611824035645, "learning_rate": 8.972994791036284e-05, "loss": 1.0459, "step": 65040 }, { "epoch": 0.4155859090502536, "grad_norm": 0.8897950053215027, "learning_rate": 8.97269012998288e-05, "loss": 0.9819, "step": 65050 }, { "epoch": 0.4156497961999923, "grad_norm": 0.9131116271018982, "learning_rate": 8.97238542892105e-05, "loss": 0.8402, "step": 65060 }, { "epoch": 0.415713683349731, "grad_norm": 1.1356046199798584, "learning_rate": 8.972080687853861e-05, "loss": 0.841, "step": 65070 }, { "epoch": 0.41577757049946973, "grad_norm": 0.8197834491729736, "learning_rate": 8.971775906784383e-05, "loss": 0.7874, "step": 65080 }, { "epoch": 0.41584145764920843, "grad_norm": 0.6989075541496277, "learning_rate": 8.971471085715686e-05, "loss": 0.8665, "step": 65090 }, { "epoch": 0.41590534479894714, "grad_norm": 0.732832670211792, "learning_rate": 8.97116622465084e-05, "loss": 0.6657, "step": 65100 }, { "epoch": 0.41596923194868585, "grad_norm": 0.8469944000244141, "learning_rate": 8.970861323592913e-05, "loss": 0.8977, "step": 65110 }, { "epoch": 0.41603311909842455, "grad_norm": 1.300403118133545, "learning_rate": 8.970556382544978e-05, "loss": 1.0034, "step": 65120 }, { "epoch": 0.41609700624816326, "grad_norm": 1.2970237731933594, "learning_rate": 8.970251401510107e-05, "loss": 1.0144, "step": 65130 }, { "epoch": 0.41616089339790197, "grad_norm": 1.0652505159378052, "learning_rate": 8.969946380491367e-05, "loss": 0.7254, "step": 65140 }, { "epoch": 0.41622478054764067, "grad_norm": 0.9304187297821045, "learning_rate": 8.969641319491833e-05, "loss": 0.884, "step": 65150 }, { "epoch": 0.4162886676973793, "grad_norm": 0.8894677758216858, "learning_rate": 8.969336218514579e-05, "loss": 1.1695, "step": 65160 }, { "epoch": 0.41635255484711803, "grad_norm": 0.7384070158004761, "learning_rate": 8.969031077562673e-05, "loss": 0.8618, "step": 65170 }, { "epoch": 0.41641644199685673, "grad_norm": 0.8503040671348572, "learning_rate": 8.968725896639189e-05, "loss": 1.1173, "step": 65180 }, { "epoch": 0.41648032914659544, "grad_norm": 1.213909387588501, "learning_rate": 8.968420675747204e-05, "loss": 0.5525, "step": 65190 }, { "epoch": 0.41654421629633415, "grad_norm": 0.8109204769134521, "learning_rate": 8.968115414889791e-05, "loss": 0.8147, "step": 65200 }, { "epoch": 0.41660810344607285, "grad_norm": 0.9055116772651672, "learning_rate": 8.967810114070022e-05, "loss": 0.8597, "step": 65210 }, { "epoch": 0.41667199059581156, "grad_norm": 0.7332736849784851, "learning_rate": 8.96750477329097e-05, "loss": 0.8135, "step": 65220 }, { "epoch": 0.41673587774555026, "grad_norm": 1.6133145093917847, "learning_rate": 8.967199392555714e-05, "loss": 0.5944, "step": 65230 }, { "epoch": 0.41679976489528897, "grad_norm": 1.0285025835037231, "learning_rate": 8.966893971867329e-05, "loss": 1.2345, "step": 65240 }, { "epoch": 0.4168636520450277, "grad_norm": 0.7970749139785767, "learning_rate": 8.966588511228888e-05, "loss": 0.9716, "step": 65250 }, { "epoch": 0.4169275391947664, "grad_norm": 0.9936865568161011, "learning_rate": 8.96628301064347e-05, "loss": 0.6991, "step": 65260 }, { "epoch": 0.4169914263445051, "grad_norm": 0.6631901264190674, "learning_rate": 8.965977470114151e-05, "loss": 1.0921, "step": 65270 }, { "epoch": 0.41705531349424374, "grad_norm": 1.0194664001464844, "learning_rate": 8.965671889644007e-05, "loss": 0.8967, "step": 65280 }, { "epoch": 0.41711920064398245, "grad_norm": 1.145621657371521, "learning_rate": 8.965366269236117e-05, "loss": 0.9233, "step": 65290 }, { "epoch": 0.41718308779372115, "grad_norm": 0.7853092551231384, "learning_rate": 8.965060608893559e-05, "loss": 0.8627, "step": 65300 }, { "epoch": 0.41724697494345986, "grad_norm": 0.7077251672744751, "learning_rate": 8.96475490861941e-05, "loss": 1.0426, "step": 65310 }, { "epoch": 0.41731086209319856, "grad_norm": 0.9070340394973755, "learning_rate": 8.964449168416749e-05, "loss": 0.9206, "step": 65320 }, { "epoch": 0.41737474924293727, "grad_norm": 1.0521044731140137, "learning_rate": 8.964143388288653e-05, "loss": 0.886, "step": 65330 }, { "epoch": 0.417438636392676, "grad_norm": 0.46310827136039734, "learning_rate": 8.963837568238205e-05, "loss": 0.8873, "step": 65340 }, { "epoch": 0.4175025235424147, "grad_norm": 0.7745985388755798, "learning_rate": 8.963531708268485e-05, "loss": 0.9885, "step": 65350 }, { "epoch": 0.4175664106921534, "grad_norm": 0.7876570820808411, "learning_rate": 8.96322580838257e-05, "loss": 0.6772, "step": 65360 }, { "epoch": 0.4176302978418921, "grad_norm": 1.1240822076797485, "learning_rate": 8.962919868583544e-05, "loss": 0.9992, "step": 65370 }, { "epoch": 0.4176941849916308, "grad_norm": 1.488118290901184, "learning_rate": 8.962613888874485e-05, "loss": 1.1016, "step": 65380 }, { "epoch": 0.4177580721413695, "grad_norm": 1.2619564533233643, "learning_rate": 8.962307869258476e-05, "loss": 1.1059, "step": 65390 }, { "epoch": 0.4178219592911082, "grad_norm": 3.796415328979492, "learning_rate": 8.962001809738599e-05, "loss": 1.0094, "step": 65400 }, { "epoch": 0.41788584644084686, "grad_norm": 0.6639039516448975, "learning_rate": 8.961695710317936e-05, "loss": 0.9859, "step": 65410 }, { "epoch": 0.41794973359058557, "grad_norm": 1.1306976079940796, "learning_rate": 8.961389570999573e-05, "loss": 0.6482, "step": 65420 }, { "epoch": 0.4180136207403243, "grad_norm": 1.1172826290130615, "learning_rate": 8.961083391786585e-05, "loss": 0.9486, "step": 65430 }, { "epoch": 0.418077507890063, "grad_norm": 0.6498112678527832, "learning_rate": 8.960777172682063e-05, "loss": 0.7618, "step": 65440 }, { "epoch": 0.4181413950398017, "grad_norm": 0.7367339134216309, "learning_rate": 8.960470913689088e-05, "loss": 0.9973, "step": 65450 }, { "epoch": 0.4182052821895404, "grad_norm": 0.7694912552833557, "learning_rate": 8.960164614810744e-05, "loss": 0.8996, "step": 65460 }, { "epoch": 0.4182691693392791, "grad_norm": 0.9336798191070557, "learning_rate": 8.959858276050118e-05, "loss": 0.9093, "step": 65470 }, { "epoch": 0.4183330564890178, "grad_norm": 0.7847772836685181, "learning_rate": 8.959551897410292e-05, "loss": 0.7734, "step": 65480 }, { "epoch": 0.4183969436387565, "grad_norm": 0.8503950834274292, "learning_rate": 8.959245478894353e-05, "loss": 0.9816, "step": 65490 }, { "epoch": 0.4184608307884952, "grad_norm": 0.9452194571495056, "learning_rate": 8.958939020505388e-05, "loss": 0.9682, "step": 65500 }, { "epoch": 0.4185247179382339, "grad_norm": 0.7661617398262024, "learning_rate": 8.95863252224648e-05, "loss": 0.6985, "step": 65510 }, { "epoch": 0.41858860508797263, "grad_norm": 0.4529026448726654, "learning_rate": 8.958325984120718e-05, "loss": 0.6564, "step": 65520 }, { "epoch": 0.4186524922377113, "grad_norm": 2.9925787448883057, "learning_rate": 8.958019406131191e-05, "loss": 0.6851, "step": 65530 }, { "epoch": 0.41871637938745, "grad_norm": 0.726306676864624, "learning_rate": 8.957712788280982e-05, "loss": 0.8031, "step": 65540 }, { "epoch": 0.4187802665371887, "grad_norm": 0.6993584036827087, "learning_rate": 8.957406130573183e-05, "loss": 0.8313, "step": 65550 }, { "epoch": 0.4188441536869274, "grad_norm": 0.9798833131790161, "learning_rate": 8.957099433010881e-05, "loss": 0.9016, "step": 65560 }, { "epoch": 0.4189080408366661, "grad_norm": 0.7420501708984375, "learning_rate": 8.956792695597163e-05, "loss": 0.6753, "step": 65570 }, { "epoch": 0.4189719279864048, "grad_norm": 0.7620697617530823, "learning_rate": 8.95648591833512e-05, "loss": 0.8782, "step": 65580 }, { "epoch": 0.4190358151361435, "grad_norm": 1.2457002401351929, "learning_rate": 8.956179101227842e-05, "loss": 1.1031, "step": 65590 }, { "epoch": 0.4190997022858822, "grad_norm": 1.005566120147705, "learning_rate": 8.955872244278416e-05, "loss": 1.1262, "step": 65600 }, { "epoch": 0.41916358943562093, "grad_norm": 1.014225959777832, "learning_rate": 8.955565347489935e-05, "loss": 0.9578, "step": 65610 }, { "epoch": 0.41922747658535964, "grad_norm": 1.1588691473007202, "learning_rate": 8.955258410865488e-05, "loss": 1.1571, "step": 65620 }, { "epoch": 0.41929136373509834, "grad_norm": 0.5882399678230286, "learning_rate": 8.954951434408168e-05, "loss": 0.8187, "step": 65630 }, { "epoch": 0.41935525088483705, "grad_norm": 0.5177319645881653, "learning_rate": 8.954644418121065e-05, "loss": 0.9707, "step": 65640 }, { "epoch": 0.4194191380345757, "grad_norm": 1.1925745010375977, "learning_rate": 8.954337362007273e-05, "loss": 0.9326, "step": 65650 }, { "epoch": 0.4194830251843144, "grad_norm": 7.771919250488281, "learning_rate": 8.954030266069882e-05, "loss": 0.9108, "step": 65660 }, { "epoch": 0.4195469123340531, "grad_norm": 0.7512636184692383, "learning_rate": 8.953723130311984e-05, "loss": 0.7775, "step": 65670 }, { "epoch": 0.4196107994837918, "grad_norm": 1.3332923650741577, "learning_rate": 8.953415954736675e-05, "loss": 1.0754, "step": 65680 }, { "epoch": 0.4196746866335305, "grad_norm": 0.5721650719642639, "learning_rate": 8.953108739347047e-05, "loss": 0.9334, "step": 65690 }, { "epoch": 0.41973857378326923, "grad_norm": 0.9686694741249084, "learning_rate": 8.952801484146194e-05, "loss": 1.0833, "step": 65700 }, { "epoch": 0.41980246093300794, "grad_norm": 0.7527593374252319, "learning_rate": 8.95249418913721e-05, "loss": 0.9329, "step": 65710 }, { "epoch": 0.41986634808274664, "grad_norm": 0.8230323195457458, "learning_rate": 8.95218685432319e-05, "loss": 1.1346, "step": 65720 }, { "epoch": 0.41993023523248535, "grad_norm": 2.3285200595855713, "learning_rate": 8.95187947970723e-05, "loss": 1.1473, "step": 65730 }, { "epoch": 0.41999412238222406, "grad_norm": 0.7577224969863892, "learning_rate": 8.951572065292424e-05, "loss": 0.9537, "step": 65740 }, { "epoch": 0.42005800953196276, "grad_norm": 0.5602964758872986, "learning_rate": 8.95126461108187e-05, "loss": 0.9735, "step": 65750 }, { "epoch": 0.42012189668170147, "grad_norm": 0.63779217004776, "learning_rate": 8.950957117078662e-05, "loss": 0.7456, "step": 65760 }, { "epoch": 0.4201857838314401, "grad_norm": 1.0952467918395996, "learning_rate": 8.950649583285898e-05, "loss": 0.906, "step": 65770 }, { "epoch": 0.4202496709811788, "grad_norm": 0.687418520450592, "learning_rate": 8.950342009706675e-05, "loss": 0.7934, "step": 65780 }, { "epoch": 0.42031355813091753, "grad_norm": 1.1235476732254028, "learning_rate": 8.95003439634409e-05, "loss": 0.9785, "step": 65790 }, { "epoch": 0.42037744528065624, "grad_norm": 0.9413420557975769, "learning_rate": 8.949726743201242e-05, "loss": 0.8127, "step": 65800 }, { "epoch": 0.42044133243039494, "grad_norm": 0.9101980924606323, "learning_rate": 8.949419050281228e-05, "loss": 1.1065, "step": 65810 }, { "epoch": 0.42050521958013365, "grad_norm": 0.6493207812309265, "learning_rate": 8.94911131758715e-05, "loss": 0.7957, "step": 65820 }, { "epoch": 0.42056910672987236, "grad_norm": 0.753649890422821, "learning_rate": 8.9488035451221e-05, "loss": 1.1482, "step": 65830 }, { "epoch": 0.42063299387961106, "grad_norm": 0.9392029047012329, "learning_rate": 8.948495732889185e-05, "loss": 0.8979, "step": 65840 }, { "epoch": 0.42069688102934977, "grad_norm": 1.159693717956543, "learning_rate": 8.948187880891501e-05, "loss": 0.9243, "step": 65850 }, { "epoch": 0.4207607681790885, "grad_norm": 0.7073017358779907, "learning_rate": 8.947879989132151e-05, "loss": 0.7319, "step": 65860 }, { "epoch": 0.4208246553288272, "grad_norm": 0.5303799510002136, "learning_rate": 8.947572057614231e-05, "loss": 0.7607, "step": 65870 }, { "epoch": 0.4208885424785659, "grad_norm": 0.734471321105957, "learning_rate": 8.947264086340847e-05, "loss": 0.8881, "step": 65880 }, { "epoch": 0.42095242962830454, "grad_norm": 0.990151047706604, "learning_rate": 8.9469560753151e-05, "loss": 1.0643, "step": 65890 }, { "epoch": 0.42101631677804324, "grad_norm": 0.6194562315940857, "learning_rate": 8.94664802454009e-05, "loss": 1.0016, "step": 65900 }, { "epoch": 0.42108020392778195, "grad_norm": 0.8033568263053894, "learning_rate": 8.946339934018919e-05, "loss": 0.9705, "step": 65910 }, { "epoch": 0.42114409107752065, "grad_norm": 0.9492495656013489, "learning_rate": 8.946031803754693e-05, "loss": 0.7755, "step": 65920 }, { "epoch": 0.42120797822725936, "grad_norm": 0.7201482057571411, "learning_rate": 8.945723633750512e-05, "loss": 0.9378, "step": 65930 }, { "epoch": 0.42127186537699807, "grad_norm": 0.9046865701675415, "learning_rate": 8.945415424009478e-05, "loss": 0.9119, "step": 65940 }, { "epoch": 0.4213357525267368, "grad_norm": 0.8343170881271362, "learning_rate": 8.945107174534699e-05, "loss": 0.9199, "step": 65950 }, { "epoch": 0.4213996396764755, "grad_norm": 1.330496072769165, "learning_rate": 8.94479888532928e-05, "loss": 0.7923, "step": 65960 }, { "epoch": 0.4214635268262142, "grad_norm": 0.7059889435768127, "learning_rate": 8.94449055639632e-05, "loss": 0.9541, "step": 65970 }, { "epoch": 0.4215274139759529, "grad_norm": 0.8712106347084045, "learning_rate": 8.944182187738929e-05, "loss": 1.1176, "step": 65980 }, { "epoch": 0.4215913011256916, "grad_norm": 0.9229239821434021, "learning_rate": 8.943873779360213e-05, "loss": 0.7503, "step": 65990 }, { "epoch": 0.4216551882754303, "grad_norm": 1.1028259992599487, "learning_rate": 8.943565331263274e-05, "loss": 1.1007, "step": 66000 }, { "epoch": 0.42171907542516895, "grad_norm": 0.8142592906951904, "learning_rate": 8.943256843451221e-05, "loss": 0.9841, "step": 66010 }, { "epoch": 0.42178296257490766, "grad_norm": 1.184031367301941, "learning_rate": 8.94294831592716e-05, "loss": 0.9047, "step": 66020 }, { "epoch": 0.42184684972464637, "grad_norm": 1.0467089414596558, "learning_rate": 8.9426397486942e-05, "loss": 1.0965, "step": 66030 }, { "epoch": 0.4219107368743851, "grad_norm": 2.1014811992645264, "learning_rate": 8.942331141755445e-05, "loss": 0.7972, "step": 66040 }, { "epoch": 0.4219746240241238, "grad_norm": 0.9660546183586121, "learning_rate": 8.942022495114004e-05, "loss": 0.8632, "step": 66050 }, { "epoch": 0.4220385111738625, "grad_norm": 1.027685523033142, "learning_rate": 8.941713808772986e-05, "loss": 0.9732, "step": 66060 }, { "epoch": 0.4221023983236012, "grad_norm": 1.0367803573608398, "learning_rate": 8.941405082735503e-05, "loss": 0.8984, "step": 66070 }, { "epoch": 0.4221662854733399, "grad_norm": 0.9707443714141846, "learning_rate": 8.941096317004658e-05, "loss": 0.8234, "step": 66080 }, { "epoch": 0.4222301726230786, "grad_norm": 0.9538044929504395, "learning_rate": 8.940787511583567e-05, "loss": 0.8863, "step": 66090 }, { "epoch": 0.4222940597728173, "grad_norm": 0.920036256313324, "learning_rate": 8.940478666475333e-05, "loss": 0.9216, "step": 66100 }, { "epoch": 0.422357946922556, "grad_norm": 1.042989730834961, "learning_rate": 8.94016978168307e-05, "loss": 0.9341, "step": 66110 }, { "epoch": 0.4224218340722947, "grad_norm": 0.8969932794570923, "learning_rate": 8.93986085720989e-05, "loss": 0.9116, "step": 66120 }, { "epoch": 0.42248572122203343, "grad_norm": 1.1264333724975586, "learning_rate": 8.939551893058902e-05, "loss": 0.8614, "step": 66130 }, { "epoch": 0.4225496083717721, "grad_norm": 0.9502818584442139, "learning_rate": 8.939242889233219e-05, "loss": 0.7416, "step": 66140 }, { "epoch": 0.4226134955215108, "grad_norm": 1.023710012435913, "learning_rate": 8.93893384573595e-05, "loss": 0.6771, "step": 66150 }, { "epoch": 0.4226773826712495, "grad_norm": 0.9767735004425049, "learning_rate": 8.938624762570213e-05, "loss": 0.8853, "step": 66160 }, { "epoch": 0.4227412698209882, "grad_norm": 0.9584904909133911, "learning_rate": 8.938315639739115e-05, "loss": 0.8563, "step": 66170 }, { "epoch": 0.4228051569707269, "grad_norm": 0.7890828847885132, "learning_rate": 8.938006477245773e-05, "loss": 0.6442, "step": 66180 }, { "epoch": 0.4228690441204656, "grad_norm": 0.7645769119262695, "learning_rate": 8.937697275093298e-05, "loss": 0.5916, "step": 66190 }, { "epoch": 0.4229329312702043, "grad_norm": 1.5121580362319946, "learning_rate": 8.937388033284804e-05, "loss": 0.8618, "step": 66200 }, { "epoch": 0.422996818419943, "grad_norm": 0.7250730395317078, "learning_rate": 8.937078751823406e-05, "loss": 0.7188, "step": 66210 }, { "epoch": 0.4230607055696817, "grad_norm": 1.0403555631637573, "learning_rate": 8.93676943071222e-05, "loss": 0.7553, "step": 66220 }, { "epoch": 0.42312459271942043, "grad_norm": 0.6218414902687073, "learning_rate": 8.93646006995436e-05, "loss": 0.921, "step": 66230 }, { "epoch": 0.42318847986915914, "grad_norm": 0.6752848029136658, "learning_rate": 8.93615066955294e-05, "loss": 0.9711, "step": 66240 }, { "epoch": 0.42325236701889785, "grad_norm": 0.8991808295249939, "learning_rate": 8.935841229511079e-05, "loss": 0.8345, "step": 66250 }, { "epoch": 0.4233162541686365, "grad_norm": 0.5273988246917725, "learning_rate": 8.935531749831892e-05, "loss": 1.0576, "step": 66260 }, { "epoch": 0.4233801413183752, "grad_norm": 0.6460761427879333, "learning_rate": 8.935222230518496e-05, "loss": 0.7446, "step": 66270 }, { "epoch": 0.4234440284681139, "grad_norm": 0.8064502477645874, "learning_rate": 8.934912671574007e-05, "loss": 0.9758, "step": 66280 }, { "epoch": 0.4235079156178526, "grad_norm": 1.1486152410507202, "learning_rate": 8.934603073001542e-05, "loss": 0.9056, "step": 66290 }, { "epoch": 0.4235718027675913, "grad_norm": 0.8460824489593506, "learning_rate": 8.934293434804221e-05, "loss": 1.0032, "step": 66300 }, { "epoch": 0.42363568991733, "grad_norm": 1.1093133687973022, "learning_rate": 8.933983756985163e-05, "loss": 0.9909, "step": 66310 }, { "epoch": 0.42369957706706873, "grad_norm": 0.9142333269119263, "learning_rate": 8.933674039547484e-05, "loss": 0.7974, "step": 66320 }, { "epoch": 0.42376346421680744, "grad_norm": 1.893848180770874, "learning_rate": 8.933364282494304e-05, "loss": 0.8881, "step": 66330 }, { "epoch": 0.42382735136654615, "grad_norm": 0.6643238663673401, "learning_rate": 8.933054485828742e-05, "loss": 0.6162, "step": 66340 }, { "epoch": 0.42389123851628485, "grad_norm": 0.853939950466156, "learning_rate": 8.932744649553921e-05, "loss": 0.8599, "step": 66350 }, { "epoch": 0.42395512566602356, "grad_norm": 0.6014842987060547, "learning_rate": 8.932434773672958e-05, "loss": 0.7083, "step": 66360 }, { "epoch": 0.42401901281576226, "grad_norm": 0.7437098622322083, "learning_rate": 8.932124858188975e-05, "loss": 0.8959, "step": 66370 }, { "epoch": 0.4240828999655009, "grad_norm": 0.731093168258667, "learning_rate": 8.931814903105092e-05, "loss": 0.7875, "step": 66380 }, { "epoch": 0.4241467871152396, "grad_norm": 0.7877000570297241, "learning_rate": 8.931504908424431e-05, "loss": 0.8525, "step": 66390 }, { "epoch": 0.4242106742649783, "grad_norm": 0.929058849811554, "learning_rate": 8.931194874150116e-05, "loss": 0.7897, "step": 66400 }, { "epoch": 0.42427456141471703, "grad_norm": 1.1878929138183594, "learning_rate": 8.930884800285266e-05, "loss": 0.9998, "step": 66410 }, { "epoch": 0.42433844856445574, "grad_norm": 0.5432239174842834, "learning_rate": 8.930574686833008e-05, "loss": 0.8411, "step": 66420 }, { "epoch": 0.42440233571419445, "grad_norm": 0.9339645504951477, "learning_rate": 8.930264533796459e-05, "loss": 0.9499, "step": 66430 }, { "epoch": 0.42446622286393315, "grad_norm": 0.762392520904541, "learning_rate": 8.929954341178749e-05, "loss": 0.8893, "step": 66440 }, { "epoch": 0.42453011001367186, "grad_norm": 0.6582323908805847, "learning_rate": 8.929644108982998e-05, "loss": 0.8409, "step": 66450 }, { "epoch": 0.42459399716341056, "grad_norm": 0.8481007218360901, "learning_rate": 8.92933383721233e-05, "loss": 0.9136, "step": 66460 }, { "epoch": 0.42465788431314927, "grad_norm": 0.6672992706298828, "learning_rate": 8.929023525869872e-05, "loss": 0.8445, "step": 66470 }, { "epoch": 0.424721771462888, "grad_norm": 1.0847039222717285, "learning_rate": 8.928713174958748e-05, "loss": 0.9611, "step": 66480 }, { "epoch": 0.4247856586126267, "grad_norm": 0.881767213344574, "learning_rate": 8.928402784482084e-05, "loss": 0.9177, "step": 66490 }, { "epoch": 0.42484954576236533, "grad_norm": 1.421280026435852, "learning_rate": 8.928123399227131e-05, "loss": 0.852, "step": 66500 }, { "epoch": 0.42491343291210404, "grad_norm": 0.6856515407562256, "learning_rate": 8.927812933584552e-05, "loss": 1.2407, "step": 66510 }, { "epoch": 0.42497732006184274, "grad_norm": 0.8232982754707336, "learning_rate": 8.927502428385498e-05, "loss": 1.111, "step": 66520 }, { "epoch": 0.42504120721158145, "grad_norm": 0.7256129384040833, "learning_rate": 8.927191883633097e-05, "loss": 0.9756, "step": 66530 }, { "epoch": 0.42510509436132016, "grad_norm": 0.5605076551437378, "learning_rate": 8.926881299330476e-05, "loss": 0.8828, "step": 66540 }, { "epoch": 0.42516898151105886, "grad_norm": 1.3462023735046387, "learning_rate": 8.926570675480764e-05, "loss": 0.9569, "step": 66550 }, { "epoch": 0.42523286866079757, "grad_norm": 1.0285893678665161, "learning_rate": 8.926260012087087e-05, "loss": 0.9012, "step": 66560 }, { "epoch": 0.4252967558105363, "grad_norm": 1.9040067195892334, "learning_rate": 8.925949309152577e-05, "loss": 1.0781, "step": 66570 }, { "epoch": 0.425360642960275, "grad_norm": 0.7241610288619995, "learning_rate": 8.925638566680359e-05, "loss": 0.7973, "step": 66580 }, { "epoch": 0.4254245301100137, "grad_norm": 1.2173702716827393, "learning_rate": 8.925327784673564e-05, "loss": 1.047, "step": 66590 }, { "epoch": 0.4254884172597524, "grad_norm": 0.8426626920700073, "learning_rate": 8.925016963135324e-05, "loss": 0.78, "step": 66600 }, { "epoch": 0.4255523044094911, "grad_norm": 1.3263126611709595, "learning_rate": 8.924706102068767e-05, "loss": 0.7994, "step": 66610 }, { "epoch": 0.42561619155922975, "grad_norm": 1.6536914110183716, "learning_rate": 8.924395201477025e-05, "loss": 0.8917, "step": 66620 }, { "epoch": 0.42568007870896846, "grad_norm": 0.7909001111984253, "learning_rate": 8.924084261363228e-05, "loss": 0.7676, "step": 66630 }, { "epoch": 0.42574396585870716, "grad_norm": 0.40761637687683105, "learning_rate": 8.923773281730505e-05, "loss": 0.7697, "step": 66640 }, { "epoch": 0.42580785300844587, "grad_norm": 0.8846787810325623, "learning_rate": 8.923462262581994e-05, "loss": 0.8687, "step": 66650 }, { "epoch": 0.4258717401581846, "grad_norm": 0.4814871847629547, "learning_rate": 8.923151203920822e-05, "loss": 0.6312, "step": 66660 }, { "epoch": 0.4259356273079233, "grad_norm": 0.6910040378570557, "learning_rate": 8.922840105750124e-05, "loss": 0.8927, "step": 66670 }, { "epoch": 0.425999514457662, "grad_norm": 1.046462893486023, "learning_rate": 8.922528968073032e-05, "loss": 0.7882, "step": 66680 }, { "epoch": 0.4260634016074007, "grad_norm": 1.2014803886413574, "learning_rate": 8.92221779089268e-05, "loss": 0.9165, "step": 66690 }, { "epoch": 0.4261272887571394, "grad_norm": 0.9146868586540222, "learning_rate": 8.921906574212202e-05, "loss": 1.0733, "step": 66700 }, { "epoch": 0.4261911759068781, "grad_norm": 0.8694166541099548, "learning_rate": 8.92159531803473e-05, "loss": 0.9912, "step": 66710 }, { "epoch": 0.4262550630566168, "grad_norm": 0.904970645904541, "learning_rate": 8.92131515370767e-05, "loss": 1.1384, "step": 66720 }, { "epoch": 0.4263189502063555, "grad_norm": 1.0510960817337036, "learning_rate": 8.92100382249455e-05, "loss": 0.8948, "step": 66730 }, { "epoch": 0.42638283735609417, "grad_norm": 0.8128019571304321, "learning_rate": 8.920692451793531e-05, "loss": 0.7585, "step": 66740 }, { "epoch": 0.4264467245058329, "grad_norm": 0.7644541263580322, "learning_rate": 8.920381041607746e-05, "loss": 1.0066, "step": 66750 }, { "epoch": 0.4265106116555716, "grad_norm": 0.6716737151145935, "learning_rate": 8.920069591940332e-05, "loss": 0.7818, "step": 66760 }, { "epoch": 0.4265744988053103, "grad_norm": 0.5078364610671997, "learning_rate": 8.919758102794427e-05, "loss": 1.0828, "step": 66770 }, { "epoch": 0.426638385955049, "grad_norm": 1.3749090433120728, "learning_rate": 8.919446574173165e-05, "loss": 0.7222, "step": 66780 }, { "epoch": 0.4267022731047877, "grad_norm": 0.9173924922943115, "learning_rate": 8.919135006079686e-05, "loss": 0.9544, "step": 66790 }, { "epoch": 0.4267661602545264, "grad_norm": 2.012134552001953, "learning_rate": 8.918823398517127e-05, "loss": 0.805, "step": 66800 }, { "epoch": 0.4268300474042651, "grad_norm": 1.9749096632003784, "learning_rate": 8.918511751488627e-05, "loss": 0.9767, "step": 66810 }, { "epoch": 0.4268939345540038, "grad_norm": 1.5169198513031006, "learning_rate": 8.918200064997324e-05, "loss": 1.0532, "step": 66820 }, { "epoch": 0.4269578217037425, "grad_norm": 0.8941536545753479, "learning_rate": 8.917888339046354e-05, "loss": 0.9049, "step": 66830 }, { "epoch": 0.42702170885348123, "grad_norm": 0.7928354144096375, "learning_rate": 8.917576573638862e-05, "loss": 0.7091, "step": 66840 }, { "epoch": 0.42708559600321994, "grad_norm": 0.7303599119186401, "learning_rate": 8.917264768777983e-05, "loss": 0.9175, "step": 66850 }, { "epoch": 0.4271494831529586, "grad_norm": 0.6727188229560852, "learning_rate": 8.91695292446686e-05, "loss": 0.9231, "step": 66860 }, { "epoch": 0.4272133703026973, "grad_norm": 0.5279465913772583, "learning_rate": 8.91664104070863e-05, "loss": 0.7118, "step": 66870 }, { "epoch": 0.427277257452436, "grad_norm": 0.7597615122795105, "learning_rate": 8.916329117506439e-05, "loss": 0.9777, "step": 66880 }, { "epoch": 0.4273411446021747, "grad_norm": 1.0109660625457764, "learning_rate": 8.916017154863425e-05, "loss": 0.8774, "step": 66890 }, { "epoch": 0.4274050317519134, "grad_norm": 0.7892023324966431, "learning_rate": 8.91570515278273e-05, "loss": 1.0988, "step": 66900 }, { "epoch": 0.4274689189016521, "grad_norm": 0.7960211038589478, "learning_rate": 8.915393111267496e-05, "loss": 0.8625, "step": 66910 }, { "epoch": 0.4275328060513908, "grad_norm": 0.9462035894393921, "learning_rate": 8.915081030320867e-05, "loss": 0.9255, "step": 66920 }, { "epoch": 0.42759669320112953, "grad_norm": 0.6910783648490906, "learning_rate": 8.914768909945985e-05, "loss": 0.695, "step": 66930 }, { "epoch": 0.42766058035086824, "grad_norm": 0.8994881510734558, "learning_rate": 8.914456750145991e-05, "loss": 0.9296, "step": 66940 }, { "epoch": 0.42772446750060694, "grad_norm": 0.7058424949645996, "learning_rate": 8.914144550924034e-05, "loss": 0.9154, "step": 66950 }, { "epoch": 0.42778835465034565, "grad_norm": 0.9544531106948853, "learning_rate": 8.913832312283254e-05, "loss": 1.0751, "step": 66960 }, { "epoch": 0.42785224180008435, "grad_norm": 1.3822722434997559, "learning_rate": 8.913520034226797e-05, "loss": 0.9816, "step": 66970 }, { "epoch": 0.42791612894982306, "grad_norm": 0.7017986178398132, "learning_rate": 8.913207716757807e-05, "loss": 0.7602, "step": 66980 }, { "epoch": 0.4279800160995617, "grad_norm": 0.4731631577014923, "learning_rate": 8.912895359879431e-05, "loss": 1.0528, "step": 66990 }, { "epoch": 0.4280439032493004, "grad_norm": 0.7982561588287354, "learning_rate": 8.912582963594813e-05, "loss": 0.831, "step": 67000 }, { "epoch": 0.4281077903990391, "grad_norm": 1.1041196584701538, "learning_rate": 8.912270527907099e-05, "loss": 0.8662, "step": 67010 }, { "epoch": 0.42817167754877783, "grad_norm": 1.0753505229949951, "learning_rate": 8.911958052819436e-05, "loss": 0.8874, "step": 67020 }, { "epoch": 0.42823556469851654, "grad_norm": 1.0988258123397827, "learning_rate": 8.911645538334971e-05, "loss": 1.1105, "step": 67030 }, { "epoch": 0.42829945184825524, "grad_norm": 3.2240488529205322, "learning_rate": 8.911332984456854e-05, "loss": 0.8623, "step": 67040 }, { "epoch": 0.42836333899799395, "grad_norm": 0.5571461915969849, "learning_rate": 8.911020391188229e-05, "loss": 0.8196, "step": 67050 }, { "epoch": 0.42842722614773265, "grad_norm": 1.0421580076217651, "learning_rate": 8.910707758532244e-05, "loss": 0.8394, "step": 67060 }, { "epoch": 0.42849111329747136, "grad_norm": 1.025112509727478, "learning_rate": 8.91039508649205e-05, "loss": 0.7116, "step": 67070 }, { "epoch": 0.42855500044721007, "grad_norm": 1.0207161903381348, "learning_rate": 8.910082375070792e-05, "loss": 1.3015, "step": 67080 }, { "epoch": 0.42861888759694877, "grad_norm": 0.5158314108848572, "learning_rate": 8.909769624271625e-05, "loss": 0.873, "step": 67090 }, { "epoch": 0.4286827747466875, "grad_norm": 2.0511646270751953, "learning_rate": 8.909456834097693e-05, "loss": 0.7935, "step": 67100 }, { "epoch": 0.42874666189642613, "grad_norm": 0.5745459198951721, "learning_rate": 8.909144004552148e-05, "loss": 1.0678, "step": 67110 }, { "epoch": 0.42881054904616484, "grad_norm": 1.3202085494995117, "learning_rate": 8.908831135638143e-05, "loss": 0.6992, "step": 67120 }, { "epoch": 0.42887443619590354, "grad_norm": 1.3148435354232788, "learning_rate": 8.908518227358826e-05, "loss": 0.7438, "step": 67130 }, { "epoch": 0.42893832334564225, "grad_norm": 1.0374261140823364, "learning_rate": 8.908205279717349e-05, "loss": 0.9165, "step": 67140 }, { "epoch": 0.42900221049538095, "grad_norm": 1.0254135131835938, "learning_rate": 8.907892292716864e-05, "loss": 1.087, "step": 67150 }, { "epoch": 0.42906609764511966, "grad_norm": 0.7220088839530945, "learning_rate": 8.907579266360523e-05, "loss": 1.0477, "step": 67160 }, { "epoch": 0.42912998479485837, "grad_norm": 0.8464503884315491, "learning_rate": 8.907266200651478e-05, "loss": 0.9686, "step": 67170 }, { "epoch": 0.42919387194459707, "grad_norm": 0.6220828890800476, "learning_rate": 8.906953095592882e-05, "loss": 0.7929, "step": 67180 }, { "epoch": 0.4292577590943358, "grad_norm": 0.8189134001731873, "learning_rate": 8.906639951187889e-05, "loss": 0.9221, "step": 67190 }, { "epoch": 0.4293216462440745, "grad_norm": 0.879299521446228, "learning_rate": 8.906326767439651e-05, "loss": 0.8583, "step": 67200 }, { "epoch": 0.4293855333938132, "grad_norm": 0.9902644157409668, "learning_rate": 8.906013544351323e-05, "loss": 0.8649, "step": 67210 }, { "epoch": 0.4294494205435519, "grad_norm": 0.727925717830658, "learning_rate": 8.905700281926061e-05, "loss": 0.8093, "step": 67220 }, { "epoch": 0.42951330769329055, "grad_norm": 0.6252252459526062, "learning_rate": 8.905386980167016e-05, "loss": 0.7309, "step": 67230 }, { "epoch": 0.42957719484302925, "grad_norm": 1.9642329216003418, "learning_rate": 8.905073639077347e-05, "loss": 0.9235, "step": 67240 }, { "epoch": 0.42964108199276796, "grad_norm": 0.7746663689613342, "learning_rate": 8.904760258660208e-05, "loss": 0.9314, "step": 67250 }, { "epoch": 0.42970496914250667, "grad_norm": 0.423170804977417, "learning_rate": 8.904446838918754e-05, "loss": 0.9009, "step": 67260 }, { "epoch": 0.42976885629224537, "grad_norm": 1.2594034671783447, "learning_rate": 8.904133379856143e-05, "loss": 0.9342, "step": 67270 }, { "epoch": 0.4298327434419841, "grad_norm": 0.9244500994682312, "learning_rate": 8.903819881475532e-05, "loss": 0.9128, "step": 67280 }, { "epoch": 0.4298966305917228, "grad_norm": 0.9682210683822632, "learning_rate": 8.903506343780077e-05, "loss": 0.8821, "step": 67290 }, { "epoch": 0.4299605177414615, "grad_norm": 1.104791283607483, "learning_rate": 8.903192766772936e-05, "loss": 1.0183, "step": 67300 }, { "epoch": 0.4300244048912002, "grad_norm": 1.1504932641983032, "learning_rate": 8.902879150457269e-05, "loss": 0.7472, "step": 67310 }, { "epoch": 0.4300882920409389, "grad_norm": 0.5592100024223328, "learning_rate": 8.90256549483623e-05, "loss": 0.8123, "step": 67320 }, { "epoch": 0.4301521791906776, "grad_norm": 1.0708913803100586, "learning_rate": 8.902251799912981e-05, "loss": 0.7882, "step": 67330 }, { "epoch": 0.4302160663404163, "grad_norm": 0.6294905543327332, "learning_rate": 8.90193806569068e-05, "loss": 0.8449, "step": 67340 }, { "epoch": 0.43027995349015496, "grad_norm": 1.0562630891799927, "learning_rate": 8.901624292172488e-05, "loss": 1.2612, "step": 67350 }, { "epoch": 0.43034384063989367, "grad_norm": 0.6391942501068115, "learning_rate": 8.901310479361564e-05, "loss": 0.9626, "step": 67360 }, { "epoch": 0.4304077277896324, "grad_norm": 0.8884569406509399, "learning_rate": 8.900996627261067e-05, "loss": 0.9499, "step": 67370 }, { "epoch": 0.4304716149393711, "grad_norm": 1.3086752891540527, "learning_rate": 8.90068273587416e-05, "loss": 0.9847, "step": 67380 }, { "epoch": 0.4305355020891098, "grad_norm": 0.8015036582946777, "learning_rate": 8.900368805204003e-05, "loss": 0.9094, "step": 67390 }, { "epoch": 0.4305993892388485, "grad_norm": 0.5839217901229858, "learning_rate": 8.900054835253758e-05, "loss": 0.9917, "step": 67400 }, { "epoch": 0.4306632763885872, "grad_norm": 1.5205440521240234, "learning_rate": 8.899740826026587e-05, "loss": 0.7, "step": 67410 }, { "epoch": 0.4307271635383259, "grad_norm": 0.9681718349456787, "learning_rate": 8.899426777525653e-05, "loss": 0.7742, "step": 67420 }, { "epoch": 0.4307910506880646, "grad_norm": 0.8119606375694275, "learning_rate": 8.899112689754117e-05, "loss": 0.8792, "step": 67430 }, { "epoch": 0.4308549378378033, "grad_norm": 0.8435991406440735, "learning_rate": 8.898798562715142e-05, "loss": 1.0099, "step": 67440 }, { "epoch": 0.430918824987542, "grad_norm": 0.4675378203392029, "learning_rate": 8.898484396411894e-05, "loss": 0.8346, "step": 67450 }, { "epoch": 0.43098271213728073, "grad_norm": 0.8612586855888367, "learning_rate": 8.898170190847535e-05, "loss": 0.7461, "step": 67460 }, { "epoch": 0.4310465992870194, "grad_norm": 0.769745409488678, "learning_rate": 8.897855946025228e-05, "loss": 0.9233, "step": 67470 }, { "epoch": 0.4311104864367581, "grad_norm": 1.4678987264633179, "learning_rate": 8.897541661948142e-05, "loss": 0.7533, "step": 67480 }, { "epoch": 0.4311743735864968, "grad_norm": 1.0737018585205078, "learning_rate": 8.897227338619438e-05, "loss": 0.6886, "step": 67490 }, { "epoch": 0.4312382607362355, "grad_norm": 0.6413093209266663, "learning_rate": 8.896912976042285e-05, "loss": 0.9434, "step": 67500 }, { "epoch": 0.4313021478859742, "grad_norm": 0.8239714503288269, "learning_rate": 8.896598574219845e-05, "loss": 0.904, "step": 67510 }, { "epoch": 0.4313660350357129, "grad_norm": 0.8861196041107178, "learning_rate": 8.896284133155288e-05, "loss": 0.909, "step": 67520 }, { "epoch": 0.4314299221854516, "grad_norm": 0.7210700511932373, "learning_rate": 8.895969652851778e-05, "loss": 0.9084, "step": 67530 }, { "epoch": 0.4314938093351903, "grad_norm": 1.172956943511963, "learning_rate": 8.895655133312483e-05, "loss": 0.9011, "step": 67540 }, { "epoch": 0.43155769648492903, "grad_norm": 0.9112328886985779, "learning_rate": 8.895340574540571e-05, "loss": 0.7824, "step": 67550 }, { "epoch": 0.43162158363466774, "grad_norm": 1.0518110990524292, "learning_rate": 8.895025976539209e-05, "loss": 1.2023, "step": 67560 }, { "epoch": 0.43168547078440644, "grad_norm": 0.8246524930000305, "learning_rate": 8.894711339311567e-05, "loss": 1.0688, "step": 67570 }, { "epoch": 0.43174935793414515, "grad_norm": 0.9622389078140259, "learning_rate": 8.894396662860811e-05, "loss": 0.6852, "step": 67580 }, { "epoch": 0.4318132450838838, "grad_norm": 0.7277495265007019, "learning_rate": 8.894081947190112e-05, "loss": 0.8892, "step": 67590 }, { "epoch": 0.4318771322336225, "grad_norm": 0.5220545530319214, "learning_rate": 8.893767192302639e-05, "loss": 0.7688, "step": 67600 }, { "epoch": 0.4319410193833612, "grad_norm": 0.7757664322853088, "learning_rate": 8.893452398201561e-05, "loss": 0.9166, "step": 67610 }, { "epoch": 0.4320049065330999, "grad_norm": 1.4959086179733276, "learning_rate": 8.89313756489005e-05, "loss": 1.0149, "step": 67620 }, { "epoch": 0.4320687936828386, "grad_norm": 1.3954126834869385, "learning_rate": 8.892822692371277e-05, "loss": 0.802, "step": 67630 }, { "epoch": 0.43213268083257733, "grad_norm": 0.7430549263954163, "learning_rate": 8.89250778064841e-05, "loss": 0.8956, "step": 67640 }, { "epoch": 0.43219656798231604, "grad_norm": 1.650481939315796, "learning_rate": 8.892192829724621e-05, "loss": 1.1669, "step": 67650 }, { "epoch": 0.43226045513205474, "grad_norm": 1.7691149711608887, "learning_rate": 8.891877839603085e-05, "loss": 1.1042, "step": 67660 }, { "epoch": 0.43232434228179345, "grad_norm": 0.6639987230300903, "learning_rate": 8.891562810286971e-05, "loss": 0.8842, "step": 67670 }, { "epoch": 0.43238822943153216, "grad_norm": 0.9073365926742554, "learning_rate": 8.891247741779454e-05, "loss": 1.1714, "step": 67680 }, { "epoch": 0.43245211658127086, "grad_norm": 1.1349682807922363, "learning_rate": 8.890932634083704e-05, "loss": 0.9899, "step": 67690 }, { "epoch": 0.43251600373100957, "grad_norm": 0.7573813796043396, "learning_rate": 8.890617487202899e-05, "loss": 0.8316, "step": 67700 }, { "epoch": 0.4325798908807482, "grad_norm": 0.7431557178497314, "learning_rate": 8.890302301140208e-05, "loss": 0.8598, "step": 67710 }, { "epoch": 0.4326437780304869, "grad_norm": 0.6789889931678772, "learning_rate": 8.889987075898807e-05, "loss": 1.1971, "step": 67720 }, { "epoch": 0.43270766518022563, "grad_norm": 0.5719479322433472, "learning_rate": 8.889671811481872e-05, "loss": 0.6596, "step": 67730 }, { "epoch": 0.43277155232996434, "grad_norm": 0.8801824450492859, "learning_rate": 8.889356507892575e-05, "loss": 0.8168, "step": 67740 }, { "epoch": 0.43283543947970304, "grad_norm": 1.7005552053451538, "learning_rate": 8.889041165134096e-05, "loss": 0.8598, "step": 67750 }, { "epoch": 0.43289932662944175, "grad_norm": 0.5636479258537292, "learning_rate": 8.888725783209606e-05, "loss": 0.7868, "step": 67760 }, { "epoch": 0.43296321377918046, "grad_norm": 0.9649848937988281, "learning_rate": 8.888410362122283e-05, "loss": 0.8729, "step": 67770 }, { "epoch": 0.43302710092891916, "grad_norm": 1.2120856046676636, "learning_rate": 8.888094901875303e-05, "loss": 1.1061, "step": 67780 }, { "epoch": 0.43309098807865787, "grad_norm": 1.1897577047348022, "learning_rate": 8.887779402471846e-05, "loss": 0.8963, "step": 67790 }, { "epoch": 0.4331548752283966, "grad_norm": 0.8927859663963318, "learning_rate": 8.887463863915087e-05, "loss": 0.985, "step": 67800 }, { "epoch": 0.4332187623781353, "grad_norm": 1.1183792352676392, "learning_rate": 8.887148286208202e-05, "loss": 1.0094, "step": 67810 }, { "epoch": 0.433282649527874, "grad_norm": 1.071887731552124, "learning_rate": 8.886832669354372e-05, "loss": 0.8359, "step": 67820 }, { "epoch": 0.4333465366776127, "grad_norm": 0.6402618885040283, "learning_rate": 8.886517013356774e-05, "loss": 1.0026, "step": 67830 }, { "epoch": 0.43341042382735134, "grad_norm": 1.0560641288757324, "learning_rate": 8.886201318218587e-05, "loss": 0.7045, "step": 67840 }, { "epoch": 0.43347431097709005, "grad_norm": 0.9585883021354675, "learning_rate": 8.88588558394299e-05, "loss": 0.9572, "step": 67850 }, { "epoch": 0.43353819812682876, "grad_norm": 0.7981050610542297, "learning_rate": 8.885569810533166e-05, "loss": 0.7819, "step": 67860 }, { "epoch": 0.43360208527656746, "grad_norm": 1.467461347579956, "learning_rate": 8.88525399799229e-05, "loss": 0.9183, "step": 67870 }, { "epoch": 0.43366597242630617, "grad_norm": 0.9360789060592651, "learning_rate": 8.884938146323546e-05, "loss": 1.0038, "step": 67880 }, { "epoch": 0.4337298595760449, "grad_norm": 1.3165303468704224, "learning_rate": 8.884622255530116e-05, "loss": 0.8743, "step": 67890 }, { "epoch": 0.4337937467257836, "grad_norm": 1.1677271127700806, "learning_rate": 8.884306325615174e-05, "loss": 1.0382, "step": 67900 }, { "epoch": 0.4338576338755223, "grad_norm": 1.1823782920837402, "learning_rate": 8.883990356581911e-05, "loss": 0.8917, "step": 67910 }, { "epoch": 0.433921521025261, "grad_norm": 0.8433313369750977, "learning_rate": 8.883674348433504e-05, "loss": 0.8236, "step": 67920 }, { "epoch": 0.4339854081749997, "grad_norm": 1.1049748659133911, "learning_rate": 8.883358301173138e-05, "loss": 0.7639, "step": 67930 }, { "epoch": 0.4340492953247384, "grad_norm": 0.8020467162132263, "learning_rate": 8.883042214803991e-05, "loss": 0.9805, "step": 67940 }, { "epoch": 0.4341131824744771, "grad_norm": 0.5183336734771729, "learning_rate": 8.882726089329252e-05, "loss": 0.9406, "step": 67950 }, { "epoch": 0.43417706962421576, "grad_norm": 0.9549485445022583, "learning_rate": 8.882409924752102e-05, "loss": 0.7904, "step": 67960 }, { "epoch": 0.43424095677395447, "grad_norm": 0.9031966924667358, "learning_rate": 8.882093721075724e-05, "loss": 0.6085, "step": 67970 }, { "epoch": 0.4343048439236932, "grad_norm": 0.7417629957199097, "learning_rate": 8.881777478303306e-05, "loss": 0.979, "step": 67980 }, { "epoch": 0.4343687310734319, "grad_norm": 0.742239236831665, "learning_rate": 8.881461196438027e-05, "loss": 1.0707, "step": 67990 }, { "epoch": 0.4344326182231706, "grad_norm": 1.0497804880142212, "learning_rate": 8.88114487548308e-05, "loss": 0.8717, "step": 68000 }, { "epoch": 0.4344965053729093, "grad_norm": 0.7527285814285278, "learning_rate": 8.880828515441643e-05, "loss": 1.0762, "step": 68010 }, { "epoch": 0.434560392522648, "grad_norm": 0.8218625783920288, "learning_rate": 8.880512116316908e-05, "loss": 1.0556, "step": 68020 }, { "epoch": 0.4346242796723867, "grad_norm": 1.8415364027023315, "learning_rate": 8.880195678112058e-05, "loss": 1.1582, "step": 68030 }, { "epoch": 0.4346881668221254, "grad_norm": 0.6465769410133362, "learning_rate": 8.87987920083028e-05, "loss": 1.0762, "step": 68040 }, { "epoch": 0.4347520539718641, "grad_norm": 0.6471286416053772, "learning_rate": 8.879562684474762e-05, "loss": 1.2511, "step": 68050 }, { "epoch": 0.4348159411216028, "grad_norm": 0.6721779704093933, "learning_rate": 8.879246129048693e-05, "loss": 0.8825, "step": 68060 }, { "epoch": 0.43487982827134153, "grad_norm": 0.8682761788368225, "learning_rate": 8.878929534555259e-05, "loss": 1.0418, "step": 68070 }, { "epoch": 0.4349437154210802, "grad_norm": 0.7083001732826233, "learning_rate": 8.878612900997648e-05, "loss": 0.9285, "step": 68080 }, { "epoch": 0.4350076025708189, "grad_norm": 0.7909469604492188, "learning_rate": 8.878296228379048e-05, "loss": 0.9, "step": 68090 }, { "epoch": 0.4350714897205576, "grad_norm": 0.7747198939323425, "learning_rate": 8.877979516702651e-05, "loss": 0.7877, "step": 68100 }, { "epoch": 0.4351353768702963, "grad_norm": 1.1311992406845093, "learning_rate": 8.877662765971646e-05, "loss": 0.9031, "step": 68110 }, { "epoch": 0.435199264020035, "grad_norm": 0.8452590107917786, "learning_rate": 8.877345976189223e-05, "loss": 0.8362, "step": 68120 }, { "epoch": 0.4352631511697737, "grad_norm": 1.3919566869735718, "learning_rate": 8.877029147358571e-05, "loss": 0.8168, "step": 68130 }, { "epoch": 0.4353270383195124, "grad_norm": 1.0793455839157104, "learning_rate": 8.87671227948288e-05, "loss": 0.6657, "step": 68140 }, { "epoch": 0.4353909254692511, "grad_norm": 0.6547946929931641, "learning_rate": 8.876395372565344e-05, "loss": 1.1194, "step": 68150 }, { "epoch": 0.43545481261898983, "grad_norm": 1.289340615272522, "learning_rate": 8.876078426609153e-05, "loss": 1.0495, "step": 68160 }, { "epoch": 0.43551869976872853, "grad_norm": 1.287331223487854, "learning_rate": 8.875761441617498e-05, "loss": 0.8023, "step": 68170 }, { "epoch": 0.43558258691846724, "grad_norm": 1.054658055305481, "learning_rate": 8.875444417593574e-05, "loss": 0.8072, "step": 68180 }, { "epoch": 0.43564647406820595, "grad_norm": 1.5471371412277222, "learning_rate": 8.87512735454057e-05, "loss": 0.8984, "step": 68190 }, { "epoch": 0.4357103612179446, "grad_norm": 0.9853270649909973, "learning_rate": 8.874810252461683e-05, "loss": 0.8457, "step": 68200 }, { "epoch": 0.4357742483676833, "grad_norm": 0.8379093408584595, "learning_rate": 8.874493111360103e-05, "loss": 1.0092, "step": 68210 }, { "epoch": 0.435838135517422, "grad_norm": 0.6254721879959106, "learning_rate": 8.874175931239026e-05, "loss": 0.792, "step": 68220 }, { "epoch": 0.4359020226671607, "grad_norm": 0.5673577189445496, "learning_rate": 8.873858712101645e-05, "loss": 0.7041, "step": 68230 }, { "epoch": 0.4359659098168994, "grad_norm": 0.8581469058990479, "learning_rate": 8.873541453951157e-05, "loss": 1.118, "step": 68240 }, { "epoch": 0.43602979696663813, "grad_norm": 0.7700116634368896, "learning_rate": 8.873224156790754e-05, "loss": 0.9587, "step": 68250 }, { "epoch": 0.43609368411637683, "grad_norm": 1.4901466369628906, "learning_rate": 8.872906820623634e-05, "loss": 0.9082, "step": 68260 }, { "epoch": 0.43615757126611554, "grad_norm": 1.1333754062652588, "learning_rate": 8.872589445452991e-05, "loss": 0.8202, "step": 68270 }, { "epoch": 0.43622145841585425, "grad_norm": 0.4992083013057709, "learning_rate": 8.872272031282022e-05, "loss": 0.8428, "step": 68280 }, { "epoch": 0.43628534556559295, "grad_norm": 0.7288440465927124, "learning_rate": 8.871954578113925e-05, "loss": 0.7839, "step": 68290 }, { "epoch": 0.43634923271533166, "grad_norm": 1.4860522747039795, "learning_rate": 8.871637085951894e-05, "loss": 0.9678, "step": 68300 }, { "epoch": 0.43641311986507036, "grad_norm": 0.8923503756523132, "learning_rate": 8.87131955479913e-05, "loss": 0.9322, "step": 68310 }, { "epoch": 0.436477007014809, "grad_norm": 1.1527504920959473, "learning_rate": 8.871001984658826e-05, "loss": 1.0341, "step": 68320 }, { "epoch": 0.4365408941645477, "grad_norm": 0.9049966931343079, "learning_rate": 8.870684375534185e-05, "loss": 1.0123, "step": 68330 }, { "epoch": 0.4366047813142864, "grad_norm": 0.6281135678291321, "learning_rate": 8.870366727428404e-05, "loss": 0.9563, "step": 68340 }, { "epoch": 0.43666866846402513, "grad_norm": 0.6897270679473877, "learning_rate": 8.870049040344682e-05, "loss": 0.8434, "step": 68350 }, { "epoch": 0.43673255561376384, "grad_norm": 1.3322041034698486, "learning_rate": 8.869731314286215e-05, "loss": 1.0403, "step": 68360 }, { "epoch": 0.43679644276350255, "grad_norm": 0.9318044781684875, "learning_rate": 8.869413549256209e-05, "loss": 0.8422, "step": 68370 }, { "epoch": 0.43686032991324125, "grad_norm": 0.8586065769195557, "learning_rate": 8.86909574525786e-05, "loss": 0.8878, "step": 68380 }, { "epoch": 0.43692421706297996, "grad_norm": 1.9818271398544312, "learning_rate": 8.86877790229437e-05, "loss": 0.7168, "step": 68390 }, { "epoch": 0.43698810421271866, "grad_norm": 0.7556184530258179, "learning_rate": 8.868460020368941e-05, "loss": 1.0074, "step": 68400 }, { "epoch": 0.43705199136245737, "grad_norm": 0.5547859072685242, "learning_rate": 8.868142099484771e-05, "loss": 0.9824, "step": 68410 }, { "epoch": 0.4371158785121961, "grad_norm": 0.8270901441574097, "learning_rate": 8.867824139645063e-05, "loss": 0.6677, "step": 68420 }, { "epoch": 0.4371797656619348, "grad_norm": 0.7375511527061462, "learning_rate": 8.867506140853021e-05, "loss": 0.8542, "step": 68430 }, { "epoch": 0.43724365281167343, "grad_norm": 0.879522979259491, "learning_rate": 8.867188103111845e-05, "loss": 0.8551, "step": 68440 }, { "epoch": 0.43730753996141214, "grad_norm": 1.1079013347625732, "learning_rate": 8.866870026424741e-05, "loss": 1.1122, "step": 68450 }, { "epoch": 0.43737142711115085, "grad_norm": 1.25412917137146, "learning_rate": 8.86655191079491e-05, "loss": 0.8394, "step": 68460 }, { "epoch": 0.43743531426088955, "grad_norm": 0.7833040952682495, "learning_rate": 8.866233756225555e-05, "loss": 0.8275, "step": 68470 }, { "epoch": 0.43749920141062826, "grad_norm": 1.0346733331680298, "learning_rate": 8.865915562719882e-05, "loss": 0.8503, "step": 68480 }, { "epoch": 0.43756308856036696, "grad_norm": 0.9302981495857239, "learning_rate": 8.865597330281096e-05, "loss": 0.7965, "step": 68490 }, { "epoch": 0.43762697571010567, "grad_norm": 0.8941460251808167, "learning_rate": 8.8652790589124e-05, "loss": 0.7823, "step": 68500 }, { "epoch": 0.4376908628598444, "grad_norm": 0.7403380870819092, "learning_rate": 8.864960748617e-05, "loss": 0.9164, "step": 68510 }, { "epoch": 0.4377547500095831, "grad_norm": 1.2985106706619263, "learning_rate": 8.8646423993981e-05, "loss": 0.9005, "step": 68520 }, { "epoch": 0.4378186371593218, "grad_norm": 0.5682730078697205, "learning_rate": 8.864324011258908e-05, "loss": 0.8248, "step": 68530 }, { "epoch": 0.4378825243090605, "grad_norm": 1.3618555068969727, "learning_rate": 8.864005584202632e-05, "loss": 1.1664, "step": 68540 }, { "epoch": 0.4379464114587992, "grad_norm": 0.6019179224967957, "learning_rate": 8.863687118232475e-05, "loss": 0.8097, "step": 68550 }, { "epoch": 0.43801029860853785, "grad_norm": 1.4094189405441284, "learning_rate": 8.863368613351648e-05, "loss": 0.7467, "step": 68560 }, { "epoch": 0.43807418575827656, "grad_norm": 1.509199857711792, "learning_rate": 8.863050069563355e-05, "loss": 0.9534, "step": 68570 }, { "epoch": 0.43813807290801526, "grad_norm": 1.1251524686813354, "learning_rate": 8.862731486870808e-05, "loss": 0.9511, "step": 68580 }, { "epoch": 0.43820196005775397, "grad_norm": 0.9050050973892212, "learning_rate": 8.862412865277211e-05, "loss": 0.9554, "step": 68590 }, { "epoch": 0.4382658472074927, "grad_norm": 0.6649369597434998, "learning_rate": 8.862094204785776e-05, "loss": 0.8778, "step": 68600 }, { "epoch": 0.4383297343572314, "grad_norm": 0.7536949515342712, "learning_rate": 8.86177550539971e-05, "loss": 0.8764, "step": 68610 }, { "epoch": 0.4383936215069701, "grad_norm": 0.898378312587738, "learning_rate": 8.861456767122226e-05, "loss": 1.0107, "step": 68620 }, { "epoch": 0.4384575086567088, "grad_norm": 2.269949436187744, "learning_rate": 8.861137989956529e-05, "loss": 0.7672, "step": 68630 }, { "epoch": 0.4385213958064475, "grad_norm": 6.667402267456055, "learning_rate": 8.860819173905835e-05, "loss": 0.7432, "step": 68640 }, { "epoch": 0.4385852829561862, "grad_norm": 0.7865056395530701, "learning_rate": 8.860500318973351e-05, "loss": 0.9813, "step": 68650 }, { "epoch": 0.4386491701059249, "grad_norm": 2.528974771499634, "learning_rate": 8.860181425162287e-05, "loss": 0.8215, "step": 68660 }, { "epoch": 0.4387130572556636, "grad_norm": 0.5087980031967163, "learning_rate": 8.859862492475858e-05, "loss": 1.2262, "step": 68670 }, { "epoch": 0.4387769444054023, "grad_norm": 1.1823939085006714, "learning_rate": 8.859543520917275e-05, "loss": 0.7388, "step": 68680 }, { "epoch": 0.438840831555141, "grad_norm": 0.7431660294532776, "learning_rate": 8.859224510489747e-05, "loss": 0.6863, "step": 68690 }, { "epoch": 0.4389047187048797, "grad_norm": 1.038490653038025, "learning_rate": 8.858905461196492e-05, "loss": 0.853, "step": 68700 }, { "epoch": 0.4389686058546184, "grad_norm": 0.9958590269088745, "learning_rate": 8.85858637304072e-05, "loss": 0.8119, "step": 68710 }, { "epoch": 0.4390324930043571, "grad_norm": 0.6802636384963989, "learning_rate": 8.858267246025645e-05, "loss": 1.0443, "step": 68720 }, { "epoch": 0.4390963801540958, "grad_norm": 1.3269374370574951, "learning_rate": 8.857948080154481e-05, "loss": 0.8071, "step": 68730 }, { "epoch": 0.4391602673038345, "grad_norm": 0.7724654078483582, "learning_rate": 8.857628875430444e-05, "loss": 0.7978, "step": 68740 }, { "epoch": 0.4392241544535732, "grad_norm": 0.7245962023735046, "learning_rate": 8.857309631856745e-05, "loss": 0.8891, "step": 68750 }, { "epoch": 0.4392880416033119, "grad_norm": 0.9760708808898926, "learning_rate": 8.8569903494366e-05, "loss": 0.9933, "step": 68760 }, { "epoch": 0.4393519287530506, "grad_norm": 0.828381359577179, "learning_rate": 8.856671028173227e-05, "loss": 0.949, "step": 68770 }, { "epoch": 0.43941581590278933, "grad_norm": 0.948192834854126, "learning_rate": 8.85635166806984e-05, "loss": 0.785, "step": 68780 }, { "epoch": 0.43947970305252804, "grad_norm": 1.3965764045715332, "learning_rate": 8.856032269129655e-05, "loss": 0.6816, "step": 68790 }, { "epoch": 0.43954359020226674, "grad_norm": 0.6188552975654602, "learning_rate": 8.85571283135589e-05, "loss": 0.7448, "step": 68800 }, { "epoch": 0.4396074773520054, "grad_norm": 0.7305311560630798, "learning_rate": 8.85539335475176e-05, "loss": 0.8134, "step": 68810 }, { "epoch": 0.4396713645017441, "grad_norm": 0.6679476499557495, "learning_rate": 8.855073839320484e-05, "loss": 0.8225, "step": 68820 }, { "epoch": 0.4397352516514828, "grad_norm": 0.41290047764778137, "learning_rate": 8.85475428506528e-05, "loss": 0.8303, "step": 68830 }, { "epoch": 0.4397991388012215, "grad_norm": 1.389434814453125, "learning_rate": 8.854434691989365e-05, "loss": 1.0117, "step": 68840 }, { "epoch": 0.4398630259509602, "grad_norm": 0.970970869064331, "learning_rate": 8.854115060095958e-05, "loss": 0.9634, "step": 68850 }, { "epoch": 0.4399269131006989, "grad_norm": 0.8621498346328735, "learning_rate": 8.853795389388277e-05, "loss": 0.7216, "step": 68860 }, { "epoch": 0.43999080025043763, "grad_norm": 0.8945342898368835, "learning_rate": 8.853475679869545e-05, "loss": 0.924, "step": 68870 }, { "epoch": 0.44005468740017634, "grad_norm": 0.7587364315986633, "learning_rate": 8.853155931542978e-05, "loss": 0.9817, "step": 68880 }, { "epoch": 0.44011857454991504, "grad_norm": 0.9429205656051636, "learning_rate": 8.852836144411795e-05, "loss": 1.1741, "step": 68890 }, { "epoch": 0.44018246169965375, "grad_norm": 0.9457645416259766, "learning_rate": 8.852516318479223e-05, "loss": 0.8122, "step": 68900 }, { "epoch": 0.44024634884939245, "grad_norm": 0.8908385038375854, "learning_rate": 8.852196453748476e-05, "loss": 1.0426, "step": 68910 }, { "epoch": 0.44031023599913116, "grad_norm": 1.4087450504302979, "learning_rate": 8.851876550222779e-05, "loss": 0.6433, "step": 68920 }, { "epoch": 0.4403741231488698, "grad_norm": 0.8311522006988525, "learning_rate": 8.851556607905351e-05, "loss": 0.8959, "step": 68930 }, { "epoch": 0.4404380102986085, "grad_norm": 0.6747666597366333, "learning_rate": 8.851236626799419e-05, "loss": 1.1469, "step": 68940 }, { "epoch": 0.4405018974483472, "grad_norm": 0.8693909049034119, "learning_rate": 8.850916606908199e-05, "loss": 0.7576, "step": 68950 }, { "epoch": 0.44056578459808593, "grad_norm": 0.6947962045669556, "learning_rate": 8.85059654823492e-05, "loss": 0.9861, "step": 68960 }, { "epoch": 0.44062967174782464, "grad_norm": 4.578150749206543, "learning_rate": 8.850276450782802e-05, "loss": 1.0223, "step": 68970 }, { "epoch": 0.44069355889756334, "grad_norm": 0.8332919478416443, "learning_rate": 8.849956314555068e-05, "loss": 0.7311, "step": 68980 }, { "epoch": 0.44075744604730205, "grad_norm": 0.8713606595993042, "learning_rate": 8.849636139554945e-05, "loss": 0.8488, "step": 68990 }, { "epoch": 0.44082133319704075, "grad_norm": 0.8420679569244385, "learning_rate": 8.849315925785654e-05, "loss": 0.9619, "step": 69000 }, { "epoch": 0.44088522034677946, "grad_norm": 0.9233155846595764, "learning_rate": 8.848995673250421e-05, "loss": 0.9395, "step": 69010 }, { "epoch": 0.44094910749651817, "grad_norm": 1.0304968357086182, "learning_rate": 8.848675381952474e-05, "loss": 0.7857, "step": 69020 }, { "epoch": 0.4410129946462569, "grad_norm": 1.142500638961792, "learning_rate": 8.848355051895035e-05, "loss": 0.7173, "step": 69030 }, { "epoch": 0.4410768817959956, "grad_norm": 1.1199169158935547, "learning_rate": 8.848034683081332e-05, "loss": 0.7658, "step": 69040 }, { "epoch": 0.44114076894573423, "grad_norm": 0.6068952679634094, "learning_rate": 8.84771427551459e-05, "loss": 0.7444, "step": 69050 }, { "epoch": 0.44120465609547294, "grad_norm": 1.1909863948822021, "learning_rate": 8.847393829198036e-05, "loss": 1.017, "step": 69060 }, { "epoch": 0.44126854324521164, "grad_norm": 0.84711092710495, "learning_rate": 8.847073344134898e-05, "loss": 0.7326, "step": 69070 }, { "epoch": 0.44133243039495035, "grad_norm": 1.1196755170822144, "learning_rate": 8.846752820328403e-05, "loss": 0.9662, "step": 69080 }, { "epoch": 0.44139631754468905, "grad_norm": 0.9795490503311157, "learning_rate": 8.846432257781781e-05, "loss": 0.976, "step": 69090 }, { "epoch": 0.44146020469442776, "grad_norm": 0.7674742341041565, "learning_rate": 8.846111656498257e-05, "loss": 0.9718, "step": 69100 }, { "epoch": 0.44152409184416647, "grad_norm": 0.8170384764671326, "learning_rate": 8.845791016481062e-05, "loss": 0.9278, "step": 69110 }, { "epoch": 0.4415879789939052, "grad_norm": 0.8551295399665833, "learning_rate": 8.845470337733423e-05, "loss": 0.9096, "step": 69120 }, { "epoch": 0.4416518661436439, "grad_norm": 1.4373359680175781, "learning_rate": 8.845149620258573e-05, "loss": 1.2263, "step": 69130 }, { "epoch": 0.4417157532933826, "grad_norm": 0.746088981628418, "learning_rate": 8.844828864059738e-05, "loss": 0.9052, "step": 69140 }, { "epoch": 0.4417796404431213, "grad_norm": 0.6683810949325562, "learning_rate": 8.84450806914015e-05, "loss": 0.7266, "step": 69150 }, { "epoch": 0.44184352759286, "grad_norm": 0.9795920848846436, "learning_rate": 8.84418723550304e-05, "loss": 1.1524, "step": 69160 }, { "epoch": 0.44190741474259865, "grad_norm": 0.9631989002227783, "learning_rate": 8.843866363151641e-05, "loss": 1.059, "step": 69170 }, { "epoch": 0.44197130189233735, "grad_norm": 0.7739669680595398, "learning_rate": 8.84354545208918e-05, "loss": 0.7757, "step": 69180 }, { "epoch": 0.44203518904207606, "grad_norm": 0.5001319646835327, "learning_rate": 8.843224502318892e-05, "loss": 0.8741, "step": 69190 }, { "epoch": 0.44209907619181477, "grad_norm": 0.739460825920105, "learning_rate": 8.842903513844008e-05, "loss": 0.9077, "step": 69200 }, { "epoch": 0.44216296334155347, "grad_norm": 0.810375452041626, "learning_rate": 8.842582486667762e-05, "loss": 0.7811, "step": 69210 }, { "epoch": 0.4422268504912922, "grad_norm": 1.088107705116272, "learning_rate": 8.842261420793385e-05, "loss": 0.8338, "step": 69220 }, { "epoch": 0.4422907376410309, "grad_norm": 0.6136099100112915, "learning_rate": 8.841940316224111e-05, "loss": 0.6255, "step": 69230 }, { "epoch": 0.4423546247907696, "grad_norm": 0.7968172430992126, "learning_rate": 8.841619172963175e-05, "loss": 0.8622, "step": 69240 }, { "epoch": 0.4424185119405083, "grad_norm": 0.8373786211013794, "learning_rate": 8.84129799101381e-05, "loss": 0.9056, "step": 69250 }, { "epoch": 0.442482399090247, "grad_norm": 0.47702914476394653, "learning_rate": 8.840976770379252e-05, "loss": 0.7324, "step": 69260 }, { "epoch": 0.4425462862399857, "grad_norm": 0.8604845404624939, "learning_rate": 8.840655511062734e-05, "loss": 0.82, "step": 69270 }, { "epoch": 0.4426101733897244, "grad_norm": 1.151459813117981, "learning_rate": 8.840334213067493e-05, "loss": 1.1619, "step": 69280 }, { "epoch": 0.44267406053946307, "grad_norm": 1.4183622598648071, "learning_rate": 8.840012876396765e-05, "loss": 0.9147, "step": 69290 }, { "epoch": 0.44273794768920177, "grad_norm": 0.7213853001594543, "learning_rate": 8.839691501053784e-05, "loss": 0.9717, "step": 69300 }, { "epoch": 0.4428018348389405, "grad_norm": 0.8650780320167542, "learning_rate": 8.839370087041787e-05, "loss": 1.0401, "step": 69310 }, { "epoch": 0.4428657219886792, "grad_norm": 0.9786863923072815, "learning_rate": 8.839048634364014e-05, "loss": 0.8482, "step": 69320 }, { "epoch": 0.4429296091384179, "grad_norm": 0.907888650894165, "learning_rate": 8.838727143023698e-05, "loss": 1.0272, "step": 69330 }, { "epoch": 0.4429934962881566, "grad_norm": 0.5181243419647217, "learning_rate": 8.83840561302408e-05, "loss": 1.1205, "step": 69340 }, { "epoch": 0.4430573834378953, "grad_norm": 1.089030146598816, "learning_rate": 8.838084044368396e-05, "loss": 0.9977, "step": 69350 }, { "epoch": 0.443121270587634, "grad_norm": 0.7841888070106506, "learning_rate": 8.837762437059884e-05, "loss": 0.9291, "step": 69360 }, { "epoch": 0.4431851577373727, "grad_norm": 0.706368088722229, "learning_rate": 8.837440791101787e-05, "loss": 0.8566, "step": 69370 }, { "epoch": 0.4432490448871114, "grad_norm": 0.8301064968109131, "learning_rate": 8.83711910649734e-05, "loss": 0.8693, "step": 69380 }, { "epoch": 0.4433129320368501, "grad_norm": 0.95965576171875, "learning_rate": 8.836797383249784e-05, "loss": 0.8735, "step": 69390 }, { "epoch": 0.44337681918658883, "grad_norm": 0.644489586353302, "learning_rate": 8.836475621362359e-05, "loss": 1.1971, "step": 69400 }, { "epoch": 0.4434407063363275, "grad_norm": 0.834976851940155, "learning_rate": 8.836153820838304e-05, "loss": 0.9157, "step": 69410 }, { "epoch": 0.4435045934860662, "grad_norm": 0.6156612634658813, "learning_rate": 8.835831981680864e-05, "loss": 0.7013, "step": 69420 }, { "epoch": 0.4435684806358049, "grad_norm": 0.5868956446647644, "learning_rate": 8.835510103893276e-05, "loss": 1.0189, "step": 69430 }, { "epoch": 0.4436323677855436, "grad_norm": 1.2473644018173218, "learning_rate": 8.835188187478782e-05, "loss": 0.7598, "step": 69440 }, { "epoch": 0.4436962549352823, "grad_norm": 1.8413316011428833, "learning_rate": 8.834866232440627e-05, "loss": 0.7408, "step": 69450 }, { "epoch": 0.443760142085021, "grad_norm": 1.211452603340149, "learning_rate": 8.83454423878205e-05, "loss": 0.9004, "step": 69460 }, { "epoch": 0.4438240292347597, "grad_norm": 1.3288507461547852, "learning_rate": 8.834222206506297e-05, "loss": 0.9584, "step": 69470 }, { "epoch": 0.4438879163844984, "grad_norm": 1.8809562921524048, "learning_rate": 8.833900135616608e-05, "loss": 0.7489, "step": 69480 }, { "epoch": 0.44395180353423713, "grad_norm": 0.8909973502159119, "learning_rate": 8.833578026116228e-05, "loss": 0.6701, "step": 69490 }, { "epoch": 0.44401569068397584, "grad_norm": 0.9204776287078857, "learning_rate": 8.833255878008402e-05, "loss": 0.8157, "step": 69500 }, { "epoch": 0.44407957783371454, "grad_norm": 0.722482442855835, "learning_rate": 8.832933691296371e-05, "loss": 1.1273, "step": 69510 }, { "epoch": 0.44414346498345325, "grad_norm": 0.8715541958808899, "learning_rate": 8.832611465983383e-05, "loss": 1.2145, "step": 69520 }, { "epoch": 0.44420735213319196, "grad_norm": 0.9637245535850525, "learning_rate": 8.832289202072681e-05, "loss": 0.973, "step": 69530 }, { "epoch": 0.4442712392829306, "grad_norm": 0.8205868005752563, "learning_rate": 8.831966899567512e-05, "loss": 0.7592, "step": 69540 }, { "epoch": 0.4443351264326693, "grad_norm": 1.58009672164917, "learning_rate": 8.831644558471122e-05, "loss": 1.2691, "step": 69550 }, { "epoch": 0.444399013582408, "grad_norm": 1.08955717086792, "learning_rate": 8.831322178786754e-05, "loss": 0.9777, "step": 69560 }, { "epoch": 0.4444629007321467, "grad_norm": 0.9413936138153076, "learning_rate": 8.830999760517659e-05, "loss": 1.029, "step": 69570 }, { "epoch": 0.44452678788188543, "grad_norm": 1.7037255764007568, "learning_rate": 8.830677303667081e-05, "loss": 1.2211, "step": 69580 }, { "epoch": 0.44459067503162414, "grad_norm": 1.0269652605056763, "learning_rate": 8.83035480823827e-05, "loss": 1.3183, "step": 69590 }, { "epoch": 0.44465456218136284, "grad_norm": 0.8793505430221558, "learning_rate": 8.830032274234472e-05, "loss": 0.9107, "step": 69600 }, { "epoch": 0.44471844933110155, "grad_norm": 1.2114499807357788, "learning_rate": 8.829709701658934e-05, "loss": 0.9675, "step": 69610 }, { "epoch": 0.44478233648084026, "grad_norm": 1.1938707828521729, "learning_rate": 8.82938709051491e-05, "loss": 0.85, "step": 69620 }, { "epoch": 0.44484622363057896, "grad_norm": 1.2485358715057373, "learning_rate": 8.829064440805641e-05, "loss": 0.8547, "step": 69630 }, { "epoch": 0.44491011078031767, "grad_norm": 0.7239115238189697, "learning_rate": 8.828741752534382e-05, "loss": 1.1611, "step": 69640 }, { "epoch": 0.4449739979300564, "grad_norm": 0.9881543517112732, "learning_rate": 8.82841902570438e-05, "loss": 0.668, "step": 69650 }, { "epoch": 0.445037885079795, "grad_norm": 1.0397281646728516, "learning_rate": 8.828096260318888e-05, "loss": 0.7762, "step": 69660 }, { "epoch": 0.44510177222953373, "grad_norm": 1.2399822473526, "learning_rate": 8.827773456381155e-05, "loss": 0.7973, "step": 69670 }, { "epoch": 0.44516565937927244, "grad_norm": 0.8497468829154968, "learning_rate": 8.82745061389443e-05, "loss": 0.9985, "step": 69680 }, { "epoch": 0.44522954652901114, "grad_norm": 0.73412024974823, "learning_rate": 8.827127732861967e-05, "loss": 0.812, "step": 69690 }, { "epoch": 0.44529343367874985, "grad_norm": 0.7889323234558105, "learning_rate": 8.826804813287017e-05, "loss": 0.9489, "step": 69700 }, { "epoch": 0.44535732082848856, "grad_norm": 0.7215690612792969, "learning_rate": 8.826481855172832e-05, "loss": 1.0469, "step": 69710 }, { "epoch": 0.44542120797822726, "grad_norm": 1.6253806352615356, "learning_rate": 8.826158858522665e-05, "loss": 0.7258, "step": 69720 }, { "epoch": 0.44548509512796597, "grad_norm": 1.0504227876663208, "learning_rate": 8.825835823339768e-05, "loss": 0.9111, "step": 69730 }, { "epoch": 0.4455489822777047, "grad_norm": 0.9772189855575562, "learning_rate": 8.825512749627393e-05, "loss": 0.9676, "step": 69740 }, { "epoch": 0.4456128694274434, "grad_norm": 0.7481646537780762, "learning_rate": 8.825189637388795e-05, "loss": 0.9435, "step": 69750 }, { "epoch": 0.4456767565771821, "grad_norm": 0.6458262801170349, "learning_rate": 8.824866486627231e-05, "loss": 0.9124, "step": 69760 }, { "epoch": 0.4457406437269208, "grad_norm": 0.9859530925750732, "learning_rate": 8.824543297345949e-05, "loss": 1.0758, "step": 69770 }, { "epoch": 0.44580453087665944, "grad_norm": 0.8648393750190735, "learning_rate": 8.82422006954821e-05, "loss": 1.0258, "step": 69780 }, { "epoch": 0.44586841802639815, "grad_norm": 2.013597249984741, "learning_rate": 8.823896803237264e-05, "loss": 0.9565, "step": 69790 }, { "epoch": 0.44593230517613686, "grad_norm": 0.8398522138595581, "learning_rate": 8.823573498416371e-05, "loss": 0.8652, "step": 69800 }, { "epoch": 0.44599619232587556, "grad_norm": 0.751560389995575, "learning_rate": 8.823250155088785e-05, "loss": 0.948, "step": 69810 }, { "epoch": 0.44606007947561427, "grad_norm": 0.7580850124359131, "learning_rate": 8.82292677325776e-05, "loss": 0.8804, "step": 69820 }, { "epoch": 0.446123966625353, "grad_norm": 2.6924216747283936, "learning_rate": 8.822603352926558e-05, "loss": 0.814, "step": 69830 }, { "epoch": 0.4461878537750917, "grad_norm": 1.0442085266113281, "learning_rate": 8.82227989409843e-05, "loss": 0.9705, "step": 69840 }, { "epoch": 0.4462517409248304, "grad_norm": 0.6417388319969177, "learning_rate": 8.821956396776641e-05, "loss": 0.8304, "step": 69850 }, { "epoch": 0.4463156280745691, "grad_norm": 0.49614080786705017, "learning_rate": 8.821632860964442e-05, "loss": 1.1193, "step": 69860 }, { "epoch": 0.4463795152243078, "grad_norm": 0.6962358355522156, "learning_rate": 8.821309286665094e-05, "loss": 1.02, "step": 69870 }, { "epoch": 0.4464434023740465, "grad_norm": 0.9865720868110657, "learning_rate": 8.820985673881857e-05, "loss": 1.191, "step": 69880 }, { "epoch": 0.4465072895237852, "grad_norm": 0.9626466631889343, "learning_rate": 8.820662022617987e-05, "loss": 0.9506, "step": 69890 }, { "epoch": 0.44657117667352386, "grad_norm": 0.40864917635917664, "learning_rate": 8.820338332876745e-05, "loss": 0.9994, "step": 69900 }, { "epoch": 0.44663506382326257, "grad_norm": 0.5569325089454651, "learning_rate": 8.82001460466139e-05, "loss": 0.8532, "step": 69910 }, { "epoch": 0.4466989509730013, "grad_norm": 0.6157374978065491, "learning_rate": 8.819690837975185e-05, "loss": 0.834, "step": 69920 }, { "epoch": 0.44676283812274, "grad_norm": 0.9512416124343872, "learning_rate": 8.819367032821389e-05, "loss": 0.7586, "step": 69930 }, { "epoch": 0.4468267252724787, "grad_norm": 0.6513834595680237, "learning_rate": 8.819043189203262e-05, "loss": 1.0077, "step": 69940 }, { "epoch": 0.4468906124222174, "grad_norm": 0.6988425254821777, "learning_rate": 8.818719307124066e-05, "loss": 0.9777, "step": 69950 }, { "epoch": 0.4469544995719561, "grad_norm": 0.7577906250953674, "learning_rate": 8.818395386587064e-05, "loss": 0.8364, "step": 69960 }, { "epoch": 0.4470183867216948, "grad_norm": 3.7700507640838623, "learning_rate": 8.818071427595515e-05, "loss": 0.9155, "step": 69970 }, { "epoch": 0.4470822738714335, "grad_norm": 0.9014910459518433, "learning_rate": 8.817747430152687e-05, "loss": 0.9437, "step": 69980 }, { "epoch": 0.4471461610211722, "grad_norm": 0.7106698155403137, "learning_rate": 8.817423394261837e-05, "loss": 1.2196, "step": 69990 }, { "epoch": 0.4472100481709109, "grad_norm": 0.7741692066192627, "learning_rate": 8.817099319926231e-05, "loss": 0.8265, "step": 70000 }, { "epoch": 0.44727393532064963, "grad_norm": 0.8128407001495361, "learning_rate": 8.816775207149133e-05, "loss": 0.7937, "step": 70010 }, { "epoch": 0.4473378224703883, "grad_norm": 1.0812875032424927, "learning_rate": 8.816451055933807e-05, "loss": 0.9699, "step": 70020 }, { "epoch": 0.447401709620127, "grad_norm": 0.8170537948608398, "learning_rate": 8.816126866283515e-05, "loss": 0.7516, "step": 70030 }, { "epoch": 0.4474655967698657, "grad_norm": 0.8234254121780396, "learning_rate": 8.815802638201527e-05, "loss": 0.7975, "step": 70040 }, { "epoch": 0.4475294839196044, "grad_norm": 0.5763027667999268, "learning_rate": 8.815478371691104e-05, "loss": 0.8927, "step": 70050 }, { "epoch": 0.4475933710693431, "grad_norm": 0.6996818780899048, "learning_rate": 8.815154066755514e-05, "loss": 0.7487, "step": 70060 }, { "epoch": 0.4476572582190818, "grad_norm": 1.1514983177185059, "learning_rate": 8.814829723398021e-05, "loss": 0.7932, "step": 70070 }, { "epoch": 0.4477211453688205, "grad_norm": 1.56476628780365, "learning_rate": 8.814505341621892e-05, "loss": 1.0774, "step": 70080 }, { "epoch": 0.4477850325185592, "grad_norm": 1.2454763650894165, "learning_rate": 8.814180921430395e-05, "loss": 0.7339, "step": 70090 }, { "epoch": 0.44784891966829793, "grad_norm": 0.7148693799972534, "learning_rate": 8.813856462826794e-05, "loss": 0.8958, "step": 70100 }, { "epoch": 0.44791280681803664, "grad_norm": 1.7030229568481445, "learning_rate": 8.813531965814363e-05, "loss": 0.9031, "step": 70110 }, { "epoch": 0.44797669396777534, "grad_norm": 0.7845126986503601, "learning_rate": 8.813207430396365e-05, "loss": 0.8665, "step": 70120 }, { "epoch": 0.44804058111751405, "grad_norm": 0.6932292580604553, "learning_rate": 8.812882856576066e-05, "loss": 0.9553, "step": 70130 }, { "epoch": 0.4481044682672527, "grad_norm": 1.0173585414886475, "learning_rate": 8.812558244356742e-05, "loss": 1.1723, "step": 70140 }, { "epoch": 0.4481683554169914, "grad_norm": 0.7353670597076416, "learning_rate": 8.812233593741655e-05, "loss": 0.8626, "step": 70150 }, { "epoch": 0.4482322425667301, "grad_norm": 0.4959295392036438, "learning_rate": 8.811908904734079e-05, "loss": 0.9257, "step": 70160 }, { "epoch": 0.4482961297164688, "grad_norm": 0.9090648889541626, "learning_rate": 8.811584177337281e-05, "loss": 0.6679, "step": 70170 }, { "epoch": 0.4483600168662075, "grad_norm": 0.840734601020813, "learning_rate": 8.811259411554536e-05, "loss": 0.7846, "step": 70180 }, { "epoch": 0.44842390401594623, "grad_norm": 0.8319433927536011, "learning_rate": 8.81093460738911e-05, "loss": 0.9433, "step": 70190 }, { "epoch": 0.44848779116568493, "grad_norm": 0.5484992265701294, "learning_rate": 8.810609764844276e-05, "loss": 0.8511, "step": 70200 }, { "epoch": 0.44855167831542364, "grad_norm": 0.8629337549209595, "learning_rate": 8.810284883923304e-05, "loss": 1.033, "step": 70210 }, { "epoch": 0.44861556546516235, "grad_norm": 0.8372594118118286, "learning_rate": 8.809959964629467e-05, "loss": 0.6458, "step": 70220 }, { "epoch": 0.44867945261490105, "grad_norm": 0.6603564620018005, "learning_rate": 8.809635006966037e-05, "loss": 0.9905, "step": 70230 }, { "epoch": 0.44874333976463976, "grad_norm": 0.7497221231460571, "learning_rate": 8.809310010936288e-05, "loss": 0.9827, "step": 70240 }, { "epoch": 0.44880722691437847, "grad_norm": 0.6426061987876892, "learning_rate": 8.80898497654349e-05, "loss": 0.9913, "step": 70250 }, { "epoch": 0.4488711140641171, "grad_norm": 1.1607120037078857, "learning_rate": 8.808659903790919e-05, "loss": 0.9618, "step": 70260 }, { "epoch": 0.4489350012138558, "grad_norm": 0.6175957322120667, "learning_rate": 8.808334792681848e-05, "loss": 0.7507, "step": 70270 }, { "epoch": 0.44899888836359453, "grad_norm": 0.96190345287323, "learning_rate": 8.80800964321955e-05, "loss": 0.7085, "step": 70280 }, { "epoch": 0.44906277551333323, "grad_norm": 2.022925615310669, "learning_rate": 8.807684455407301e-05, "loss": 0.997, "step": 70290 }, { "epoch": 0.44912666266307194, "grad_norm": 0.8769704103469849, "learning_rate": 8.807359229248376e-05, "loss": 1.0706, "step": 70300 }, { "epoch": 0.44919054981281065, "grad_norm": 0.8224455118179321, "learning_rate": 8.80703396474605e-05, "loss": 1.1076, "step": 70310 }, { "epoch": 0.44925443696254935, "grad_norm": 0.5599127411842346, "learning_rate": 8.806708661903598e-05, "loss": 0.7477, "step": 70320 }, { "epoch": 0.44931832411228806, "grad_norm": 0.8950123190879822, "learning_rate": 8.806383320724295e-05, "loss": 0.9558, "step": 70330 }, { "epoch": 0.44938221126202677, "grad_norm": 0.7584883570671082, "learning_rate": 8.80605794121142e-05, "loss": 1.051, "step": 70340 }, { "epoch": 0.44944609841176547, "grad_norm": 0.7890920042991638, "learning_rate": 8.805732523368249e-05, "loss": 0.747, "step": 70350 }, { "epoch": 0.4495099855615042, "grad_norm": 0.7396231889724731, "learning_rate": 8.805407067198059e-05, "loss": 0.9456, "step": 70360 }, { "epoch": 0.4495738727112429, "grad_norm": 1.3219010829925537, "learning_rate": 8.805081572704128e-05, "loss": 0.785, "step": 70370 }, { "epoch": 0.4496377598609816, "grad_norm": 0.5966509580612183, "learning_rate": 8.804756039889735e-05, "loss": 1.1855, "step": 70380 }, { "epoch": 0.44970164701072024, "grad_norm": 0.9530605673789978, "learning_rate": 8.804430468758153e-05, "loss": 0.8681, "step": 70390 }, { "epoch": 0.44976553416045895, "grad_norm": 0.8958638310432434, "learning_rate": 8.804104859312668e-05, "loss": 1.0291, "step": 70400 }, { "epoch": 0.44982942131019765, "grad_norm": 0.8644607663154602, "learning_rate": 8.803779211556555e-05, "loss": 0.7294, "step": 70410 }, { "epoch": 0.44989330845993636, "grad_norm": 0.8499729037284851, "learning_rate": 8.803453525493096e-05, "loss": 0.8267, "step": 70420 }, { "epoch": 0.44995719560967506, "grad_norm": 0.8581739068031311, "learning_rate": 8.803127801125568e-05, "loss": 0.7154, "step": 70430 }, { "epoch": 0.45002108275941377, "grad_norm": 0.8350471258163452, "learning_rate": 8.802802038457253e-05, "loss": 0.9061, "step": 70440 }, { "epoch": 0.4500849699091525, "grad_norm": 0.8576902747154236, "learning_rate": 8.802476237491433e-05, "loss": 0.9597, "step": 70450 }, { "epoch": 0.4501488570588912, "grad_norm": 0.8955521583557129, "learning_rate": 8.802150398231387e-05, "loss": 1.0067, "step": 70460 }, { "epoch": 0.4502127442086299, "grad_norm": 0.8049098253250122, "learning_rate": 8.801824520680397e-05, "loss": 1.002, "step": 70470 }, { "epoch": 0.4502766313583686, "grad_norm": 0.8177332878112793, "learning_rate": 8.801498604841745e-05, "loss": 0.9605, "step": 70480 }, { "epoch": 0.4503405185081073, "grad_norm": 0.5897266864776611, "learning_rate": 8.801172650718711e-05, "loss": 0.9324, "step": 70490 }, { "epoch": 0.450404405657846, "grad_norm": 0.7611057758331299, "learning_rate": 8.800846658314583e-05, "loss": 1.1378, "step": 70500 }, { "epoch": 0.45046829280758466, "grad_norm": 0.9269735813140869, "learning_rate": 8.80052062763264e-05, "loss": 0.7767, "step": 70510 }, { "epoch": 0.45053217995732336, "grad_norm": 0.7874916791915894, "learning_rate": 8.800194558676167e-05, "loss": 1.0075, "step": 70520 }, { "epoch": 0.45059606710706207, "grad_norm": 0.5338902473449707, "learning_rate": 8.799868451448446e-05, "loss": 0.7581, "step": 70530 }, { "epoch": 0.4506599542568008, "grad_norm": 0.6649864315986633, "learning_rate": 8.799542305952764e-05, "loss": 1.309, "step": 70540 }, { "epoch": 0.4507238414065395, "grad_norm": 0.5478102564811707, "learning_rate": 8.799216122192402e-05, "loss": 0.9979, "step": 70550 }, { "epoch": 0.4507877285562782, "grad_norm": 0.48872268199920654, "learning_rate": 8.798889900170648e-05, "loss": 0.862, "step": 70560 }, { "epoch": 0.4508516157060169, "grad_norm": 1.0504260063171387, "learning_rate": 8.798563639890786e-05, "loss": 0.8303, "step": 70570 }, { "epoch": 0.4509155028557556, "grad_norm": 0.7641623616218567, "learning_rate": 8.798237341356102e-05, "loss": 0.8984, "step": 70580 }, { "epoch": 0.4509793900054943, "grad_norm": 0.8579826951026917, "learning_rate": 8.797911004569882e-05, "loss": 0.8908, "step": 70590 }, { "epoch": 0.451043277155233, "grad_norm": 1.6578333377838135, "learning_rate": 8.797584629535412e-05, "loss": 1.0401, "step": 70600 }, { "epoch": 0.4511071643049717, "grad_norm": 0.901781439781189, "learning_rate": 8.79725821625598e-05, "loss": 1.1365, "step": 70610 }, { "epoch": 0.4511710514547104, "grad_norm": 1.3552802801132202, "learning_rate": 8.796931764734873e-05, "loss": 0.9429, "step": 70620 }, { "epoch": 0.4512349386044491, "grad_norm": 0.5758177638053894, "learning_rate": 8.796605274975377e-05, "loss": 0.9782, "step": 70630 }, { "epoch": 0.4512988257541878, "grad_norm": 0.6553876996040344, "learning_rate": 8.796278746980782e-05, "loss": 0.9537, "step": 70640 }, { "epoch": 0.4513627129039265, "grad_norm": 0.6024998426437378, "learning_rate": 8.795952180754376e-05, "loss": 0.9083, "step": 70650 }, { "epoch": 0.4514266000536652, "grad_norm": 0.521595299243927, "learning_rate": 8.795625576299447e-05, "loss": 0.8758, "step": 70660 }, { "epoch": 0.4514904872034039, "grad_norm": 0.9571405053138733, "learning_rate": 8.795298933619284e-05, "loss": 1.0593, "step": 70670 }, { "epoch": 0.4515543743531426, "grad_norm": 1.07502281665802, "learning_rate": 8.79497225271718e-05, "loss": 0.8501, "step": 70680 }, { "epoch": 0.4516182615028813, "grad_norm": 1.2445697784423828, "learning_rate": 8.794645533596422e-05, "loss": 0.8585, "step": 70690 }, { "epoch": 0.45168214865262, "grad_norm": 0.8725454211235046, "learning_rate": 8.794318776260299e-05, "loss": 1.109, "step": 70700 }, { "epoch": 0.4517460358023587, "grad_norm": 0.6738957762718201, "learning_rate": 8.793991980712103e-05, "loss": 0.8554, "step": 70710 }, { "epoch": 0.45180992295209743, "grad_norm": 0.6663877964019775, "learning_rate": 8.793665146955127e-05, "loss": 0.8995, "step": 70720 }, { "epoch": 0.45187381010183614, "grad_norm": 1.05771005153656, "learning_rate": 8.79333827499266e-05, "loss": 0.8408, "step": 70730 }, { "epoch": 0.45193769725157484, "grad_norm": 0.8179357051849365, "learning_rate": 8.793011364827995e-05, "loss": 0.9386, "step": 70740 }, { "epoch": 0.4520015844013135, "grad_norm": 0.8579227328300476, "learning_rate": 8.792684416464425e-05, "loss": 0.9987, "step": 70750 }, { "epoch": 0.4520654715510522, "grad_norm": 1.311963438987732, "learning_rate": 8.79235742990524e-05, "loss": 0.8236, "step": 70760 }, { "epoch": 0.4521293587007909, "grad_norm": 1.2034355401992798, "learning_rate": 8.792030405153736e-05, "loss": 1.0315, "step": 70770 }, { "epoch": 0.4521932458505296, "grad_norm": 0.9574033617973328, "learning_rate": 8.791703342213205e-05, "loss": 0.9112, "step": 70780 }, { "epoch": 0.4522571330002683, "grad_norm": 0.6111446022987366, "learning_rate": 8.791376241086942e-05, "loss": 0.8271, "step": 70790 }, { "epoch": 0.452321020150007, "grad_norm": 0.5406328439712524, "learning_rate": 8.791049101778239e-05, "loss": 0.6997, "step": 70800 }, { "epoch": 0.45238490729974573, "grad_norm": 0.4767141044139862, "learning_rate": 8.790721924290393e-05, "loss": 0.8203, "step": 70810 }, { "epoch": 0.45244879444948444, "grad_norm": 1.9145870208740234, "learning_rate": 8.790394708626697e-05, "loss": 1.0273, "step": 70820 }, { "epoch": 0.45251268159922314, "grad_norm": 1.5563242435455322, "learning_rate": 8.790067454790447e-05, "loss": 1.1937, "step": 70830 }, { "epoch": 0.45257656874896185, "grad_norm": 0.6038081645965576, "learning_rate": 8.789740162784939e-05, "loss": 0.9115, "step": 70840 }, { "epoch": 0.45264045589870056, "grad_norm": 0.8606191873550415, "learning_rate": 8.789412832613468e-05, "loss": 0.8494, "step": 70850 }, { "epoch": 0.45270434304843926, "grad_norm": 0.9102177023887634, "learning_rate": 8.789085464279334e-05, "loss": 0.9009, "step": 70860 }, { "epoch": 0.4527682301981779, "grad_norm": 0.8726232051849365, "learning_rate": 8.788758057785828e-05, "loss": 0.9052, "step": 70870 }, { "epoch": 0.4528321173479166, "grad_norm": 0.8285619020462036, "learning_rate": 8.788430613136254e-05, "loss": 0.9528, "step": 70880 }, { "epoch": 0.4528960044976553, "grad_norm": 1.2340794801712036, "learning_rate": 8.788103130333905e-05, "loss": 0.8517, "step": 70890 }, { "epoch": 0.45295989164739403, "grad_norm": 0.5685308575630188, "learning_rate": 8.787775609382078e-05, "loss": 0.9504, "step": 70900 }, { "epoch": 0.45302377879713274, "grad_norm": 0.7877033948898315, "learning_rate": 8.787448050284077e-05, "loss": 0.8238, "step": 70910 }, { "epoch": 0.45308766594687144, "grad_norm": 1.047734260559082, "learning_rate": 8.787120453043196e-05, "loss": 1.0679, "step": 70920 }, { "epoch": 0.45315155309661015, "grad_norm": 0.5385513305664062, "learning_rate": 8.786792817662737e-05, "loss": 0.8655, "step": 70930 }, { "epoch": 0.45321544024634886, "grad_norm": 0.9814597964286804, "learning_rate": 8.786465144145996e-05, "loss": 0.9863, "step": 70940 }, { "epoch": 0.45327932739608756, "grad_norm": 0.7968815565109253, "learning_rate": 8.786137432496278e-05, "loss": 0.8118, "step": 70950 }, { "epoch": 0.45334321454582627, "grad_norm": 1.0466378927230835, "learning_rate": 8.785809682716879e-05, "loss": 0.8782, "step": 70960 }, { "epoch": 0.453407101695565, "grad_norm": 1.5503062009811401, "learning_rate": 8.7854818948111e-05, "loss": 1.0405, "step": 70970 }, { "epoch": 0.4534709888453037, "grad_norm": 0.6179012060165405, "learning_rate": 8.785154068782246e-05, "loss": 0.8444, "step": 70980 }, { "epoch": 0.45353487599504233, "grad_norm": 1.3943589925765991, "learning_rate": 8.784826204633614e-05, "loss": 0.8734, "step": 70990 }, { "epoch": 0.45359876314478104, "grad_norm": 0.8613284230232239, "learning_rate": 8.784498302368508e-05, "loss": 0.7613, "step": 71000 }, { "epoch": 0.45366265029451974, "grad_norm": 1.1512913703918457, "learning_rate": 8.784170361990232e-05, "loss": 0.7816, "step": 71010 }, { "epoch": 0.45372653744425845, "grad_norm": 0.8666269779205322, "learning_rate": 8.783842383502084e-05, "loss": 1.043, "step": 71020 }, { "epoch": 0.45379042459399715, "grad_norm": 0.900255560874939, "learning_rate": 8.783514366907371e-05, "loss": 0.9663, "step": 71030 }, { "epoch": 0.45385431174373586, "grad_norm": 1.041473150253296, "learning_rate": 8.783186312209395e-05, "loss": 1.0225, "step": 71040 }, { "epoch": 0.45391819889347457, "grad_norm": 1.203635811805725, "learning_rate": 8.78285821941146e-05, "loss": 0.8201, "step": 71050 }, { "epoch": 0.4539820860432133, "grad_norm": 1.2860292196273804, "learning_rate": 8.782530088516869e-05, "loss": 1.0791, "step": 71060 }, { "epoch": 0.454045973192952, "grad_norm": 0.9229752421379089, "learning_rate": 8.782201919528929e-05, "loss": 0.9097, "step": 71070 }, { "epoch": 0.4541098603426907, "grad_norm": 0.5410824418067932, "learning_rate": 8.7819065348727e-05, "loss": 1.1974, "step": 71080 }, { "epoch": 0.4541737474924294, "grad_norm": 1.527845025062561, "learning_rate": 8.7815782935165e-05, "loss": 0.9155, "step": 71090 }, { "epoch": 0.4542376346421681, "grad_norm": 0.6132227182388306, "learning_rate": 8.781250014076534e-05, "loss": 0.9264, "step": 71100 }, { "epoch": 0.45430152179190675, "grad_norm": 1.0122579336166382, "learning_rate": 8.78092169655611e-05, "loss": 1.126, "step": 71110 }, { "epoch": 0.45436540894164545, "grad_norm": 0.9451808333396912, "learning_rate": 8.780593340958535e-05, "loss": 1.0451, "step": 71120 }, { "epoch": 0.45442929609138416, "grad_norm": 1.5981924533843994, "learning_rate": 8.780264947287111e-05, "loss": 1.1555, "step": 71130 }, { "epoch": 0.45449318324112287, "grad_norm": 0.8764825463294983, "learning_rate": 8.779936515545151e-05, "loss": 0.8937, "step": 71140 }, { "epoch": 0.4545570703908616, "grad_norm": 0.6347659230232239, "learning_rate": 8.779608045735959e-05, "loss": 1.2468, "step": 71150 }, { "epoch": 0.4546209575406003, "grad_norm": 0.9502388834953308, "learning_rate": 8.779279537862844e-05, "loss": 0.791, "step": 71160 }, { "epoch": 0.454684844690339, "grad_norm": 0.9341233968734741, "learning_rate": 8.778950991929114e-05, "loss": 0.9172, "step": 71170 }, { "epoch": 0.4547487318400777, "grad_norm": 0.7763635516166687, "learning_rate": 8.77862240793808e-05, "loss": 0.9442, "step": 71180 }, { "epoch": 0.4548126189898164, "grad_norm": 1.2328989505767822, "learning_rate": 8.778293785893048e-05, "loss": 0.7446, "step": 71190 }, { "epoch": 0.4548765061395551, "grad_norm": 0.6243307590484619, "learning_rate": 8.777965125797329e-05, "loss": 0.8242, "step": 71200 }, { "epoch": 0.4549403932892938, "grad_norm": 0.7185580134391785, "learning_rate": 8.777636427654234e-05, "loss": 1.0433, "step": 71210 }, { "epoch": 0.4550042804390325, "grad_norm": 0.7410394549369812, "learning_rate": 8.777307691467072e-05, "loss": 0.9533, "step": 71220 }, { "epoch": 0.4550681675887712, "grad_norm": 0.8406373858451843, "learning_rate": 8.776978917239153e-05, "loss": 0.9858, "step": 71230 }, { "epoch": 0.4551320547385099, "grad_norm": 1.1634323596954346, "learning_rate": 8.776650104973789e-05, "loss": 0.8353, "step": 71240 }, { "epoch": 0.4551959418882486, "grad_norm": 0.847737729549408, "learning_rate": 8.776321254674291e-05, "loss": 0.6618, "step": 71250 }, { "epoch": 0.4552598290379873, "grad_norm": 0.853600025177002, "learning_rate": 8.77599236634397e-05, "loss": 1.011, "step": 71260 }, { "epoch": 0.455323716187726, "grad_norm": 0.6608572602272034, "learning_rate": 8.77566343998614e-05, "loss": 0.988, "step": 71270 }, { "epoch": 0.4553876033374647, "grad_norm": 0.8262060284614563, "learning_rate": 8.775334475604114e-05, "loss": 1.2176, "step": 71280 }, { "epoch": 0.4554514904872034, "grad_norm": 0.7335585355758667, "learning_rate": 8.775005473201202e-05, "loss": 0.9556, "step": 71290 }, { "epoch": 0.4555153776369421, "grad_norm": 1.2570284605026245, "learning_rate": 8.774676432780719e-05, "loss": 1.0209, "step": 71300 }, { "epoch": 0.4555792647866808, "grad_norm": 0.6992619037628174, "learning_rate": 8.774347354345979e-05, "loss": 0.8241, "step": 71310 }, { "epoch": 0.4556431519364195, "grad_norm": 1.3729963302612305, "learning_rate": 8.774018237900297e-05, "loss": 0.9433, "step": 71320 }, { "epoch": 0.4557070390861582, "grad_norm": 0.9504528045654297, "learning_rate": 8.773689083446986e-05, "loss": 0.8976, "step": 71330 }, { "epoch": 0.45577092623589693, "grad_norm": 0.9891476035118103, "learning_rate": 8.773359890989361e-05, "loss": 0.975, "step": 71340 }, { "epoch": 0.45583481338563564, "grad_norm": 1.9540566205978394, "learning_rate": 8.773030660530736e-05, "loss": 1.0329, "step": 71350 }, { "epoch": 0.4558987005353743, "grad_norm": 0.9334406852722168, "learning_rate": 8.77270139207443e-05, "loss": 0.8753, "step": 71360 }, { "epoch": 0.455962587685113, "grad_norm": 0.9712562561035156, "learning_rate": 8.772372085623756e-05, "loss": 1.0013, "step": 71370 }, { "epoch": 0.4560264748348517, "grad_norm": 1.0713132619857788, "learning_rate": 8.772042741182034e-05, "loss": 0.9794, "step": 71380 }, { "epoch": 0.4560903619845904, "grad_norm": 1.1477205753326416, "learning_rate": 8.771713358752575e-05, "loss": 0.9437, "step": 71390 }, { "epoch": 0.4561542491343291, "grad_norm": 0.9125179052352905, "learning_rate": 8.771383938338702e-05, "loss": 1.0213, "step": 71400 }, { "epoch": 0.4562181362840678, "grad_norm": 0.7508702278137207, "learning_rate": 8.771054479943728e-05, "loss": 1.0244, "step": 71410 }, { "epoch": 0.4562820234338065, "grad_norm": 0.7568835616111755, "learning_rate": 8.770724983570974e-05, "loss": 1.0994, "step": 71420 }, { "epoch": 0.45634591058354523, "grad_norm": 1.1442028284072876, "learning_rate": 8.770395449223758e-05, "loss": 0.768, "step": 71430 }, { "epoch": 0.45640979773328394, "grad_norm": 1.0177857875823975, "learning_rate": 8.770065876905396e-05, "loss": 0.762, "step": 71440 }, { "epoch": 0.45647368488302265, "grad_norm": 0.8974249362945557, "learning_rate": 8.76973626661921e-05, "loss": 0.8523, "step": 71450 }, { "epoch": 0.45653757203276135, "grad_norm": 1.9021356105804443, "learning_rate": 8.769406618368519e-05, "loss": 1.068, "step": 71460 }, { "epoch": 0.45660145918250006, "grad_norm": 0.6922752857208252, "learning_rate": 8.769076932156642e-05, "loss": 0.9142, "step": 71470 }, { "epoch": 0.4566653463322387, "grad_norm": 0.853961706161499, "learning_rate": 8.7687472079869e-05, "loss": 0.8104, "step": 71480 }, { "epoch": 0.4567292334819774, "grad_norm": 0.6722909212112427, "learning_rate": 8.768417445862613e-05, "loss": 0.8424, "step": 71490 }, { "epoch": 0.4567931206317161, "grad_norm": 0.8289230465888977, "learning_rate": 8.768087645787102e-05, "loss": 1.0402, "step": 71500 }, { "epoch": 0.4568570077814548, "grad_norm": 0.9513195157051086, "learning_rate": 8.767757807763687e-05, "loss": 0.8345, "step": 71510 }, { "epoch": 0.45692089493119353, "grad_norm": 1.016177773475647, "learning_rate": 8.767427931795694e-05, "loss": 1.0672, "step": 71520 }, { "epoch": 0.45698478208093224, "grad_norm": 0.815597653388977, "learning_rate": 8.767098017886442e-05, "loss": 0.9043, "step": 71530 }, { "epoch": 0.45704866923067095, "grad_norm": 0.6468181610107422, "learning_rate": 8.766768066039252e-05, "loss": 0.7424, "step": 71540 }, { "epoch": 0.45711255638040965, "grad_norm": 1.0520371198654175, "learning_rate": 8.76643807625745e-05, "loss": 0.7597, "step": 71550 }, { "epoch": 0.45717644353014836, "grad_norm": 0.770429790019989, "learning_rate": 8.766108048544359e-05, "loss": 0.9429, "step": 71560 }, { "epoch": 0.45724033067988706, "grad_norm": 1.4733335971832275, "learning_rate": 8.7657779829033e-05, "loss": 0.9293, "step": 71570 }, { "epoch": 0.45730421782962577, "grad_norm": 0.8638445138931274, "learning_rate": 8.765447879337601e-05, "loss": 0.9278, "step": 71580 }, { "epoch": 0.4573681049793645, "grad_norm": 1.1189749240875244, "learning_rate": 8.765117737850584e-05, "loss": 0.9357, "step": 71590 }, { "epoch": 0.4574319921291031, "grad_norm": 0.82524174451828, "learning_rate": 8.764787558445573e-05, "loss": 1.1374, "step": 71600 }, { "epoch": 0.45749587927884183, "grad_norm": 1.3126778602600098, "learning_rate": 8.764457341125894e-05, "loss": 1.1194, "step": 71610 }, { "epoch": 0.45755976642858054, "grad_norm": 0.6606014370918274, "learning_rate": 8.764127085894874e-05, "loss": 0.815, "step": 71620 }, { "epoch": 0.45762365357831924, "grad_norm": 0.8767343163490295, "learning_rate": 8.763796792755836e-05, "loss": 1.0984, "step": 71630 }, { "epoch": 0.45768754072805795, "grad_norm": 0.6078357100486755, "learning_rate": 8.763466461712108e-05, "loss": 0.9051, "step": 71640 }, { "epoch": 0.45775142787779666, "grad_norm": 0.7068758010864258, "learning_rate": 8.763136092767019e-05, "loss": 0.8346, "step": 71650 }, { "epoch": 0.45781531502753536, "grad_norm": 1.372742772102356, "learning_rate": 8.762805685923894e-05, "loss": 0.9978, "step": 71660 }, { "epoch": 0.45787920217727407, "grad_norm": 0.9161148071289062, "learning_rate": 8.762475241186059e-05, "loss": 1.0414, "step": 71670 }, { "epoch": 0.4579430893270128, "grad_norm": 0.8733229637145996, "learning_rate": 8.762144758556846e-05, "loss": 0.5721, "step": 71680 }, { "epoch": 0.4580069764767515, "grad_norm": 0.966275691986084, "learning_rate": 8.761814238039576e-05, "loss": 1.0169, "step": 71690 }, { "epoch": 0.4580708636264902, "grad_norm": 0.8296000957489014, "learning_rate": 8.761483679637585e-05, "loss": 1.0115, "step": 71700 }, { "epoch": 0.4581347507762289, "grad_norm": 0.718637228012085, "learning_rate": 8.761153083354198e-05, "loss": 0.9809, "step": 71710 }, { "epoch": 0.45819863792596754, "grad_norm": 0.6848533153533936, "learning_rate": 8.760822449192747e-05, "loss": 0.9013, "step": 71720 }, { "epoch": 0.45826252507570625, "grad_norm": 0.9018274545669556, "learning_rate": 8.760491777156561e-05, "loss": 1.2462, "step": 71730 }, { "epoch": 0.45832641222544496, "grad_norm": 0.8693473935127258, "learning_rate": 8.760161067248968e-05, "loss": 1.1541, "step": 71740 }, { "epoch": 0.45839029937518366, "grad_norm": 0.7893520593643188, "learning_rate": 8.759830319473302e-05, "loss": 0.7432, "step": 71750 }, { "epoch": 0.45845418652492237, "grad_norm": 0.704264223575592, "learning_rate": 8.759499533832889e-05, "loss": 0.653, "step": 71760 }, { "epoch": 0.4585180736746611, "grad_norm": 0.7048154473304749, "learning_rate": 8.759168710331064e-05, "loss": 0.7997, "step": 71770 }, { "epoch": 0.4585819608243998, "grad_norm": 0.891446590423584, "learning_rate": 8.75883784897116e-05, "loss": 0.8512, "step": 71780 }, { "epoch": 0.4586458479741385, "grad_norm": 0.7560920715332031, "learning_rate": 8.758506949756505e-05, "loss": 1.0229, "step": 71790 }, { "epoch": 0.4587097351238772, "grad_norm": 0.9192051887512207, "learning_rate": 8.758176012690433e-05, "loss": 0.8684, "step": 71800 }, { "epoch": 0.4587736222736159, "grad_norm": 0.9770452380180359, "learning_rate": 8.757845037776279e-05, "loss": 0.7901, "step": 71810 }, { "epoch": 0.4588375094233546, "grad_norm": 0.8596158623695374, "learning_rate": 8.757514025017374e-05, "loss": 1.098, "step": 71820 }, { "epoch": 0.4589013965730933, "grad_norm": 0.8267128467559814, "learning_rate": 8.757182974417051e-05, "loss": 1.2681, "step": 71830 }, { "epoch": 0.45896528372283196, "grad_norm": 0.850165069103241, "learning_rate": 8.756851885978646e-05, "loss": 1.0255, "step": 71840 }, { "epoch": 0.45902917087257067, "grad_norm": 0.8577935695648193, "learning_rate": 8.756520759705494e-05, "loss": 0.729, "step": 71850 }, { "epoch": 0.4590930580223094, "grad_norm": 0.9524192214012146, "learning_rate": 8.756189595600924e-05, "loss": 0.7808, "step": 71860 }, { "epoch": 0.4591569451720481, "grad_norm": 0.7128868699073792, "learning_rate": 8.755858393668278e-05, "loss": 0.9852, "step": 71870 }, { "epoch": 0.4592208323217868, "grad_norm": 0.7015582919120789, "learning_rate": 8.755527153910888e-05, "loss": 1.0348, "step": 71880 }, { "epoch": 0.4592847194715255, "grad_norm": 0.7809120416641235, "learning_rate": 8.755195876332092e-05, "loss": 0.9461, "step": 71890 }, { "epoch": 0.4593486066212642, "grad_norm": 1.345109224319458, "learning_rate": 8.754864560935223e-05, "loss": 0.7244, "step": 71900 }, { "epoch": 0.4594124937710029, "grad_norm": 1.41587233543396, "learning_rate": 8.75453320772362e-05, "loss": 1.0676, "step": 71910 }, { "epoch": 0.4594763809207416, "grad_norm": 0.6541619300842285, "learning_rate": 8.754201816700619e-05, "loss": 0.7355, "step": 71920 }, { "epoch": 0.4595402680704803, "grad_norm": 0.9459201693534851, "learning_rate": 8.753870387869558e-05, "loss": 0.9362, "step": 71930 }, { "epoch": 0.459604155220219, "grad_norm": 1.1776293516159058, "learning_rate": 8.753538921233776e-05, "loss": 1.0149, "step": 71940 }, { "epoch": 0.45966804236995773, "grad_norm": 0.7286693453788757, "learning_rate": 8.753207416796608e-05, "loss": 0.8923, "step": 71950 }, { "epoch": 0.4597319295196964, "grad_norm": 0.6185081601142883, "learning_rate": 8.752875874561395e-05, "loss": 0.7427, "step": 71960 }, { "epoch": 0.4597958166694351, "grad_norm": 1.2175235748291016, "learning_rate": 8.752544294531474e-05, "loss": 0.8131, "step": 71970 }, { "epoch": 0.4598597038191738, "grad_norm": 1.1320040225982666, "learning_rate": 8.752212676710188e-05, "loss": 0.9201, "step": 71980 }, { "epoch": 0.4599235909689125, "grad_norm": 0.9990871548652649, "learning_rate": 8.751881021100874e-05, "loss": 0.7032, "step": 71990 }, { "epoch": 0.4599874781186512, "grad_norm": 0.7369240522384644, "learning_rate": 8.751549327706872e-05, "loss": 0.7436, "step": 72000 }, { "epoch": 0.4600513652683899, "grad_norm": 1.1463274955749512, "learning_rate": 8.75121759653152e-05, "loss": 0.8619, "step": 72010 }, { "epoch": 0.4601152524181286, "grad_norm": 1.0860117673873901, "learning_rate": 8.750885827578165e-05, "loss": 0.8812, "step": 72020 }, { "epoch": 0.4601791395678673, "grad_norm": 1.319459319114685, "learning_rate": 8.750554020850144e-05, "loss": 0.8778, "step": 72030 }, { "epoch": 0.46024302671760603, "grad_norm": 0.8144885301589966, "learning_rate": 8.750222176350798e-05, "loss": 0.9735, "step": 72040 }, { "epoch": 0.46030691386734474, "grad_norm": 0.9385289549827576, "learning_rate": 8.749890294083471e-05, "loss": 0.9461, "step": 72050 }, { "epoch": 0.46037080101708344, "grad_norm": 0.8200061321258545, "learning_rate": 8.749558374051505e-05, "loss": 1.1353, "step": 72060 }, { "epoch": 0.46043468816682215, "grad_norm": 1.0239852666854858, "learning_rate": 8.749226416258242e-05, "loss": 0.8544, "step": 72070 }, { "epoch": 0.46049857531656085, "grad_norm": 0.8354944586753845, "learning_rate": 8.748894420707025e-05, "loss": 0.9432, "step": 72080 }, { "epoch": 0.4605624624662995, "grad_norm": 1.8811396360397339, "learning_rate": 8.748562387401197e-05, "loss": 1.0131, "step": 72090 }, { "epoch": 0.4606263496160382, "grad_norm": 0.9125493764877319, "learning_rate": 8.748230316344106e-05, "loss": 0.8341, "step": 72100 }, { "epoch": 0.4606902367657769, "grad_norm": 0.9623830914497375, "learning_rate": 8.747898207539092e-05, "loss": 0.8355, "step": 72110 }, { "epoch": 0.4607541239155156, "grad_norm": 0.845895528793335, "learning_rate": 8.747566060989498e-05, "loss": 0.6371, "step": 72120 }, { "epoch": 0.46081801106525433, "grad_norm": 1.4472181797027588, "learning_rate": 8.747233876698674e-05, "loss": 0.6948, "step": 72130 }, { "epoch": 0.46088189821499304, "grad_norm": 1.0555568933486938, "learning_rate": 8.746901654669962e-05, "loss": 0.984, "step": 72140 }, { "epoch": 0.46094578536473174, "grad_norm": 0.6194450259208679, "learning_rate": 8.746569394906709e-05, "loss": 0.7546, "step": 72150 }, { "epoch": 0.46100967251447045, "grad_norm": 0.5184745192527771, "learning_rate": 8.746237097412262e-05, "loss": 0.6829, "step": 72160 }, { "epoch": 0.46107355966420915, "grad_norm": 0.696123480796814, "learning_rate": 8.745904762189966e-05, "loss": 0.9351, "step": 72170 }, { "epoch": 0.46113744681394786, "grad_norm": 0.8792576789855957, "learning_rate": 8.745572389243168e-05, "loss": 0.8513, "step": 72180 }, { "epoch": 0.46120133396368657, "grad_norm": 0.8823778629302979, "learning_rate": 8.745239978575215e-05, "loss": 1.1258, "step": 72190 }, { "epoch": 0.46126522111342527, "grad_norm": 0.9086830019950867, "learning_rate": 8.744907530189457e-05, "loss": 1.0118, "step": 72200 }, { "epoch": 0.4613291082631639, "grad_norm": 1.1880900859832764, "learning_rate": 8.74457504408924e-05, "loss": 0.8436, "step": 72210 }, { "epoch": 0.46139299541290263, "grad_norm": 1.3979392051696777, "learning_rate": 8.744242520277912e-05, "loss": 0.8647, "step": 72220 }, { "epoch": 0.46145688256264134, "grad_norm": 1.0479652881622314, "learning_rate": 8.743909958758823e-05, "loss": 0.8345, "step": 72230 }, { "epoch": 0.46152076971238004, "grad_norm": 0.9071272015571594, "learning_rate": 8.743577359535321e-05, "loss": 0.9718, "step": 72240 }, { "epoch": 0.46158465686211875, "grad_norm": 0.838445246219635, "learning_rate": 8.743244722610757e-05, "loss": 0.9786, "step": 72250 }, { "epoch": 0.46164854401185745, "grad_norm": 0.6116316914558411, "learning_rate": 8.742912047988481e-05, "loss": 1.0287, "step": 72260 }, { "epoch": 0.46171243116159616, "grad_norm": 0.7119680047035217, "learning_rate": 8.742579335671841e-05, "loss": 0.8999, "step": 72270 }, { "epoch": 0.46177631831133487, "grad_norm": 0.8352906107902527, "learning_rate": 8.74224658566419e-05, "loss": 0.7239, "step": 72280 }, { "epoch": 0.46184020546107357, "grad_norm": 1.3281790018081665, "learning_rate": 8.741913797968879e-05, "loss": 0.8587, "step": 72290 }, { "epoch": 0.4619040926108123, "grad_norm": 1.4477615356445312, "learning_rate": 8.741580972589258e-05, "loss": 1.1694, "step": 72300 }, { "epoch": 0.461967979760551, "grad_norm": 0.925883948802948, "learning_rate": 8.741248109528679e-05, "loss": 0.8509, "step": 72310 }, { "epoch": 0.4620318669102897, "grad_norm": 1.4060012102127075, "learning_rate": 8.740915208790496e-05, "loss": 0.968, "step": 72320 }, { "epoch": 0.46209575406002834, "grad_norm": 0.7903311848640442, "learning_rate": 8.740582270378061e-05, "loss": 0.8068, "step": 72330 }, { "epoch": 0.46215964120976705, "grad_norm": 0.8597942590713501, "learning_rate": 8.740249294294727e-05, "loss": 0.661, "step": 72340 }, { "epoch": 0.46222352835950575, "grad_norm": 1.3927608728408813, "learning_rate": 8.739916280543845e-05, "loss": 0.9103, "step": 72350 }, { "epoch": 0.46228741550924446, "grad_norm": 1.1652497053146362, "learning_rate": 8.739583229128771e-05, "loss": 0.726, "step": 72360 }, { "epoch": 0.46235130265898317, "grad_norm": 1.1774413585662842, "learning_rate": 8.739250140052859e-05, "loss": 0.8478, "step": 72370 }, { "epoch": 0.46241518980872187, "grad_norm": 1.1708256006240845, "learning_rate": 8.738917013319463e-05, "loss": 0.8796, "step": 72380 }, { "epoch": 0.4624790769584606, "grad_norm": 0.6467223763465881, "learning_rate": 8.738583848931938e-05, "loss": 0.9981, "step": 72390 }, { "epoch": 0.4625429641081993, "grad_norm": 0.9683516621589661, "learning_rate": 8.73825064689364e-05, "loss": 0.9423, "step": 72400 }, { "epoch": 0.462606851257938, "grad_norm": 1.3207199573516846, "learning_rate": 8.737917407207922e-05, "loss": 0.8087, "step": 72410 }, { "epoch": 0.4626707384076767, "grad_norm": 0.6273083090782166, "learning_rate": 8.737584129878145e-05, "loss": 0.8718, "step": 72420 }, { "epoch": 0.4627346255574154, "grad_norm": 0.6665430068969727, "learning_rate": 8.73725081490766e-05, "loss": 0.7988, "step": 72430 }, { "epoch": 0.4627985127071541, "grad_norm": 0.5622584819793701, "learning_rate": 8.736917462299827e-05, "loss": 0.7487, "step": 72440 }, { "epoch": 0.46286239985689276, "grad_norm": 2.063795566558838, "learning_rate": 8.736584072058003e-05, "loss": 0.828, "step": 72450 }, { "epoch": 0.46292628700663147, "grad_norm": 0.960210919380188, "learning_rate": 8.736250644185545e-05, "loss": 0.9685, "step": 72460 }, { "epoch": 0.46299017415637017, "grad_norm": 0.7123231291770935, "learning_rate": 8.735917178685807e-05, "loss": 0.8832, "step": 72470 }, { "epoch": 0.4630540613061089, "grad_norm": 1.0152729749679565, "learning_rate": 8.735583675562154e-05, "loss": 0.8156, "step": 72480 }, { "epoch": 0.4631179484558476, "grad_norm": 0.9183365106582642, "learning_rate": 8.735250134817942e-05, "loss": 0.8343, "step": 72490 }, { "epoch": 0.4631818356055863, "grad_norm": 0.9543598890304565, "learning_rate": 8.734916556456528e-05, "loss": 0.7168, "step": 72500 }, { "epoch": 0.463245722755325, "grad_norm": 0.6270715594291687, "learning_rate": 8.734582940481275e-05, "loss": 0.9325, "step": 72510 }, { "epoch": 0.4633096099050637, "grad_norm": 1.1470279693603516, "learning_rate": 8.73424928689554e-05, "loss": 0.898, "step": 72520 }, { "epoch": 0.4633734970548024, "grad_norm": 1.4141347408294678, "learning_rate": 8.733915595702685e-05, "loss": 1.1052, "step": 72530 }, { "epoch": 0.4634373842045411, "grad_norm": 0.9898306727409363, "learning_rate": 8.733581866906066e-05, "loss": 1.1064, "step": 72540 }, { "epoch": 0.4635012713542798, "grad_norm": 1.889242172241211, "learning_rate": 8.733248100509052e-05, "loss": 0.7938, "step": 72550 }, { "epoch": 0.4635651585040185, "grad_norm": 1.8361594676971436, "learning_rate": 8.732914296514998e-05, "loss": 0.8884, "step": 72560 }, { "epoch": 0.4636290456537572, "grad_norm": 1.3661187887191772, "learning_rate": 8.732580454927267e-05, "loss": 0.7056, "step": 72570 }, { "epoch": 0.4636929328034959, "grad_norm": 0.8329597115516663, "learning_rate": 8.732246575749223e-05, "loss": 0.6071, "step": 72580 }, { "epoch": 0.4637568199532346, "grad_norm": 0.803459107875824, "learning_rate": 8.731912658984227e-05, "loss": 0.8693, "step": 72590 }, { "epoch": 0.4638207071029733, "grad_norm": 0.7911583185195923, "learning_rate": 8.731578704635642e-05, "loss": 0.7637, "step": 72600 }, { "epoch": 0.463884594252712, "grad_norm": 0.9523131847381592, "learning_rate": 8.73124471270683e-05, "loss": 0.9817, "step": 72610 }, { "epoch": 0.4639484814024507, "grad_norm": 0.9202901124954224, "learning_rate": 8.730910683201157e-05, "loss": 0.7682, "step": 72620 }, { "epoch": 0.4640123685521894, "grad_norm": 0.8219014406204224, "learning_rate": 8.730576616121984e-05, "loss": 0.8304, "step": 72630 }, { "epoch": 0.4640762557019281, "grad_norm": 0.653312087059021, "learning_rate": 8.73024251147268e-05, "loss": 0.6409, "step": 72640 }, { "epoch": 0.4641401428516668, "grad_norm": 0.7120294570922852, "learning_rate": 8.729908369256603e-05, "loss": 0.7654, "step": 72650 }, { "epoch": 0.46420403000140553, "grad_norm": 0.9074612259864807, "learning_rate": 8.729574189477124e-05, "loss": 0.9235, "step": 72660 }, { "epoch": 0.46426791715114424, "grad_norm": 1.1278350353240967, "learning_rate": 8.729239972137608e-05, "loss": 0.8423, "step": 72670 }, { "epoch": 0.46433180430088294, "grad_norm": 0.8770177960395813, "learning_rate": 8.728905717241417e-05, "loss": 0.7709, "step": 72680 }, { "epoch": 0.4643956914506216, "grad_norm": 0.8250672817230225, "learning_rate": 8.728571424791921e-05, "loss": 0.8168, "step": 72690 }, { "epoch": 0.4644595786003603, "grad_norm": 1.0635161399841309, "learning_rate": 8.728237094792482e-05, "loss": 0.9243, "step": 72700 }, { "epoch": 0.464523465750099, "grad_norm": 1.3261529207229614, "learning_rate": 8.727902727246473e-05, "loss": 0.9011, "step": 72710 }, { "epoch": 0.4645873528998377, "grad_norm": 0.8366795182228088, "learning_rate": 8.727568322157259e-05, "loss": 1.0456, "step": 72720 }, { "epoch": 0.4646512400495764, "grad_norm": 1.3640772104263306, "learning_rate": 8.727233879528204e-05, "loss": 0.8938, "step": 72730 }, { "epoch": 0.4647151271993151, "grad_norm": 0.7236993312835693, "learning_rate": 8.72689939936268e-05, "loss": 0.8754, "step": 72740 }, { "epoch": 0.46477901434905383, "grad_norm": 0.9828342199325562, "learning_rate": 8.726564881664056e-05, "loss": 0.8948, "step": 72750 }, { "epoch": 0.46484290149879254, "grad_norm": 0.7750747799873352, "learning_rate": 8.7262303264357e-05, "loss": 0.9002, "step": 72760 }, { "epoch": 0.46490678864853124, "grad_norm": 1.2255038022994995, "learning_rate": 8.725895733680983e-05, "loss": 0.933, "step": 72770 }, { "epoch": 0.46497067579826995, "grad_norm": 3.7937097549438477, "learning_rate": 8.725561103403267e-05, "loss": 0.8682, "step": 72780 }, { "epoch": 0.46503456294800866, "grad_norm": 0.7408625483512878, "learning_rate": 8.725226435605934e-05, "loss": 0.644, "step": 72790 }, { "epoch": 0.46509845009774736, "grad_norm": 0.8894087672233582, "learning_rate": 8.724891730292344e-05, "loss": 0.826, "step": 72800 }, { "epoch": 0.465162337247486, "grad_norm": 1.360103964805603, "learning_rate": 8.724556987465872e-05, "loss": 0.9597, "step": 72810 }, { "epoch": 0.4652262243972247, "grad_norm": 0.8977581858634949, "learning_rate": 8.724222207129889e-05, "loss": 0.7513, "step": 72820 }, { "epoch": 0.4652901115469634, "grad_norm": 0.8301047086715698, "learning_rate": 8.723887389287768e-05, "loss": 0.7628, "step": 72830 }, { "epoch": 0.46535399869670213, "grad_norm": 1.7645938396453857, "learning_rate": 8.723552533942878e-05, "loss": 0.8691, "step": 72840 }, { "epoch": 0.46541788584644084, "grad_norm": 0.6487802267074585, "learning_rate": 8.723217641098594e-05, "loss": 0.7312, "step": 72850 }, { "epoch": 0.46548177299617954, "grad_norm": 0.7930013537406921, "learning_rate": 8.722882710758286e-05, "loss": 0.9037, "step": 72860 }, { "epoch": 0.46554566014591825, "grad_norm": 0.749622106552124, "learning_rate": 8.722547742925328e-05, "loss": 1.0156, "step": 72870 }, { "epoch": 0.46560954729565696, "grad_norm": 0.8207896947860718, "learning_rate": 8.722212737603095e-05, "loss": 0.7249, "step": 72880 }, { "epoch": 0.46567343444539566, "grad_norm": 2.454975128173828, "learning_rate": 8.721877694794958e-05, "loss": 0.823, "step": 72890 }, { "epoch": 0.46573732159513437, "grad_norm": 0.46863147616386414, "learning_rate": 8.721542614504294e-05, "loss": 0.9426, "step": 72900 }, { "epoch": 0.4658012087448731, "grad_norm": 1.0021847486495972, "learning_rate": 8.721207496734476e-05, "loss": 1.1263, "step": 72910 }, { "epoch": 0.4658650958946118, "grad_norm": 0.6298357844352722, "learning_rate": 8.720872341488879e-05, "loss": 0.8613, "step": 72920 }, { "epoch": 0.4659289830443505, "grad_norm": 1.0806231498718262, "learning_rate": 8.72053714877088e-05, "loss": 0.7853, "step": 72930 }, { "epoch": 0.46599287019408914, "grad_norm": 0.9028376936912537, "learning_rate": 8.720201918583853e-05, "loss": 0.8704, "step": 72940 }, { "epoch": 0.46605675734382784, "grad_norm": 1.1052665710449219, "learning_rate": 8.719866650931172e-05, "loss": 0.7413, "step": 72950 }, { "epoch": 0.46612064449356655, "grad_norm": 0.8171069025993347, "learning_rate": 8.719531345816216e-05, "loss": 0.9989, "step": 72960 }, { "epoch": 0.46618453164330526, "grad_norm": 1.0400487184524536, "learning_rate": 8.719196003242362e-05, "loss": 0.8036, "step": 72970 }, { "epoch": 0.46624841879304396, "grad_norm": 0.5930902361869812, "learning_rate": 8.718860623212988e-05, "loss": 0.9512, "step": 72980 }, { "epoch": 0.46631230594278267, "grad_norm": 0.9061450958251953, "learning_rate": 8.718525205731469e-05, "loss": 0.909, "step": 72990 }, { "epoch": 0.4663761930925214, "grad_norm": 0.9812560081481934, "learning_rate": 8.718189750801184e-05, "loss": 0.9485, "step": 73000 }, { "epoch": 0.4664400802422601, "grad_norm": 1.0964970588684082, "learning_rate": 8.717854258425512e-05, "loss": 0.6829, "step": 73010 }, { "epoch": 0.4665039673919988, "grad_norm": 0.513983964920044, "learning_rate": 8.717518728607832e-05, "loss": 0.7848, "step": 73020 }, { "epoch": 0.4665678545417375, "grad_norm": 0.647631824016571, "learning_rate": 8.71718316135152e-05, "loss": 1.0222, "step": 73030 }, { "epoch": 0.4666317416914762, "grad_norm": 0.7781062126159668, "learning_rate": 8.716847556659961e-05, "loss": 0.767, "step": 73040 }, { "epoch": 0.4666956288412149, "grad_norm": 0.9073989987373352, "learning_rate": 8.71651191453653e-05, "loss": 0.7717, "step": 73050 }, { "epoch": 0.46675951599095356, "grad_norm": 1.8708226680755615, "learning_rate": 8.71617623498461e-05, "loss": 0.902, "step": 73060 }, { "epoch": 0.46682340314069226, "grad_norm": 0.9808720350265503, "learning_rate": 8.715840518007578e-05, "loss": 0.9002, "step": 73070 }, { "epoch": 0.46688729029043097, "grad_norm": 0.9100602865219116, "learning_rate": 8.715504763608818e-05, "loss": 1.0678, "step": 73080 }, { "epoch": 0.4669511774401697, "grad_norm": 0.6987651586532593, "learning_rate": 8.71516897179171e-05, "loss": 1.1864, "step": 73090 }, { "epoch": 0.4670150645899084, "grad_norm": 0.9975560903549194, "learning_rate": 8.714833142559637e-05, "loss": 1.4101, "step": 73100 }, { "epoch": 0.4670789517396471, "grad_norm": 1.2323815822601318, "learning_rate": 8.714497275915982e-05, "loss": 0.81, "step": 73110 }, { "epoch": 0.4671428388893858, "grad_norm": 1.1183509826660156, "learning_rate": 8.714161371864124e-05, "loss": 0.8636, "step": 73120 }, { "epoch": 0.4672067260391245, "grad_norm": 0.9153540730476379, "learning_rate": 8.71382543040745e-05, "loss": 0.9229, "step": 73130 }, { "epoch": 0.4672706131888632, "grad_norm": 0.6757118701934814, "learning_rate": 8.71348945154934e-05, "loss": 0.8663, "step": 73140 }, { "epoch": 0.4673345003386019, "grad_norm": 0.9965721964836121, "learning_rate": 8.713153435293178e-05, "loss": 0.9432, "step": 73150 }, { "epoch": 0.4673983874883406, "grad_norm": 3.0663352012634277, "learning_rate": 8.712817381642348e-05, "loss": 0.9173, "step": 73160 }, { "epoch": 0.4674622746380793, "grad_norm": 0.8566670417785645, "learning_rate": 8.712481290600235e-05, "loss": 0.8049, "step": 73170 }, { "epoch": 0.467526161787818, "grad_norm": 0.6621735095977783, "learning_rate": 8.712145162170224e-05, "loss": 1.0432, "step": 73180 }, { "epoch": 0.4675900489375567, "grad_norm": 0.7513931393623352, "learning_rate": 8.7118089963557e-05, "loss": 0.999, "step": 73190 }, { "epoch": 0.4676539360872954, "grad_norm": 0.6250850558280945, "learning_rate": 8.711472793160049e-05, "loss": 1.0574, "step": 73200 }, { "epoch": 0.4677178232370341, "grad_norm": 1.0595519542694092, "learning_rate": 8.711136552586655e-05, "loss": 0.8253, "step": 73210 }, { "epoch": 0.4677817103867728, "grad_norm": 0.5927673578262329, "learning_rate": 8.71083390411543e-05, "loss": 0.9964, "step": 73220 }, { "epoch": 0.4678455975365115, "grad_norm": 0.9299998879432678, "learning_rate": 8.710497592533657e-05, "loss": 0.8753, "step": 73230 }, { "epoch": 0.4679094846862502, "grad_norm": 0.6862097978591919, "learning_rate": 8.710161243583962e-05, "loss": 0.7737, "step": 73240 }, { "epoch": 0.4679733718359889, "grad_norm": 0.6349765062332153, "learning_rate": 8.709824857269732e-05, "loss": 0.7059, "step": 73250 }, { "epoch": 0.4680372589857276, "grad_norm": 0.7105598449707031, "learning_rate": 8.709488433594359e-05, "loss": 0.9881, "step": 73260 }, { "epoch": 0.46810114613546633, "grad_norm": 0.836338996887207, "learning_rate": 8.709151972561228e-05, "loss": 0.8385, "step": 73270 }, { "epoch": 0.46816503328520503, "grad_norm": 0.8248547911643982, "learning_rate": 8.708815474173728e-05, "loss": 0.8845, "step": 73280 }, { "epoch": 0.46822892043494374, "grad_norm": 3.468738079071045, "learning_rate": 8.708478938435246e-05, "loss": 1.0441, "step": 73290 }, { "epoch": 0.4682928075846824, "grad_norm": 0.9611918330192566, "learning_rate": 8.708142365349173e-05, "loss": 1.1622, "step": 73300 }, { "epoch": 0.4683566947344211, "grad_norm": 1.594110369682312, "learning_rate": 8.7078057549189e-05, "loss": 0.7014, "step": 73310 }, { "epoch": 0.4684205818841598, "grad_norm": 0.8596274852752686, "learning_rate": 8.707469107147815e-05, "loss": 0.9094, "step": 73320 }, { "epoch": 0.4684844690338985, "grad_norm": 0.673202395439148, "learning_rate": 8.707132422039305e-05, "loss": 1.0132, "step": 73330 }, { "epoch": 0.4685483561836372, "grad_norm": 0.6166740655899048, "learning_rate": 8.706795699596769e-05, "loss": 0.8095, "step": 73340 }, { "epoch": 0.4686122433333759, "grad_norm": 0.7982991337776184, "learning_rate": 8.706458939823592e-05, "loss": 0.8268, "step": 73350 }, { "epoch": 0.46867613048311463, "grad_norm": 2.1832122802734375, "learning_rate": 8.706122142723167e-05, "loss": 0.9482, "step": 73360 }, { "epoch": 0.46874001763285333, "grad_norm": 0.5912348031997681, "learning_rate": 8.705785308298886e-05, "loss": 0.9366, "step": 73370 }, { "epoch": 0.46880390478259204, "grad_norm": 0.9966716766357422, "learning_rate": 8.705448436554139e-05, "loss": 1.1344, "step": 73380 }, { "epoch": 0.46886779193233075, "grad_norm": 0.6441813111305237, "learning_rate": 8.705111527492322e-05, "loss": 1.0889, "step": 73390 }, { "epoch": 0.46893167908206945, "grad_norm": 0.8401795029640198, "learning_rate": 8.704774581116827e-05, "loss": 1.0559, "step": 73400 }, { "epoch": 0.46899556623180816, "grad_norm": 0.7190397381782532, "learning_rate": 8.704437597431047e-05, "loss": 0.9705, "step": 73410 }, { "epoch": 0.4690594533815468, "grad_norm": 0.8827881217002869, "learning_rate": 8.704100576438374e-05, "loss": 0.6985, "step": 73420 }, { "epoch": 0.4691233405312855, "grad_norm": 1.544293999671936, "learning_rate": 8.703763518142205e-05, "loss": 0.9164, "step": 73430 }, { "epoch": 0.4691872276810242, "grad_norm": 1.1441346406936646, "learning_rate": 8.703426422545934e-05, "loss": 1.31, "step": 73440 }, { "epoch": 0.4692511148307629, "grad_norm": 0.8129305243492126, "learning_rate": 8.703089289652954e-05, "loss": 0.8406, "step": 73450 }, { "epoch": 0.46931500198050163, "grad_norm": 0.8427706956863403, "learning_rate": 8.70275211946666e-05, "loss": 0.873, "step": 73460 }, { "epoch": 0.46937888913024034, "grad_norm": 1.3682218790054321, "learning_rate": 8.70241491199045e-05, "loss": 0.7712, "step": 73470 }, { "epoch": 0.46944277627997905, "grad_norm": 0.7666106820106506, "learning_rate": 8.70207766722772e-05, "loss": 0.8279, "step": 73480 }, { "epoch": 0.46950666342971775, "grad_norm": 0.8763406276702881, "learning_rate": 8.701740385181863e-05, "loss": 0.7302, "step": 73490 }, { "epoch": 0.46957055057945646, "grad_norm": 0.8393523693084717, "learning_rate": 8.70140306585628e-05, "loss": 0.6908, "step": 73500 }, { "epoch": 0.46963443772919516, "grad_norm": 0.8529371619224548, "learning_rate": 8.701065709254363e-05, "loss": 0.7582, "step": 73510 }, { "epoch": 0.46969832487893387, "grad_norm": 0.9004521369934082, "learning_rate": 8.700728315379515e-05, "loss": 0.8145, "step": 73520 }, { "epoch": 0.4697622120286726, "grad_norm": 1.3330974578857422, "learning_rate": 8.70039088423513e-05, "loss": 0.7189, "step": 73530 }, { "epoch": 0.4698260991784112, "grad_norm": 0.6903400421142578, "learning_rate": 8.700053415824608e-05, "loss": 1.0698, "step": 73540 }, { "epoch": 0.46988998632814993, "grad_norm": 0.8569963574409485, "learning_rate": 8.699715910151347e-05, "loss": 0.8589, "step": 73550 }, { "epoch": 0.46995387347788864, "grad_norm": 1.7143669128417969, "learning_rate": 8.699378367218747e-05, "loss": 0.9826, "step": 73560 }, { "epoch": 0.47001776062762735, "grad_norm": 0.8588539361953735, "learning_rate": 8.699040787030205e-05, "loss": 0.7637, "step": 73570 }, { "epoch": 0.47008164777736605, "grad_norm": 0.7576454877853394, "learning_rate": 8.698703169589122e-05, "loss": 0.8103, "step": 73580 }, { "epoch": 0.47014553492710476, "grad_norm": 1.1570996046066284, "learning_rate": 8.698365514898899e-05, "loss": 0.9776, "step": 73590 }, { "epoch": 0.47020942207684346, "grad_norm": 0.5057058334350586, "learning_rate": 8.698027822962937e-05, "loss": 0.9414, "step": 73600 }, { "epoch": 0.47027330922658217, "grad_norm": 0.6375735402107239, "learning_rate": 8.697690093784634e-05, "loss": 0.7779, "step": 73610 }, { "epoch": 0.4703371963763209, "grad_norm": 0.9835091829299927, "learning_rate": 8.697352327367391e-05, "loss": 0.8034, "step": 73620 }, { "epoch": 0.4704010835260596, "grad_norm": 0.5068366527557373, "learning_rate": 8.697014523714615e-05, "loss": 0.7921, "step": 73630 }, { "epoch": 0.4704649706757983, "grad_norm": 0.646186351776123, "learning_rate": 8.696676682829704e-05, "loss": 0.7929, "step": 73640 }, { "epoch": 0.470528857825537, "grad_norm": 0.6051701307296753, "learning_rate": 8.696338804716058e-05, "loss": 0.7974, "step": 73650 }, { "epoch": 0.4705927449752757, "grad_norm": 0.7829045653343201, "learning_rate": 8.696000889377085e-05, "loss": 0.8989, "step": 73660 }, { "epoch": 0.47065663212501435, "grad_norm": 0.8022125959396362, "learning_rate": 8.695662936816185e-05, "loss": 0.9278, "step": 73670 }, { "epoch": 0.47072051927475306, "grad_norm": 1.3843055963516235, "learning_rate": 8.69532494703676e-05, "loss": 0.7795, "step": 73680 }, { "epoch": 0.47078440642449176, "grad_norm": 0.8143162727355957, "learning_rate": 8.694986920042218e-05, "loss": 0.8787, "step": 73690 }, { "epoch": 0.47084829357423047, "grad_norm": 0.9179696440696716, "learning_rate": 8.694648855835961e-05, "loss": 0.8331, "step": 73700 }, { "epoch": 0.4709121807239692, "grad_norm": 0.7905839085578918, "learning_rate": 8.694310754421393e-05, "loss": 0.8672, "step": 73710 }, { "epoch": 0.4709760678737079, "grad_norm": 0.642015278339386, "learning_rate": 8.69397261580192e-05, "loss": 0.6751, "step": 73720 }, { "epoch": 0.4710399550234466, "grad_norm": 0.7508492469787598, "learning_rate": 8.693634439980946e-05, "loss": 1.0497, "step": 73730 }, { "epoch": 0.4711038421731853, "grad_norm": 0.7233025431632996, "learning_rate": 8.693296226961879e-05, "loss": 0.9594, "step": 73740 }, { "epoch": 0.471167729322924, "grad_norm": 0.9499550461769104, "learning_rate": 8.692957976748124e-05, "loss": 1.0151, "step": 73750 }, { "epoch": 0.4712316164726627, "grad_norm": 2.5865375995635986, "learning_rate": 8.692619689343087e-05, "loss": 0.8826, "step": 73760 }, { "epoch": 0.4712955036224014, "grad_norm": 1.2711101770401, "learning_rate": 8.692281364750174e-05, "loss": 1.1665, "step": 73770 }, { "epoch": 0.4713593907721401, "grad_norm": 2.8341193199157715, "learning_rate": 8.691943002972794e-05, "loss": 0.7414, "step": 73780 }, { "epoch": 0.47142327792187877, "grad_norm": 1.014237880706787, "learning_rate": 8.691604604014355e-05, "loss": 1.0644, "step": 73790 }, { "epoch": 0.4714871650716175, "grad_norm": 0.9451431632041931, "learning_rate": 8.691266167878263e-05, "loss": 0.9587, "step": 73800 }, { "epoch": 0.4715510522213562, "grad_norm": 0.7285395264625549, "learning_rate": 8.690927694567927e-05, "loss": 0.8145, "step": 73810 }, { "epoch": 0.4716149393710949, "grad_norm": 0.9039714932441711, "learning_rate": 8.690589184086758e-05, "loss": 0.9801, "step": 73820 }, { "epoch": 0.4716788265208336, "grad_norm": 1.3245606422424316, "learning_rate": 8.690250636438161e-05, "loss": 0.8307, "step": 73830 }, { "epoch": 0.4717427136705723, "grad_norm": 0.8605784773826599, "learning_rate": 8.689912051625549e-05, "loss": 0.802, "step": 73840 }, { "epoch": 0.471806600820311, "grad_norm": 0.8803051710128784, "learning_rate": 8.689573429652329e-05, "loss": 0.9706, "step": 73850 }, { "epoch": 0.4718704879700497, "grad_norm": 1.14476478099823, "learning_rate": 8.689234770521913e-05, "loss": 1.1188, "step": 73860 }, { "epoch": 0.4719343751197884, "grad_norm": 0.6339378356933594, "learning_rate": 8.688896074237712e-05, "loss": 1.0567, "step": 73870 }, { "epoch": 0.4719982622695271, "grad_norm": 1.278977394104004, "learning_rate": 8.688557340803135e-05, "loss": 1.0485, "step": 73880 }, { "epoch": 0.47206214941926583, "grad_norm": 0.6915751695632935, "learning_rate": 8.688218570221596e-05, "loss": 1.0948, "step": 73890 }, { "epoch": 0.47212603656900454, "grad_norm": 0.6988540887832642, "learning_rate": 8.687879762496504e-05, "loss": 1.1413, "step": 73900 }, { "epoch": 0.4721899237187432, "grad_norm": 0.9477376341819763, "learning_rate": 8.687540917631273e-05, "loss": 1.0465, "step": 73910 }, { "epoch": 0.4722538108684819, "grad_norm": 0.8210738301277161, "learning_rate": 8.687202035629314e-05, "loss": 0.9296, "step": 73920 }, { "epoch": 0.4723176980182206, "grad_norm": 1.0816015005111694, "learning_rate": 8.686863116494042e-05, "loss": 1.1211, "step": 73930 }, { "epoch": 0.4723815851679593, "grad_norm": 0.5747155547142029, "learning_rate": 8.686524160228867e-05, "loss": 0.9563, "step": 73940 }, { "epoch": 0.472445472317698, "grad_norm": 0.9445788860321045, "learning_rate": 8.686185166837206e-05, "loss": 1.0368, "step": 73950 }, { "epoch": 0.4725093594674367, "grad_norm": 0.9299923181533813, "learning_rate": 8.685846136322471e-05, "loss": 0.8771, "step": 73960 }, { "epoch": 0.4725732466171754, "grad_norm": 0.8922392725944519, "learning_rate": 8.685507068688075e-05, "loss": 0.7732, "step": 73970 }, { "epoch": 0.47263713376691413, "grad_norm": 1.0317169427871704, "learning_rate": 8.685167963937437e-05, "loss": 0.7212, "step": 73980 }, { "epoch": 0.47270102091665284, "grad_norm": 0.6838691234588623, "learning_rate": 8.684828822073967e-05, "loss": 0.9596, "step": 73990 }, { "epoch": 0.47276490806639154, "grad_norm": 0.93050616979599, "learning_rate": 8.684489643101085e-05, "loss": 0.9891, "step": 74000 }, { "epoch": 0.47282879521613025, "grad_norm": 0.7852534651756287, "learning_rate": 8.684150427022205e-05, "loss": 0.8776, "step": 74010 }, { "epoch": 0.47289268236586895, "grad_norm": 1.1245160102844238, "learning_rate": 8.683811173840741e-05, "loss": 1.0402, "step": 74020 }, { "epoch": 0.4729565695156076, "grad_norm": 1.0451771020889282, "learning_rate": 8.683471883560113e-05, "loss": 0.8141, "step": 74030 }, { "epoch": 0.4730204566653463, "grad_norm": 0.8366501331329346, "learning_rate": 8.683132556183735e-05, "loss": 0.9944, "step": 74040 }, { "epoch": 0.473084343815085, "grad_norm": 1.0142920017242432, "learning_rate": 8.682793191715027e-05, "loss": 0.8935, "step": 74050 }, { "epoch": 0.4731482309648237, "grad_norm": 0.7233960032463074, "learning_rate": 8.682453790157405e-05, "loss": 0.9414, "step": 74060 }, { "epoch": 0.47321211811456243, "grad_norm": 0.8871427178382874, "learning_rate": 8.682114351514287e-05, "loss": 0.8502, "step": 74070 }, { "epoch": 0.47327600526430114, "grad_norm": 1.854498028755188, "learning_rate": 8.681774875789095e-05, "loss": 1.076, "step": 74080 }, { "epoch": 0.47333989241403984, "grad_norm": 0.7415865063667297, "learning_rate": 8.681435362985242e-05, "loss": 0.8037, "step": 74090 }, { "epoch": 0.47340377956377855, "grad_norm": 0.7761117815971375, "learning_rate": 8.68109581310615e-05, "loss": 0.7338, "step": 74100 }, { "epoch": 0.47346766671351725, "grad_norm": 1.1805341243743896, "learning_rate": 8.68075622615524e-05, "loss": 1.1164, "step": 74110 }, { "epoch": 0.47353155386325596, "grad_norm": 0.7617985606193542, "learning_rate": 8.680416602135929e-05, "loss": 1.064, "step": 74120 }, { "epoch": 0.47359544101299467, "grad_norm": 0.5913506746292114, "learning_rate": 8.68007694105164e-05, "loss": 1.0071, "step": 74130 }, { "epoch": 0.4736593281627334, "grad_norm": 1.0599095821380615, "learning_rate": 8.679737242905792e-05, "loss": 0.8347, "step": 74140 }, { "epoch": 0.473723215312472, "grad_norm": 1.3343327045440674, "learning_rate": 8.679397507701806e-05, "loss": 1.1742, "step": 74150 }, { "epoch": 0.47378710246221073, "grad_norm": 0.5925554037094116, "learning_rate": 8.679057735443104e-05, "loss": 0.9706, "step": 74160 }, { "epoch": 0.47385098961194944, "grad_norm": 0.764336109161377, "learning_rate": 8.678717926133109e-05, "loss": 1.2481, "step": 74170 }, { "epoch": 0.47391487676168814, "grad_norm": 0.8975499868392944, "learning_rate": 8.678378079775241e-05, "loss": 0.9182, "step": 74180 }, { "epoch": 0.47397876391142685, "grad_norm": 0.7242470383644104, "learning_rate": 8.678038196372925e-05, "loss": 0.7105, "step": 74190 }, { "epoch": 0.47404265106116555, "grad_norm": 0.8966102004051208, "learning_rate": 8.67769827592958e-05, "loss": 0.9065, "step": 74200 }, { "epoch": 0.47410653821090426, "grad_norm": 0.5318197011947632, "learning_rate": 8.677358318448633e-05, "loss": 0.8434, "step": 74210 }, { "epoch": 0.47417042536064297, "grad_norm": 1.834756851196289, "learning_rate": 8.677018323933505e-05, "loss": 1.1204, "step": 74220 }, { "epoch": 0.4742343125103817, "grad_norm": 1.0717896223068237, "learning_rate": 8.676678292387623e-05, "loss": 0.8128, "step": 74230 }, { "epoch": 0.4742981996601204, "grad_norm": 0.7135387063026428, "learning_rate": 8.67633822381441e-05, "loss": 0.8956, "step": 74240 }, { "epoch": 0.4743620868098591, "grad_norm": 1.2037732601165771, "learning_rate": 8.675998118217289e-05, "loss": 0.833, "step": 74250 }, { "epoch": 0.4744259739595978, "grad_norm": 1.4196306467056274, "learning_rate": 8.675657975599688e-05, "loss": 1.05, "step": 74260 }, { "epoch": 0.47448986110933644, "grad_norm": 0.9658291339874268, "learning_rate": 8.675317795965031e-05, "loss": 0.6996, "step": 74270 }, { "epoch": 0.47455374825907515, "grad_norm": 1.4640781879425049, "learning_rate": 8.674977579316745e-05, "loss": 0.704, "step": 74280 }, { "epoch": 0.47461763540881385, "grad_norm": 0.7206962704658508, "learning_rate": 8.674637325658254e-05, "loss": 0.6542, "step": 74290 }, { "epoch": 0.47468152255855256, "grad_norm": 0.9174501895904541, "learning_rate": 8.674297034992986e-05, "loss": 0.904, "step": 74300 }, { "epoch": 0.47474540970829127, "grad_norm": 0.7845925092697144, "learning_rate": 8.673956707324369e-05, "loss": 0.9329, "step": 74310 }, { "epoch": 0.47480929685802997, "grad_norm": 1.1023668050765991, "learning_rate": 8.67361634265583e-05, "loss": 0.9712, "step": 74320 }, { "epoch": 0.4748731840077687, "grad_norm": 1.1444423198699951, "learning_rate": 8.673275940990796e-05, "loss": 0.8582, "step": 74330 }, { "epoch": 0.4749370711575074, "grad_norm": 0.9823821783065796, "learning_rate": 8.672935502332696e-05, "loss": 0.9597, "step": 74340 }, { "epoch": 0.4750009583072461, "grad_norm": 1.3823814392089844, "learning_rate": 8.672595026684955e-05, "loss": 0.8345, "step": 74350 }, { "epoch": 0.4750648454569848, "grad_norm": 1.1680278778076172, "learning_rate": 8.672254514051009e-05, "loss": 1.0284, "step": 74360 }, { "epoch": 0.4751287326067235, "grad_norm": 0.6968647241592407, "learning_rate": 8.67191396443428e-05, "loss": 0.7296, "step": 74370 }, { "epoch": 0.4751926197564622, "grad_norm": 1.0131860971450806, "learning_rate": 8.671573377838202e-05, "loss": 0.9332, "step": 74380 }, { "epoch": 0.47525650690620086, "grad_norm": 1.0270569324493408, "learning_rate": 8.671232754266203e-05, "loss": 0.7498, "step": 74390 }, { "epoch": 0.47532039405593957, "grad_norm": 0.8761411905288696, "learning_rate": 8.670892093721715e-05, "loss": 0.729, "step": 74400 }, { "epoch": 0.47538428120567827, "grad_norm": 0.9730551838874817, "learning_rate": 8.670551396208168e-05, "loss": 0.8988, "step": 74410 }, { "epoch": 0.475448168355417, "grad_norm": 0.7609050869941711, "learning_rate": 8.670210661728992e-05, "loss": 0.965, "step": 74420 }, { "epoch": 0.4755120555051557, "grad_norm": 0.9374824166297913, "learning_rate": 8.669869890287621e-05, "loss": 0.8523, "step": 74430 }, { "epoch": 0.4755759426548944, "grad_norm": 0.9228322505950928, "learning_rate": 8.669529081887484e-05, "loss": 1.0975, "step": 74440 }, { "epoch": 0.4756398298046331, "grad_norm": 0.8603367209434509, "learning_rate": 8.669188236532013e-05, "loss": 0.8295, "step": 74450 }, { "epoch": 0.4757037169543718, "grad_norm": 0.9186978936195374, "learning_rate": 8.668847354224645e-05, "loss": 0.9944, "step": 74460 }, { "epoch": 0.4757676041041105, "grad_norm": 0.8278791904449463, "learning_rate": 8.668506434968808e-05, "loss": 1.077, "step": 74470 }, { "epoch": 0.4758314912538492, "grad_norm": 1.0060932636260986, "learning_rate": 8.66816547876794e-05, "loss": 1.1607, "step": 74480 }, { "epoch": 0.4758953784035879, "grad_norm": 1.0081162452697754, "learning_rate": 8.667824485625471e-05, "loss": 0.8542, "step": 74490 }, { "epoch": 0.4759592655533266, "grad_norm": 1.362919569015503, "learning_rate": 8.667483455544835e-05, "loss": 1.1144, "step": 74500 }, { "epoch": 0.47602315270306533, "grad_norm": 0.8849195241928101, "learning_rate": 8.667142388529467e-05, "loss": 1.0793, "step": 74510 }, { "epoch": 0.476087039852804, "grad_norm": 1.2261072397232056, "learning_rate": 8.666801284582806e-05, "loss": 0.6733, "step": 74520 }, { "epoch": 0.4761509270025427, "grad_norm": 0.8894041180610657, "learning_rate": 8.666460143708283e-05, "loss": 0.8456, "step": 74530 }, { "epoch": 0.4762148141522814, "grad_norm": 0.7797572016716003, "learning_rate": 8.666118965909334e-05, "loss": 0.7103, "step": 74540 }, { "epoch": 0.4762787013020201, "grad_norm": 0.8391841650009155, "learning_rate": 8.665777751189395e-05, "loss": 0.9627, "step": 74550 }, { "epoch": 0.4763425884517588, "grad_norm": 0.9427254796028137, "learning_rate": 8.665436499551903e-05, "loss": 0.8994, "step": 74560 }, { "epoch": 0.4764064756014975, "grad_norm": 0.9323469996452332, "learning_rate": 8.665095211000293e-05, "loss": 0.9536, "step": 74570 }, { "epoch": 0.4764703627512362, "grad_norm": 0.731502890586853, "learning_rate": 8.664753885538005e-05, "loss": 0.9477, "step": 74580 }, { "epoch": 0.4765342499009749, "grad_norm": 0.7330303192138672, "learning_rate": 8.664412523168474e-05, "loss": 0.7534, "step": 74590 }, { "epoch": 0.47659813705071363, "grad_norm": 1.0151233673095703, "learning_rate": 8.664071123895138e-05, "loss": 1.1555, "step": 74600 }, { "epoch": 0.47666202420045234, "grad_norm": 0.7544573545455933, "learning_rate": 8.663729687721439e-05, "loss": 0.8015, "step": 74610 }, { "epoch": 0.47672591135019105, "grad_norm": 0.5822036862373352, "learning_rate": 8.66338821465081e-05, "loss": 0.7977, "step": 74620 }, { "epoch": 0.47678979849992975, "grad_norm": 1.627901554107666, "learning_rate": 8.663046704686692e-05, "loss": 0.9961, "step": 74630 }, { "epoch": 0.4768536856496684, "grad_norm": 0.9120510220527649, "learning_rate": 8.662705157832527e-05, "loss": 0.9101, "step": 74640 }, { "epoch": 0.4769175727994071, "grad_norm": 1.2490442991256714, "learning_rate": 8.662363574091752e-05, "loss": 0.9137, "step": 74650 }, { "epoch": 0.4769814599491458, "grad_norm": 0.5486982464790344, "learning_rate": 8.662021953467806e-05, "loss": 1.0668, "step": 74660 }, { "epoch": 0.4770453470988845, "grad_norm": 1.0078871250152588, "learning_rate": 8.661680295964131e-05, "loss": 0.8362, "step": 74670 }, { "epoch": 0.4771092342486232, "grad_norm": 1.1747907400131226, "learning_rate": 8.661338601584168e-05, "loss": 0.6677, "step": 74680 }, { "epoch": 0.47717312139836193, "grad_norm": 1.0243124961853027, "learning_rate": 8.660996870331357e-05, "loss": 0.9675, "step": 74690 }, { "epoch": 0.47723700854810064, "grad_norm": 0.8121140599250793, "learning_rate": 8.66065510220914e-05, "loss": 0.7801, "step": 74700 }, { "epoch": 0.47730089569783934, "grad_norm": 1.2878518104553223, "learning_rate": 8.660313297220962e-05, "loss": 0.8109, "step": 74710 }, { "epoch": 0.47736478284757805, "grad_norm": 0.9882553219795227, "learning_rate": 8.65997145537026e-05, "loss": 0.8357, "step": 74720 }, { "epoch": 0.47742866999731676, "grad_norm": 0.9014390110969543, "learning_rate": 8.659629576660479e-05, "loss": 1.0185, "step": 74730 }, { "epoch": 0.47749255714705546, "grad_norm": 0.8646599054336548, "learning_rate": 8.659287661095063e-05, "loss": 0.8571, "step": 74740 }, { "epoch": 0.47755644429679417, "grad_norm": 0.6751865744590759, "learning_rate": 8.658945708677455e-05, "loss": 0.8639, "step": 74750 }, { "epoch": 0.4776203314465328, "grad_norm": 0.6493138074874878, "learning_rate": 8.658603719411098e-05, "loss": 0.9801, "step": 74760 }, { "epoch": 0.4776842185962715, "grad_norm": 0.7330247163772583, "learning_rate": 8.658261693299436e-05, "loss": 0.9075, "step": 74770 }, { "epoch": 0.47774810574601023, "grad_norm": 0.8450262546539307, "learning_rate": 8.657919630345914e-05, "loss": 0.9218, "step": 74780 }, { "epoch": 0.47781199289574894, "grad_norm": 1.399348258972168, "learning_rate": 8.657577530553977e-05, "loss": 0.987, "step": 74790 }, { "epoch": 0.47787588004548764, "grad_norm": 0.6834306716918945, "learning_rate": 8.65723539392707e-05, "loss": 1.0965, "step": 74800 }, { "epoch": 0.47793976719522635, "grad_norm": 0.8273354768753052, "learning_rate": 8.656893220468638e-05, "loss": 0.9514, "step": 74810 }, { "epoch": 0.47800365434496506, "grad_norm": 0.5543147325515747, "learning_rate": 8.656551010182128e-05, "loss": 0.8666, "step": 74820 }, { "epoch": 0.47806754149470376, "grad_norm": 1.8543487787246704, "learning_rate": 8.656208763070986e-05, "loss": 0.8342, "step": 74830 }, { "epoch": 0.47813142864444247, "grad_norm": 0.9043295383453369, "learning_rate": 8.655866479138659e-05, "loss": 1.0627, "step": 74840 }, { "epoch": 0.4781953157941812, "grad_norm": 3.4877755641937256, "learning_rate": 8.655524158388595e-05, "loss": 0.9094, "step": 74850 }, { "epoch": 0.4782592029439199, "grad_norm": 0.6816970109939575, "learning_rate": 8.655181800824237e-05, "loss": 0.6711, "step": 74860 }, { "epoch": 0.4783230900936586, "grad_norm": 1.15105402469635, "learning_rate": 8.654839406449037e-05, "loss": 0.8039, "step": 74870 }, { "epoch": 0.47838697724339724, "grad_norm": 0.43395039439201355, "learning_rate": 8.654496975266445e-05, "loss": 0.8421, "step": 74880 }, { "epoch": 0.47845086439313594, "grad_norm": 0.7869691252708435, "learning_rate": 8.654154507279904e-05, "loss": 0.8841, "step": 74890 }, { "epoch": 0.47851475154287465, "grad_norm": 1.013023853302002, "learning_rate": 8.653812002492867e-05, "loss": 1.1382, "step": 74900 }, { "epoch": 0.47857863869261336, "grad_norm": 0.7388662099838257, "learning_rate": 8.653469460908783e-05, "loss": 0.8478, "step": 74910 }, { "epoch": 0.47864252584235206, "grad_norm": 0.8700296878814697, "learning_rate": 8.6531268825311e-05, "loss": 0.8433, "step": 74920 }, { "epoch": 0.47870641299209077, "grad_norm": 0.7805728912353516, "learning_rate": 8.652784267363268e-05, "loss": 0.8564, "step": 74930 }, { "epoch": 0.4787703001418295, "grad_norm": 1.0190261602401733, "learning_rate": 8.652441615408739e-05, "loss": 0.7729, "step": 74940 }, { "epoch": 0.4788341872915682, "grad_norm": 0.9089486002922058, "learning_rate": 8.652098926670961e-05, "loss": 0.8051, "step": 74950 }, { "epoch": 0.4788980744413069, "grad_norm": 0.8379830121994019, "learning_rate": 8.651756201153391e-05, "loss": 0.7344, "step": 74960 }, { "epoch": 0.4789619615910456, "grad_norm": 0.8890141844749451, "learning_rate": 8.651413438859475e-05, "loss": 1.0695, "step": 74970 }, { "epoch": 0.4790258487407843, "grad_norm": 1.0251997709274292, "learning_rate": 8.651070639792667e-05, "loss": 0.9472, "step": 74980 }, { "epoch": 0.479089735890523, "grad_norm": 0.7137789130210876, "learning_rate": 8.650727803956418e-05, "loss": 0.9155, "step": 74990 }, { "epoch": 0.47915362304026166, "grad_norm": 0.6541804671287537, "learning_rate": 8.650384931354183e-05, "loss": 0.7172, "step": 75000 }, { "epoch": 0.47921751019000036, "grad_norm": 1.1364400386810303, "learning_rate": 8.650042021989415e-05, "loss": 0.7023, "step": 75010 }, { "epoch": 0.47928139733973907, "grad_norm": 1.3749972581863403, "learning_rate": 8.649699075865564e-05, "loss": 0.7755, "step": 75020 }, { "epoch": 0.4793452844894778, "grad_norm": 1.0463199615478516, "learning_rate": 8.649356092986086e-05, "loss": 0.7507, "step": 75030 }, { "epoch": 0.4794091716392165, "grad_norm": 2.42689847946167, "learning_rate": 8.649013073354434e-05, "loss": 0.8019, "step": 75040 }, { "epoch": 0.4794730587889552, "grad_norm": 0.8399762511253357, "learning_rate": 8.648670016974067e-05, "loss": 0.9304, "step": 75050 }, { "epoch": 0.4795369459386939, "grad_norm": 0.804482638835907, "learning_rate": 8.648326923848434e-05, "loss": 0.963, "step": 75060 }, { "epoch": 0.4796008330884326, "grad_norm": 1.2717317342758179, "learning_rate": 8.647983793980993e-05, "loss": 0.7493, "step": 75070 }, { "epoch": 0.4796647202381713, "grad_norm": 1.063368797302246, "learning_rate": 8.647640627375199e-05, "loss": 0.9279, "step": 75080 }, { "epoch": 0.47972860738791, "grad_norm": 0.8830692768096924, "learning_rate": 8.647297424034509e-05, "loss": 0.887, "step": 75090 }, { "epoch": 0.4797924945376487, "grad_norm": 0.9686833620071411, "learning_rate": 8.646954183962378e-05, "loss": 0.8693, "step": 75100 }, { "epoch": 0.4798563816873874, "grad_norm": 0.8640769124031067, "learning_rate": 8.646610907162262e-05, "loss": 0.7796, "step": 75110 }, { "epoch": 0.4799202688371261, "grad_norm": 0.5140219926834106, "learning_rate": 8.646267593637621e-05, "loss": 1.4085, "step": 75120 }, { "epoch": 0.4799841559868648, "grad_norm": 0.6936458945274353, "learning_rate": 8.64592424339191e-05, "loss": 0.9243, "step": 75130 }, { "epoch": 0.4800480431366035, "grad_norm": 0.8202782869338989, "learning_rate": 8.645580856428588e-05, "loss": 1.1241, "step": 75140 }, { "epoch": 0.4801119302863422, "grad_norm": 1.985823631286621, "learning_rate": 8.645237432751113e-05, "loss": 1.0022, "step": 75150 }, { "epoch": 0.4801758174360809, "grad_norm": 1.036049723625183, "learning_rate": 8.644893972362945e-05, "loss": 1.0675, "step": 75160 }, { "epoch": 0.4802397045858196, "grad_norm": 0.7775549292564392, "learning_rate": 8.644550475267538e-05, "loss": 0.8857, "step": 75170 }, { "epoch": 0.4803035917355583, "grad_norm": 0.7424293160438538, "learning_rate": 8.644206941468358e-05, "loss": 1.2084, "step": 75180 }, { "epoch": 0.480367478885297, "grad_norm": 2.01617169380188, "learning_rate": 8.64386337096886e-05, "loss": 1.284, "step": 75190 }, { "epoch": 0.4804313660350357, "grad_norm": 0.6096950173377991, "learning_rate": 8.643519763772506e-05, "loss": 0.7568, "step": 75200 }, { "epoch": 0.48049525318477443, "grad_norm": 0.817476212978363, "learning_rate": 8.643176119882755e-05, "loss": 0.8748, "step": 75210 }, { "epoch": 0.48055914033451314, "grad_norm": 0.9491440057754517, "learning_rate": 8.642832439303067e-05, "loss": 0.8784, "step": 75220 }, { "epoch": 0.48062302748425184, "grad_norm": 0.4414537847042084, "learning_rate": 8.642488722036908e-05, "loss": 1.0155, "step": 75230 }, { "epoch": 0.4806869146339905, "grad_norm": 0.8975993394851685, "learning_rate": 8.642144968087735e-05, "loss": 0.931, "step": 75240 }, { "epoch": 0.4807508017837292, "grad_norm": 1.0731254816055298, "learning_rate": 8.641801177459012e-05, "loss": 1.1996, "step": 75250 }, { "epoch": 0.4808146889334679, "grad_norm": 0.9253545999526978, "learning_rate": 8.641457350154201e-05, "loss": 0.6433, "step": 75260 }, { "epoch": 0.4808785760832066, "grad_norm": 0.6967938542366028, "learning_rate": 8.641113486176764e-05, "loss": 0.7571, "step": 75270 }, { "epoch": 0.4809424632329453, "grad_norm": 0.694025993347168, "learning_rate": 8.640769585530162e-05, "loss": 1.0296, "step": 75280 }, { "epoch": 0.481006350382684, "grad_norm": 0.6931796073913574, "learning_rate": 8.640425648217863e-05, "loss": 1.0492, "step": 75290 }, { "epoch": 0.48107023753242273, "grad_norm": 0.8335185050964355, "learning_rate": 8.640081674243326e-05, "loss": 0.74, "step": 75300 }, { "epoch": 0.48113412468216143, "grad_norm": 0.9621481895446777, "learning_rate": 8.639737663610019e-05, "loss": 0.9957, "step": 75310 }, { "epoch": 0.48119801183190014, "grad_norm": 0.6297350525856018, "learning_rate": 8.639393616321404e-05, "loss": 0.6059, "step": 75320 }, { "epoch": 0.48126189898163885, "grad_norm": 0.8291562795639038, "learning_rate": 8.639049532380948e-05, "loss": 0.7669, "step": 75330 }, { "epoch": 0.48132578613137755, "grad_norm": 0.7181857824325562, "learning_rate": 8.638705411792115e-05, "loss": 0.9866, "step": 75340 }, { "epoch": 0.48138967328111626, "grad_norm": 0.6401185393333435, "learning_rate": 8.63836125455837e-05, "loss": 0.7838, "step": 75350 }, { "epoch": 0.48145356043085497, "grad_norm": 0.6353443264961243, "learning_rate": 8.638017060683179e-05, "loss": 0.6636, "step": 75360 }, { "epoch": 0.4815174475805936, "grad_norm": 0.9812245965003967, "learning_rate": 8.637672830170009e-05, "loss": 0.7057, "step": 75370 }, { "epoch": 0.4815813347303323, "grad_norm": 0.8041467666625977, "learning_rate": 8.637328563022327e-05, "loss": 0.8152, "step": 75380 }, { "epoch": 0.48164522188007103, "grad_norm": 0.738399863243103, "learning_rate": 8.636984259243601e-05, "loss": 0.8781, "step": 75390 }, { "epoch": 0.48170910902980973, "grad_norm": 0.9629417061805725, "learning_rate": 8.636639918837294e-05, "loss": 0.9158, "step": 75400 }, { "epoch": 0.48177299617954844, "grad_norm": 0.898951530456543, "learning_rate": 8.636295541806881e-05, "loss": 0.9504, "step": 75410 }, { "epoch": 0.48183688332928715, "grad_norm": 0.8771629929542542, "learning_rate": 8.635951128155822e-05, "loss": 0.9677, "step": 75420 }, { "epoch": 0.48190077047902585, "grad_norm": 0.7448533177375793, "learning_rate": 8.635606677887591e-05, "loss": 0.752, "step": 75430 }, { "epoch": 0.48196465762876456, "grad_norm": 0.6516122221946716, "learning_rate": 8.635262191005656e-05, "loss": 0.6401, "step": 75440 }, { "epoch": 0.48202854477850327, "grad_norm": 0.7587134838104248, "learning_rate": 8.634917667513486e-05, "loss": 0.8766, "step": 75450 }, { "epoch": 0.48209243192824197, "grad_norm": 0.728209376335144, "learning_rate": 8.63457310741455e-05, "loss": 0.9743, "step": 75460 }, { "epoch": 0.4821563190779807, "grad_norm": 0.7866697907447815, "learning_rate": 8.634228510712318e-05, "loss": 0.9598, "step": 75470 }, { "epoch": 0.4822202062277194, "grad_norm": 0.8349552750587463, "learning_rate": 8.633883877410261e-05, "loss": 0.7729, "step": 75480 }, { "epoch": 0.48228409337745803, "grad_norm": 0.7193264365196228, "learning_rate": 8.63353920751185e-05, "loss": 0.714, "step": 75490 }, { "epoch": 0.48234798052719674, "grad_norm": 0.9247245192527771, "learning_rate": 8.633194501020556e-05, "loss": 0.9012, "step": 75500 }, { "epoch": 0.48241186767693545, "grad_norm": 1.0399880409240723, "learning_rate": 8.632849757939849e-05, "loss": 0.9669, "step": 75510 }, { "epoch": 0.48247575482667415, "grad_norm": 0.7889145016670227, "learning_rate": 8.632504978273204e-05, "loss": 1.1164, "step": 75520 }, { "epoch": 0.48253964197641286, "grad_norm": 0.8151355981826782, "learning_rate": 8.63216016202409e-05, "loss": 0.9048, "step": 75530 }, { "epoch": 0.48260352912615156, "grad_norm": 0.9007961750030518, "learning_rate": 8.631815309195981e-05, "loss": 0.7891, "step": 75540 }, { "epoch": 0.48266741627589027, "grad_norm": 1.8607451915740967, "learning_rate": 8.631470419792348e-05, "loss": 1.0807, "step": 75550 }, { "epoch": 0.482731303425629, "grad_norm": 0.6548914909362793, "learning_rate": 8.63112549381667e-05, "loss": 1.0285, "step": 75560 }, { "epoch": 0.4827951905753677, "grad_norm": 0.7430241107940674, "learning_rate": 8.630780531272414e-05, "loss": 0.8952, "step": 75570 }, { "epoch": 0.4828590777251064, "grad_norm": 0.6701022386550903, "learning_rate": 8.630435532163059e-05, "loss": 0.8305, "step": 75580 }, { "epoch": 0.4829229648748451, "grad_norm": 0.8253774046897888, "learning_rate": 8.630090496492076e-05, "loss": 1.2012, "step": 75590 }, { "epoch": 0.4829868520245838, "grad_norm": 0.7972230911254883, "learning_rate": 8.629745424262942e-05, "loss": 1.138, "step": 75600 }, { "epoch": 0.48305073917432245, "grad_norm": 1.0207947492599487, "learning_rate": 8.62940031547913e-05, "loss": 0.9028, "step": 75610 }, { "epoch": 0.48311462632406116, "grad_norm": 0.6902018785476685, "learning_rate": 8.62905517014412e-05, "loss": 0.8966, "step": 75620 }, { "epoch": 0.48317851347379986, "grad_norm": 1.1125010251998901, "learning_rate": 8.628709988261381e-05, "loss": 1.008, "step": 75630 }, { "epoch": 0.48324240062353857, "grad_norm": 0.6313163638114929, "learning_rate": 8.628364769834395e-05, "loss": 0.8845, "step": 75640 }, { "epoch": 0.4833062877732773, "grad_norm": 0.6679086685180664, "learning_rate": 8.628019514866637e-05, "loss": 0.9086, "step": 75650 }, { "epoch": 0.483370174923016, "grad_norm": 0.7422047853469849, "learning_rate": 8.627674223361584e-05, "loss": 0.8719, "step": 75660 }, { "epoch": 0.4834340620727547, "grad_norm": 0.7488150596618652, "learning_rate": 8.627328895322713e-05, "loss": 0.8072, "step": 75670 }, { "epoch": 0.4834979492224934, "grad_norm": 0.5652221441268921, "learning_rate": 8.627018068854189e-05, "loss": 1.0794, "step": 75680 }, { "epoch": 0.4835618363722321, "grad_norm": 0.8535979986190796, "learning_rate": 8.626672671410644e-05, "loss": 0.8991, "step": 75690 }, { "epoch": 0.4836257235219708, "grad_norm": 0.8179265260696411, "learning_rate": 8.62632723744337e-05, "loss": 1.2132, "step": 75700 }, { "epoch": 0.4836896106717095, "grad_norm": 0.7996183037757874, "learning_rate": 8.625981766955842e-05, "loss": 0.8212, "step": 75710 }, { "epoch": 0.4837534978214482, "grad_norm": 0.671373724937439, "learning_rate": 8.625636259951542e-05, "loss": 0.9386, "step": 75720 }, { "epoch": 0.48381738497118687, "grad_norm": 1.1768290996551514, "learning_rate": 8.625290716433947e-05, "loss": 0.8154, "step": 75730 }, { "epoch": 0.4838812721209256, "grad_norm": 0.9020494818687439, "learning_rate": 8.62494513640654e-05, "loss": 1.119, "step": 75740 }, { "epoch": 0.4839451592706643, "grad_norm": 1.0053081512451172, "learning_rate": 8.624599519872798e-05, "loss": 1.0964, "step": 75750 }, { "epoch": 0.484009046420403, "grad_norm": 1.5894237756729126, "learning_rate": 8.624253866836202e-05, "loss": 0.9744, "step": 75760 }, { "epoch": 0.4840729335701417, "grad_norm": 0.5304593443870544, "learning_rate": 8.623908177300236e-05, "loss": 0.7911, "step": 75770 }, { "epoch": 0.4841368207198804, "grad_norm": 0.8319995403289795, "learning_rate": 8.623562451268378e-05, "loss": 1.0109, "step": 75780 }, { "epoch": 0.4842007078696191, "grad_norm": 1.3417378664016724, "learning_rate": 8.623216688744113e-05, "loss": 0.8693, "step": 75790 }, { "epoch": 0.4842645950193578, "grad_norm": 0.7891839742660522, "learning_rate": 8.622870889730921e-05, "loss": 0.9214, "step": 75800 }, { "epoch": 0.4843284821690965, "grad_norm": 0.7130112648010254, "learning_rate": 8.622525054232285e-05, "loss": 0.8262, "step": 75810 }, { "epoch": 0.4843923693188352, "grad_norm": 0.9320762157440186, "learning_rate": 8.622179182251686e-05, "loss": 0.8674, "step": 75820 }, { "epoch": 0.48445625646857393, "grad_norm": 0.6487066745758057, "learning_rate": 8.62183327379261e-05, "loss": 1.126, "step": 75830 }, { "epoch": 0.48452014361831264, "grad_norm": 0.6271628737449646, "learning_rate": 8.62148732885854e-05, "loss": 0.7827, "step": 75840 }, { "epoch": 0.4845840307680513, "grad_norm": 0.7439334988594055, "learning_rate": 8.621141347452959e-05, "loss": 1.2293, "step": 75850 }, { "epoch": 0.48464791791779, "grad_norm": 0.8553930521011353, "learning_rate": 8.620795329579354e-05, "loss": 0.8525, "step": 75860 }, { "epoch": 0.4847118050675287, "grad_norm": 0.9168295860290527, "learning_rate": 8.620449275241205e-05, "loss": 0.7103, "step": 75870 }, { "epoch": 0.4847756922172674, "grad_norm": 0.61861652135849, "learning_rate": 8.620103184442001e-05, "loss": 0.796, "step": 75880 }, { "epoch": 0.4848395793670061, "grad_norm": 1.4174355268478394, "learning_rate": 8.619757057185226e-05, "loss": 0.8479, "step": 75890 }, { "epoch": 0.4849034665167448, "grad_norm": 0.9580785036087036, "learning_rate": 8.619410893474365e-05, "loss": 0.7067, "step": 75900 }, { "epoch": 0.4849673536664835, "grad_norm": 0.7961419820785522, "learning_rate": 8.619064693312906e-05, "loss": 1.1983, "step": 75910 }, { "epoch": 0.48503124081622223, "grad_norm": 1.8671194314956665, "learning_rate": 8.618718456704335e-05, "loss": 1.2858, "step": 75920 }, { "epoch": 0.48509512796596094, "grad_norm": 1.4799001216888428, "learning_rate": 8.618372183652137e-05, "loss": 0.9962, "step": 75930 }, { "epoch": 0.48515901511569964, "grad_norm": 0.6392105221748352, "learning_rate": 8.6180258741598e-05, "loss": 0.8089, "step": 75940 }, { "epoch": 0.48522290226543835, "grad_norm": 0.8513908982276917, "learning_rate": 8.617679528230816e-05, "loss": 0.9247, "step": 75950 }, { "epoch": 0.48528678941517706, "grad_norm": 0.6598104238510132, "learning_rate": 8.617333145868667e-05, "loss": 0.9169, "step": 75960 }, { "epoch": 0.4853506765649157, "grad_norm": 1.3016315698623657, "learning_rate": 8.616986727076843e-05, "loss": 0.8606, "step": 75970 }, { "epoch": 0.4854145637146544, "grad_norm": 0.950963020324707, "learning_rate": 8.616640271858835e-05, "loss": 0.8453, "step": 75980 }, { "epoch": 0.4854784508643931, "grad_norm": 0.9443991780281067, "learning_rate": 8.616293780218131e-05, "loss": 0.9117, "step": 75990 }, { "epoch": 0.4855423380141318, "grad_norm": 0.8694010972976685, "learning_rate": 8.615947252158219e-05, "loss": 0.9585, "step": 76000 }, { "epoch": 0.48560622516387053, "grad_norm": 1.7652310132980347, "learning_rate": 8.615600687682591e-05, "loss": 0.9593, "step": 76010 }, { "epoch": 0.48567011231360924, "grad_norm": 0.4394935369491577, "learning_rate": 8.615254086794735e-05, "loss": 0.856, "step": 76020 }, { "epoch": 0.48573399946334794, "grad_norm": 1.1516753435134888, "learning_rate": 8.614907449498144e-05, "loss": 1.2644, "step": 76030 }, { "epoch": 0.48579788661308665, "grad_norm": 2.2719500064849854, "learning_rate": 8.614560775796307e-05, "loss": 0.7425, "step": 76040 }, { "epoch": 0.48586177376282536, "grad_norm": 0.5767148733139038, "learning_rate": 8.614214065692715e-05, "loss": 0.8913, "step": 76050 }, { "epoch": 0.48592566091256406, "grad_norm": 0.7121883034706116, "learning_rate": 8.613867319190861e-05, "loss": 1.0213, "step": 76060 }, { "epoch": 0.48598954806230277, "grad_norm": 0.948017418384552, "learning_rate": 8.613520536294238e-05, "loss": 0.8787, "step": 76070 }, { "epoch": 0.4860534352120415, "grad_norm": 0.6437438130378723, "learning_rate": 8.613173717006335e-05, "loss": 0.9009, "step": 76080 }, { "epoch": 0.4861173223617801, "grad_norm": 0.7081766724586487, "learning_rate": 8.612826861330648e-05, "loss": 0.9181, "step": 76090 }, { "epoch": 0.48618120951151883, "grad_norm": 0.7698941826820374, "learning_rate": 8.61247996927067e-05, "loss": 0.8093, "step": 76100 }, { "epoch": 0.48624509666125754, "grad_norm": 0.8024051785469055, "learning_rate": 8.612133040829892e-05, "loss": 0.98, "step": 76110 }, { "epoch": 0.48630898381099624, "grad_norm": 2.622551679611206, "learning_rate": 8.611786076011809e-05, "loss": 0.9282, "step": 76120 }, { "epoch": 0.48637287096073495, "grad_norm": 1.0354362726211548, "learning_rate": 8.611439074819917e-05, "loss": 1.0491, "step": 76130 }, { "epoch": 0.48643675811047365, "grad_norm": 0.6621295213699341, "learning_rate": 8.611092037257709e-05, "loss": 0.776, "step": 76140 }, { "epoch": 0.48650064526021236, "grad_norm": 0.6664482355117798, "learning_rate": 8.610744963328679e-05, "loss": 1.0026, "step": 76150 }, { "epoch": 0.48656453240995107, "grad_norm": 0.9559485912322998, "learning_rate": 8.610397853036325e-05, "loss": 1.0425, "step": 76160 }, { "epoch": 0.4866284195596898, "grad_norm": 1.0177385807037354, "learning_rate": 8.61005070638414e-05, "loss": 0.9678, "step": 76170 }, { "epoch": 0.4866923067094285, "grad_norm": 0.630623459815979, "learning_rate": 8.60970352337562e-05, "loss": 0.99, "step": 76180 }, { "epoch": 0.4867561938591672, "grad_norm": 0.9502881169319153, "learning_rate": 8.609356304014264e-05, "loss": 0.7239, "step": 76190 }, { "epoch": 0.4868200810089059, "grad_norm": 2.090254306793213, "learning_rate": 8.60900904830357e-05, "loss": 1.2069, "step": 76200 }, { "epoch": 0.4868839681586446, "grad_norm": 0.5957566499710083, "learning_rate": 8.60866175624703e-05, "loss": 0.8932, "step": 76210 }, { "epoch": 0.48694785530838325, "grad_norm": 1.5734295845031738, "learning_rate": 8.608314427848144e-05, "loss": 0.7924, "step": 76220 }, { "epoch": 0.48701174245812195, "grad_norm": 0.6711301207542419, "learning_rate": 8.60796706311041e-05, "loss": 0.952, "step": 76230 }, { "epoch": 0.48707562960786066, "grad_norm": 0.6539300084114075, "learning_rate": 8.607619662037327e-05, "loss": 0.949, "step": 76240 }, { "epoch": 0.48713951675759937, "grad_norm": 1.1970055103302002, "learning_rate": 8.607272224632393e-05, "loss": 1.0121, "step": 76250 }, { "epoch": 0.4872034039073381, "grad_norm": 0.9336310625076294, "learning_rate": 8.606924750899106e-05, "loss": 0.8952, "step": 76260 }, { "epoch": 0.4872672910570768, "grad_norm": 0.9023282527923584, "learning_rate": 8.606577240840968e-05, "loss": 0.9134, "step": 76270 }, { "epoch": 0.4873311782068155, "grad_norm": 0.4293481111526489, "learning_rate": 8.606229694461476e-05, "loss": 0.7425, "step": 76280 }, { "epoch": 0.4873950653565542, "grad_norm": 0.736682116985321, "learning_rate": 8.605882111764132e-05, "loss": 0.8171, "step": 76290 }, { "epoch": 0.4874589525062929, "grad_norm": 1.6317270994186401, "learning_rate": 8.605534492752434e-05, "loss": 0.824, "step": 76300 }, { "epoch": 0.4875228396560316, "grad_norm": 3.0119450092315674, "learning_rate": 8.605186837429887e-05, "loss": 0.859, "step": 76310 }, { "epoch": 0.4875867268057703, "grad_norm": 1.0656332969665527, "learning_rate": 8.604839145799987e-05, "loss": 0.7387, "step": 76320 }, { "epoch": 0.487650613955509, "grad_norm": 0.7559338808059692, "learning_rate": 8.604491417866238e-05, "loss": 0.9439, "step": 76330 }, { "epoch": 0.48771450110524767, "grad_norm": 0.8888264894485474, "learning_rate": 8.604143653632144e-05, "loss": 1.0296, "step": 76340 }, { "epoch": 0.4877783882549864, "grad_norm": 0.9546695947647095, "learning_rate": 8.603795853101204e-05, "loss": 1.0504, "step": 76350 }, { "epoch": 0.4878422754047251, "grad_norm": 2.9092493057250977, "learning_rate": 8.603448016276924e-05, "loss": 1.1027, "step": 76360 }, { "epoch": 0.4879061625544638, "grad_norm": 0.840092122554779, "learning_rate": 8.603100143162803e-05, "loss": 0.7812, "step": 76370 }, { "epoch": 0.4879700497042025, "grad_norm": 0.8399893641471863, "learning_rate": 8.602752233762348e-05, "loss": 0.9633, "step": 76380 }, { "epoch": 0.4880339368539412, "grad_norm": 0.9037623405456543, "learning_rate": 8.60240428807906e-05, "loss": 0.9545, "step": 76390 }, { "epoch": 0.4880978240036799, "grad_norm": 0.843728244304657, "learning_rate": 8.602056306116445e-05, "loss": 0.7823, "step": 76400 }, { "epoch": 0.4881617111534186, "grad_norm": 0.9266428351402283, "learning_rate": 8.601708287878006e-05, "loss": 0.7908, "step": 76410 }, { "epoch": 0.4882255983031573, "grad_norm": 0.7917917966842651, "learning_rate": 8.60136023336725e-05, "loss": 1.0549, "step": 76420 }, { "epoch": 0.488289485452896, "grad_norm": 0.7976272702217102, "learning_rate": 8.601012142587678e-05, "loss": 0.944, "step": 76430 }, { "epoch": 0.4883533726026347, "grad_norm": 1.0543662309646606, "learning_rate": 8.6006640155428e-05, "loss": 0.8536, "step": 76440 }, { "epoch": 0.48841725975237343, "grad_norm": 0.7209562063217163, "learning_rate": 8.600315852236121e-05, "loss": 0.7236, "step": 76450 }, { "epoch": 0.4884811469021121, "grad_norm": 0.7003374695777893, "learning_rate": 8.599967652671147e-05, "loss": 1.1172, "step": 76460 }, { "epoch": 0.4885450340518508, "grad_norm": 1.0924787521362305, "learning_rate": 8.599619416851384e-05, "loss": 0.7156, "step": 76470 }, { "epoch": 0.4886089212015895, "grad_norm": 0.6103460192680359, "learning_rate": 8.599271144780339e-05, "loss": 0.9213, "step": 76480 }, { "epoch": 0.4886728083513282, "grad_norm": 0.675788938999176, "learning_rate": 8.59892283646152e-05, "loss": 0.7218, "step": 76490 }, { "epoch": 0.4887366955010669, "grad_norm": 0.5468382835388184, "learning_rate": 8.598574491898435e-05, "loss": 0.7851, "step": 76500 }, { "epoch": 0.4888005826508056, "grad_norm": 0.9708940982818604, "learning_rate": 8.59822611109459e-05, "loss": 0.8894, "step": 76510 }, { "epoch": 0.4888644698005443, "grad_norm": 2.2232227325439453, "learning_rate": 8.597877694053496e-05, "loss": 1.0381, "step": 76520 }, { "epoch": 0.488928356950283, "grad_norm": 1.9338047504425049, "learning_rate": 8.597529240778661e-05, "loss": 0.8914, "step": 76530 }, { "epoch": 0.48899224410002173, "grad_norm": 0.842464029788971, "learning_rate": 8.597180751273595e-05, "loss": 0.8219, "step": 76540 }, { "epoch": 0.48905613124976044, "grad_norm": 0.6262010931968689, "learning_rate": 8.596832225541806e-05, "loss": 0.6786, "step": 76550 }, { "epoch": 0.48912001839949915, "grad_norm": 0.7342615723609924, "learning_rate": 8.596483663586804e-05, "loss": 0.6999, "step": 76560 }, { "epoch": 0.48918390554923785, "grad_norm": 0.6208049654960632, "learning_rate": 8.596135065412101e-05, "loss": 1.0309, "step": 76570 }, { "epoch": 0.4892477926989765, "grad_norm": 0.8948808312416077, "learning_rate": 8.595786431021207e-05, "loss": 0.7239, "step": 76580 }, { "epoch": 0.4893116798487152, "grad_norm": 0.7227377891540527, "learning_rate": 8.595437760417633e-05, "loss": 0.9171, "step": 76590 }, { "epoch": 0.4893755669984539, "grad_norm": 0.8162720203399658, "learning_rate": 8.59508905360489e-05, "loss": 0.9206, "step": 76600 }, { "epoch": 0.4894394541481926, "grad_norm": 1.031140923500061, "learning_rate": 8.59474031058649e-05, "loss": 0.8738, "step": 76610 }, { "epoch": 0.4895033412979313, "grad_norm": 0.6611879467964172, "learning_rate": 8.594391531365943e-05, "loss": 0.8089, "step": 76620 }, { "epoch": 0.48956722844767003, "grad_norm": 0.8293446898460388, "learning_rate": 8.594042715946768e-05, "loss": 1.0846, "step": 76630 }, { "epoch": 0.48963111559740874, "grad_norm": 0.7987895607948303, "learning_rate": 8.59369386433247e-05, "loss": 1.0023, "step": 76640 }, { "epoch": 0.48969500274714745, "grad_norm": 0.8030225038528442, "learning_rate": 8.593344976526569e-05, "loss": 0.6244, "step": 76650 }, { "epoch": 0.48975888989688615, "grad_norm": 1.0051014423370361, "learning_rate": 8.592996052532572e-05, "loss": 0.8662, "step": 76660 }, { "epoch": 0.48982277704662486, "grad_norm": 1.1758030652999878, "learning_rate": 8.592647092353998e-05, "loss": 0.8531, "step": 76670 }, { "epoch": 0.48988666419636356, "grad_norm": 0.8429425358772278, "learning_rate": 8.59229809599436e-05, "loss": 0.9254, "step": 76680 }, { "epoch": 0.48995055134610227, "grad_norm": 0.9333186149597168, "learning_rate": 8.591949063457172e-05, "loss": 0.9736, "step": 76690 }, { "epoch": 0.4900144384958409, "grad_norm": 0.98914635181427, "learning_rate": 8.59159999474595e-05, "loss": 0.8071, "step": 76700 }, { "epoch": 0.4900783256455796, "grad_norm": 0.6618992686271667, "learning_rate": 8.591250889864209e-05, "loss": 1.0296, "step": 76710 }, { "epoch": 0.49014221279531833, "grad_norm": 0.5613696575164795, "learning_rate": 8.590901748815464e-05, "loss": 0.8095, "step": 76720 }, { "epoch": 0.49020609994505704, "grad_norm": 0.6220462322235107, "learning_rate": 8.590552571603232e-05, "loss": 0.7297, "step": 76730 }, { "epoch": 0.49026998709479575, "grad_norm": 0.5085312128067017, "learning_rate": 8.590203358231028e-05, "loss": 0.7892, "step": 76740 }, { "epoch": 0.49033387424453445, "grad_norm": 0.7087169885635376, "learning_rate": 8.589854108702371e-05, "loss": 0.8027, "step": 76750 }, { "epoch": 0.49039776139427316, "grad_norm": 0.7277820110321045, "learning_rate": 8.589504823020778e-05, "loss": 0.8146, "step": 76760 }, { "epoch": 0.49046164854401186, "grad_norm": 0.8798472881317139, "learning_rate": 8.589155501189767e-05, "loss": 0.8185, "step": 76770 }, { "epoch": 0.49052553569375057, "grad_norm": 0.8742108345031738, "learning_rate": 8.588806143212852e-05, "loss": 0.9735, "step": 76780 }, { "epoch": 0.4905894228434893, "grad_norm": 1.9560281038284302, "learning_rate": 8.588456749093558e-05, "loss": 0.9918, "step": 76790 }, { "epoch": 0.490653309993228, "grad_norm": 0.953271746635437, "learning_rate": 8.588107318835398e-05, "loss": 1.0473, "step": 76800 }, { "epoch": 0.4907171971429667, "grad_norm": 0.8690406084060669, "learning_rate": 8.587757852441893e-05, "loss": 0.9629, "step": 76810 }, { "epoch": 0.49078108429270534, "grad_norm": 1.7574247121810913, "learning_rate": 8.587408349916564e-05, "loss": 0.662, "step": 76820 }, { "epoch": 0.49084497144244404, "grad_norm": 0.9854816198348999, "learning_rate": 8.587058811262929e-05, "loss": 0.86, "step": 76830 }, { "epoch": 0.49090885859218275, "grad_norm": 1.1772929430007935, "learning_rate": 8.586709236484507e-05, "loss": 0.7821, "step": 76840 }, { "epoch": 0.49097274574192146, "grad_norm": 1.0073944330215454, "learning_rate": 8.586359625584822e-05, "loss": 0.8854, "step": 76850 }, { "epoch": 0.49103663289166016, "grad_norm": 0.7128773927688599, "learning_rate": 8.586009978567391e-05, "loss": 0.7433, "step": 76860 }, { "epoch": 0.49110052004139887, "grad_norm": 0.663662314414978, "learning_rate": 8.58566029543574e-05, "loss": 0.8485, "step": 76870 }, { "epoch": 0.4911644071911376, "grad_norm": 0.6211137771606445, "learning_rate": 8.585310576193384e-05, "loss": 0.8421, "step": 76880 }, { "epoch": 0.4912282943408763, "grad_norm": 1.202216625213623, "learning_rate": 8.584960820843851e-05, "loss": 0.7988, "step": 76890 }, { "epoch": 0.491292181490615, "grad_norm": 0.7233720421791077, "learning_rate": 8.584611029390661e-05, "loss": 0.903, "step": 76900 }, { "epoch": 0.4913560686403537, "grad_norm": 0.5349984765052795, "learning_rate": 8.584261201837337e-05, "loss": 1.0756, "step": 76910 }, { "epoch": 0.4914199557900924, "grad_norm": 0.9443363547325134, "learning_rate": 8.583911338187401e-05, "loss": 0.8465, "step": 76920 }, { "epoch": 0.4914838429398311, "grad_norm": 0.7299784421920776, "learning_rate": 8.583561438444379e-05, "loss": 1.1754, "step": 76930 }, { "epoch": 0.49154773008956976, "grad_norm": 1.1991528272628784, "learning_rate": 8.583211502611792e-05, "loss": 0.9597, "step": 76940 }, { "epoch": 0.49161161723930846, "grad_norm": 1.1621544361114502, "learning_rate": 8.582861530693165e-05, "loss": 0.7185, "step": 76950 }, { "epoch": 0.49167550438904717, "grad_norm": 1.565203070640564, "learning_rate": 8.582511522692022e-05, "loss": 0.8875, "step": 76960 }, { "epoch": 0.4917393915387859, "grad_norm": 0.9279037714004517, "learning_rate": 8.58216147861189e-05, "loss": 1.1073, "step": 76970 }, { "epoch": 0.4918032786885246, "grad_norm": 1.2294920682907104, "learning_rate": 8.581811398456292e-05, "loss": 0.9467, "step": 76980 }, { "epoch": 0.4918671658382633, "grad_norm": 0.6225689053535461, "learning_rate": 8.581461282228756e-05, "loss": 0.9234, "step": 76990 }, { "epoch": 0.491931052988002, "grad_norm": 1.2206075191497803, "learning_rate": 8.581111129932805e-05, "loss": 1.0835, "step": 77000 }, { "epoch": 0.4919949401377407, "grad_norm": 0.7417840957641602, "learning_rate": 8.580760941571967e-05, "loss": 0.9111, "step": 77010 }, { "epoch": 0.4920588272874794, "grad_norm": 0.6536421775817871, "learning_rate": 8.580410717149769e-05, "loss": 0.9932, "step": 77020 }, { "epoch": 0.4921227144372181, "grad_norm": 0.8102644681930542, "learning_rate": 8.580060456669738e-05, "loss": 1.0341, "step": 77030 }, { "epoch": 0.4921866015869568, "grad_norm": 0.9170993566513062, "learning_rate": 8.579710160135399e-05, "loss": 1.0178, "step": 77040 }, { "epoch": 0.4922504887366955, "grad_norm": 0.4981268644332886, "learning_rate": 8.579359827550284e-05, "loss": 0.7805, "step": 77050 }, { "epoch": 0.49231437588643423, "grad_norm": 1.8786097764968872, "learning_rate": 8.579009458917917e-05, "loss": 0.9201, "step": 77060 }, { "epoch": 0.4923782630361729, "grad_norm": 1.0269956588745117, "learning_rate": 8.57865905424183e-05, "loss": 0.9258, "step": 77070 }, { "epoch": 0.4924421501859116, "grad_norm": 0.959625244140625, "learning_rate": 8.578308613525549e-05, "loss": 0.7213, "step": 77080 }, { "epoch": 0.4925060373356503, "grad_norm": 0.7318682670593262, "learning_rate": 8.577958136772608e-05, "loss": 0.9329, "step": 77090 }, { "epoch": 0.492569924485389, "grad_norm": 0.9213690757751465, "learning_rate": 8.57760762398653e-05, "loss": 0.8856, "step": 77100 }, { "epoch": 0.4926338116351277, "grad_norm": 0.7937483191490173, "learning_rate": 8.577257075170849e-05, "loss": 1.2098, "step": 77110 }, { "epoch": 0.4926976987848664, "grad_norm": 1.0895425081253052, "learning_rate": 8.576906490329094e-05, "loss": 0.8427, "step": 77120 }, { "epoch": 0.4927615859346051, "grad_norm": 0.9130338430404663, "learning_rate": 8.576555869464798e-05, "loss": 1.2261, "step": 77130 }, { "epoch": 0.4928254730843438, "grad_norm": 0.7346659302711487, "learning_rate": 8.576205212581488e-05, "loss": 0.6587, "step": 77140 }, { "epoch": 0.49288936023408253, "grad_norm": 3.969825267791748, "learning_rate": 8.575854519682698e-05, "loss": 1.0008, "step": 77150 }, { "epoch": 0.49295324738382124, "grad_norm": 1.1453746557235718, "learning_rate": 8.575503790771959e-05, "loss": 1.0563, "step": 77160 }, { "epoch": 0.49301713453355994, "grad_norm": 1.0311975479125977, "learning_rate": 8.575153025852804e-05, "loss": 0.944, "step": 77170 }, { "epoch": 0.49308102168329865, "grad_norm": 0.713505744934082, "learning_rate": 8.574802224928766e-05, "loss": 0.8591, "step": 77180 }, { "epoch": 0.4931449088330373, "grad_norm": 0.9348772764205933, "learning_rate": 8.574451388003378e-05, "loss": 0.7919, "step": 77190 }, { "epoch": 0.493208795982776, "grad_norm": 1.0843831300735474, "learning_rate": 8.57410051508017e-05, "loss": 0.8934, "step": 77200 }, { "epoch": 0.4932726831325147, "grad_norm": 0.8350475430488586, "learning_rate": 8.573749606162678e-05, "loss": 0.7806, "step": 77210 }, { "epoch": 0.4933365702822534, "grad_norm": 1.9250234365463257, "learning_rate": 8.573398661254438e-05, "loss": 0.7523, "step": 77220 }, { "epoch": 0.4934004574319921, "grad_norm": 0.9152832627296448, "learning_rate": 8.573047680358978e-05, "loss": 0.8821, "step": 77230 }, { "epoch": 0.49346434458173083, "grad_norm": 0.9231581687927246, "learning_rate": 8.57269666347984e-05, "loss": 0.7803, "step": 77240 }, { "epoch": 0.49352823173146954, "grad_norm": 0.792326807975769, "learning_rate": 8.572345610620553e-05, "loss": 0.9124, "step": 77250 }, { "epoch": 0.49359211888120824, "grad_norm": 0.986379861831665, "learning_rate": 8.571994521784659e-05, "loss": 0.9055, "step": 77260 }, { "epoch": 0.49365600603094695, "grad_norm": 1.0129300355911255, "learning_rate": 8.571643396975688e-05, "loss": 0.8168, "step": 77270 }, { "epoch": 0.49371989318068565, "grad_norm": 0.5476410984992981, "learning_rate": 8.571292236197178e-05, "loss": 0.6777, "step": 77280 }, { "epoch": 0.49378378033042436, "grad_norm": 1.3759448528289795, "learning_rate": 8.570941039452665e-05, "loss": 0.9291, "step": 77290 }, { "epoch": 0.49384766748016307, "grad_norm": 1.3601030111312866, "learning_rate": 8.570589806745687e-05, "loss": 0.9618, "step": 77300 }, { "epoch": 0.4939115546299017, "grad_norm": 1.174814224243164, "learning_rate": 8.57023853807978e-05, "loss": 0.8576, "step": 77310 }, { "epoch": 0.4939754417796404, "grad_norm": 1.2242119312286377, "learning_rate": 8.569887233458482e-05, "loss": 1.1461, "step": 77320 }, { "epoch": 0.49403932892937913, "grad_norm": 1.0794601440429688, "learning_rate": 8.569535892885333e-05, "loss": 0.8275, "step": 77330 }, { "epoch": 0.49410321607911784, "grad_norm": 0.802666962146759, "learning_rate": 8.569184516363869e-05, "loss": 1.0711, "step": 77340 }, { "epoch": 0.49416710322885654, "grad_norm": 0.9685359001159668, "learning_rate": 8.568833103897628e-05, "loss": 0.8529, "step": 77350 }, { "epoch": 0.49423099037859525, "grad_norm": 0.9577045440673828, "learning_rate": 8.568481655490151e-05, "loss": 0.801, "step": 77360 }, { "epoch": 0.49429487752833395, "grad_norm": 0.6817383170127869, "learning_rate": 8.568130171144975e-05, "loss": 0.9074, "step": 77370 }, { "epoch": 0.49435876467807266, "grad_norm": 1.0045511722564697, "learning_rate": 8.567778650865643e-05, "loss": 0.8414, "step": 77380 }, { "epoch": 0.49442265182781137, "grad_norm": 1.0986788272857666, "learning_rate": 8.567427094655693e-05, "loss": 0.8889, "step": 77390 }, { "epoch": 0.49448653897755007, "grad_norm": 0.8840765357017517, "learning_rate": 8.567075502518667e-05, "loss": 0.9405, "step": 77400 }, { "epoch": 0.4945504261272888, "grad_norm": 0.7125093936920166, "learning_rate": 8.566723874458102e-05, "loss": 1.0639, "step": 77410 }, { "epoch": 0.4946143132770275, "grad_norm": 0.9904595017433167, "learning_rate": 8.566372210477544e-05, "loss": 0.9249, "step": 77420 }, { "epoch": 0.49467820042676613, "grad_norm": 0.9218760132789612, "learning_rate": 8.566020510580532e-05, "loss": 0.8189, "step": 77430 }, { "epoch": 0.49474208757650484, "grad_norm": 0.7512104511260986, "learning_rate": 8.56566877477061e-05, "loss": 1.1084, "step": 77440 }, { "epoch": 0.49480597472624355, "grad_norm": 1.2798714637756348, "learning_rate": 8.565317003051316e-05, "loss": 1.0737, "step": 77450 }, { "epoch": 0.49486986187598225, "grad_norm": 0.9428762793540955, "learning_rate": 8.564965195426197e-05, "loss": 0.8921, "step": 77460 }, { "epoch": 0.49493374902572096, "grad_norm": 0.6847555637359619, "learning_rate": 8.564613351898794e-05, "loss": 0.6569, "step": 77470 }, { "epoch": 0.49499763617545967, "grad_norm": 0.9463028311729431, "learning_rate": 8.56426147247265e-05, "loss": 1.0144, "step": 77480 }, { "epoch": 0.49506152332519837, "grad_norm": 0.9155146479606628, "learning_rate": 8.56390955715131e-05, "loss": 1.0397, "step": 77490 }, { "epoch": 0.4951254104749371, "grad_norm": 1.0941190719604492, "learning_rate": 8.563557605938317e-05, "loss": 0.9424, "step": 77500 }, { "epoch": 0.4951892976246758, "grad_norm": 0.6458025574684143, "learning_rate": 8.563205618837217e-05, "loss": 0.9965, "step": 77510 }, { "epoch": 0.4952531847744145, "grad_norm": 0.49309995770454407, "learning_rate": 8.562853595851554e-05, "loss": 0.9261, "step": 77520 }, { "epoch": 0.4953170719241532, "grad_norm": 0.741894543170929, "learning_rate": 8.562501536984873e-05, "loss": 0.9298, "step": 77530 }, { "epoch": 0.4953809590738919, "grad_norm": 0.9847732782363892, "learning_rate": 8.562149442240718e-05, "loss": 0.911, "step": 77540 }, { "epoch": 0.49544484622363055, "grad_norm": 0.9211950302124023, "learning_rate": 8.561797311622637e-05, "loss": 0.8011, "step": 77550 }, { "epoch": 0.49550873337336926, "grad_norm": 1.5273650884628296, "learning_rate": 8.561445145134177e-05, "loss": 0.9902, "step": 77560 }, { "epoch": 0.49557262052310797, "grad_norm": 0.8541943430900574, "learning_rate": 8.561092942778882e-05, "loss": 0.7033, "step": 77570 }, { "epoch": 0.49563650767284667, "grad_norm": 1.0141565799713135, "learning_rate": 8.560740704560299e-05, "loss": 0.7592, "step": 77580 }, { "epoch": 0.4957003948225854, "grad_norm": 1.0680170059204102, "learning_rate": 8.560388430481979e-05, "loss": 1.0655, "step": 77590 }, { "epoch": 0.4957642819723241, "grad_norm": 0.8860800862312317, "learning_rate": 8.560036120547468e-05, "loss": 0.9499, "step": 77600 }, { "epoch": 0.4958281691220628, "grad_norm": 1.698761224746704, "learning_rate": 8.559683774760311e-05, "loss": 1.088, "step": 77610 }, { "epoch": 0.4958920562718015, "grad_norm": 1.741514801979065, "learning_rate": 8.559331393124059e-05, "loss": 0.9058, "step": 77620 }, { "epoch": 0.4959559434215402, "grad_norm": 2.4504334926605225, "learning_rate": 8.558978975642262e-05, "loss": 0.8441, "step": 77630 }, { "epoch": 0.4960198305712789, "grad_norm": 1.7975729703903198, "learning_rate": 8.558626522318467e-05, "loss": 1.0169, "step": 77640 }, { "epoch": 0.4960837177210176, "grad_norm": 0.8959939479827881, "learning_rate": 8.558274033156224e-05, "loss": 0.7613, "step": 77650 }, { "epoch": 0.4961476048707563, "grad_norm": 1.107818841934204, "learning_rate": 8.557921508159083e-05, "loss": 1.1208, "step": 77660 }, { "epoch": 0.49621149202049497, "grad_norm": 0.7719281911849976, "learning_rate": 8.557568947330596e-05, "loss": 0.932, "step": 77670 }, { "epoch": 0.4962753791702337, "grad_norm": 0.7551287412643433, "learning_rate": 8.557216350674311e-05, "loss": 0.9016, "step": 77680 }, { "epoch": 0.4963392663199724, "grad_norm": 0.8318659067153931, "learning_rate": 8.556863718193779e-05, "loss": 0.9607, "step": 77690 }, { "epoch": 0.4964031534697111, "grad_norm": 0.799311101436615, "learning_rate": 8.556511049892553e-05, "loss": 1.0467, "step": 77700 }, { "epoch": 0.4964670406194498, "grad_norm": 1.174993634223938, "learning_rate": 8.556158345774184e-05, "loss": 0.9272, "step": 77710 }, { "epoch": 0.4965309277691885, "grad_norm": 0.6065928936004639, "learning_rate": 8.555805605842224e-05, "loss": 0.9209, "step": 77720 }, { "epoch": 0.4965948149189272, "grad_norm": 0.9526639580726624, "learning_rate": 8.555452830100226e-05, "loss": 0.8938, "step": 77730 }, { "epoch": 0.4966587020686659, "grad_norm": 0.893748939037323, "learning_rate": 8.555100018551741e-05, "loss": 1.0344, "step": 77740 }, { "epoch": 0.4967225892184046, "grad_norm": 0.9119165539741516, "learning_rate": 8.554747171200324e-05, "loss": 1.1131, "step": 77750 }, { "epoch": 0.4967864763681433, "grad_norm": 0.649663507938385, "learning_rate": 8.554394288049526e-05, "loss": 0.8, "step": 77760 }, { "epoch": 0.49685036351788203, "grad_norm": 0.7879058122634888, "learning_rate": 8.554041369102904e-05, "loss": 0.7511, "step": 77770 }, { "epoch": 0.49691425066762074, "grad_norm": 1.0487127304077148, "learning_rate": 8.55368841436401e-05, "loss": 1.022, "step": 77780 }, { "epoch": 0.4969781378173594, "grad_norm": 2.496959686279297, "learning_rate": 8.553335423836399e-05, "loss": 1.0092, "step": 77790 }, { "epoch": 0.4970420249670981, "grad_norm": 0.8213831186294556, "learning_rate": 8.552982397523628e-05, "loss": 0.6308, "step": 77800 }, { "epoch": 0.4971059121168368, "grad_norm": 1.1579580307006836, "learning_rate": 8.55262933542925e-05, "loss": 1.0219, "step": 77810 }, { "epoch": 0.4971697992665755, "grad_norm": 0.927528440952301, "learning_rate": 8.55227623755682e-05, "loss": 0.7056, "step": 77820 }, { "epoch": 0.4972336864163142, "grad_norm": 1.2623285055160522, "learning_rate": 8.551923103909896e-05, "loss": 0.9829, "step": 77830 }, { "epoch": 0.4972975735660529, "grad_norm": 0.9464250802993774, "learning_rate": 8.551569934492032e-05, "loss": 0.8887, "step": 77840 }, { "epoch": 0.4973614607157916, "grad_norm": 2.496879816055298, "learning_rate": 8.551216729306788e-05, "loss": 1.054, "step": 77850 }, { "epoch": 0.49742534786553033, "grad_norm": 1.2735011577606201, "learning_rate": 8.550863488357718e-05, "loss": 0.7108, "step": 77860 }, { "epoch": 0.49748923501526904, "grad_norm": 0.8742243647575378, "learning_rate": 8.550510211648382e-05, "loss": 1.1427, "step": 77870 }, { "epoch": 0.49755312216500774, "grad_norm": 0.5537328720092773, "learning_rate": 8.550156899182336e-05, "loss": 0.5181, "step": 77880 }, { "epoch": 0.49761700931474645, "grad_norm": 1.0898637771606445, "learning_rate": 8.54980355096314e-05, "loss": 1.0092, "step": 77890 }, { "epoch": 0.49768089646448516, "grad_norm": 0.8493994474411011, "learning_rate": 8.549450166994348e-05, "loss": 0.9335, "step": 77900 }, { "epoch": 0.49774478361422386, "grad_norm": 0.8540746569633484, "learning_rate": 8.549096747279526e-05, "loss": 0.9631, "step": 77910 }, { "epoch": 0.4978086707639625, "grad_norm": 0.9067754745483398, "learning_rate": 8.548743291822227e-05, "loss": 0.7435, "step": 77920 }, { "epoch": 0.4978725579137012, "grad_norm": 0.9325600862503052, "learning_rate": 8.548389800626013e-05, "loss": 0.9721, "step": 77930 }, { "epoch": 0.4979364450634399, "grad_norm": 0.892930805683136, "learning_rate": 8.548036273694445e-05, "loss": 1.0944, "step": 77940 }, { "epoch": 0.49800033221317863, "grad_norm": 0.8587602376937866, "learning_rate": 8.54768271103108e-05, "loss": 1.1267, "step": 77950 }, { "epoch": 0.49806421936291734, "grad_norm": 0.6374524831771851, "learning_rate": 8.547329112639483e-05, "loss": 0.8988, "step": 77960 }, { "epoch": 0.49812810651265604, "grad_norm": 0.7276429533958435, "learning_rate": 8.546975478523211e-05, "loss": 0.9158, "step": 77970 }, { "epoch": 0.49819199366239475, "grad_norm": 1.0100622177124023, "learning_rate": 8.546621808685829e-05, "loss": 0.9022, "step": 77980 }, { "epoch": 0.49825588081213346, "grad_norm": 0.7123457193374634, "learning_rate": 8.546268103130897e-05, "loss": 1.0161, "step": 77990 }, { "epoch": 0.49831976796187216, "grad_norm": 1.1501771211624146, "learning_rate": 8.545914361861977e-05, "loss": 0.848, "step": 78000 }, { "epoch": 0.49838365511161087, "grad_norm": 1.3650606870651245, "learning_rate": 8.545560584882632e-05, "loss": 1.0104, "step": 78010 }, { "epoch": 0.4984475422613496, "grad_norm": 0.5080598592758179, "learning_rate": 8.545206772196425e-05, "loss": 0.8855, "step": 78020 }, { "epoch": 0.4985114294110883, "grad_norm": 0.9266533851623535, "learning_rate": 8.544852923806918e-05, "loss": 0.8948, "step": 78030 }, { "epoch": 0.49857531656082693, "grad_norm": 0.782556414604187, "learning_rate": 8.544499039717675e-05, "loss": 1.0098, "step": 78040 }, { "epoch": 0.49863920371056564, "grad_norm": 0.6983265280723572, "learning_rate": 8.544145119932261e-05, "loss": 0.8239, "step": 78050 }, { "epoch": 0.49870309086030434, "grad_norm": 0.8616853952407837, "learning_rate": 8.543791164454238e-05, "loss": 0.8512, "step": 78060 }, { "epoch": 0.49876697801004305, "grad_norm": 1.00681471824646, "learning_rate": 8.543437173287175e-05, "loss": 0.8171, "step": 78070 }, { "epoch": 0.49883086515978176, "grad_norm": 0.7537940144538879, "learning_rate": 8.543083146434632e-05, "loss": 0.7415, "step": 78080 }, { "epoch": 0.49889475230952046, "grad_norm": 0.8292582631111145, "learning_rate": 8.542729083900176e-05, "loss": 0.7361, "step": 78090 }, { "epoch": 0.49895863945925917, "grad_norm": 0.6987549066543579, "learning_rate": 8.542374985687376e-05, "loss": 1.0473, "step": 78100 }, { "epoch": 0.4990225266089979, "grad_norm": 0.7763581275939941, "learning_rate": 8.542020851799792e-05, "loss": 0.9915, "step": 78110 }, { "epoch": 0.4990864137587366, "grad_norm": 0.857244610786438, "learning_rate": 8.541666682240996e-05, "loss": 0.8533, "step": 78120 }, { "epoch": 0.4991503009084753, "grad_norm": 0.7238770127296448, "learning_rate": 8.541312477014551e-05, "loss": 1.0054, "step": 78130 }, { "epoch": 0.499214188058214, "grad_norm": 1.2885125875473022, "learning_rate": 8.540958236124028e-05, "loss": 0.9024, "step": 78140 }, { "epoch": 0.4992780752079527, "grad_norm": 1.3444433212280273, "learning_rate": 8.540603959572991e-05, "loss": 0.8516, "step": 78150 }, { "epoch": 0.49934196235769135, "grad_norm": 1.0060087442398071, "learning_rate": 8.540249647365008e-05, "loss": 0.8785, "step": 78160 }, { "epoch": 0.49940584950743006, "grad_norm": 1.0679055452346802, "learning_rate": 8.539895299503648e-05, "loss": 0.9874, "step": 78170 }, { "epoch": 0.49946973665716876, "grad_norm": 0.7113552093505859, "learning_rate": 8.539540915992482e-05, "loss": 0.8014, "step": 78180 }, { "epoch": 0.49953362380690747, "grad_norm": 1.0472384691238403, "learning_rate": 8.539186496835077e-05, "loss": 0.9478, "step": 78190 }, { "epoch": 0.4995975109566462, "grad_norm": 0.7270193099975586, "learning_rate": 8.538832042035e-05, "loss": 0.9747, "step": 78200 }, { "epoch": 0.4996613981063849, "grad_norm": 0.6182805895805359, "learning_rate": 8.538477551595824e-05, "loss": 0.9063, "step": 78210 }, { "epoch": 0.4997252852561236, "grad_norm": 1.1360803842544556, "learning_rate": 8.538123025521117e-05, "loss": 0.942, "step": 78220 }, { "epoch": 0.4997891724058623, "grad_norm": 0.7602689862251282, "learning_rate": 8.537768463814451e-05, "loss": 0.9089, "step": 78230 }, { "epoch": 0.499853059555601, "grad_norm": 0.8490816354751587, "learning_rate": 8.537413866479396e-05, "loss": 1.0442, "step": 78240 }, { "epoch": 0.4999169467053397, "grad_norm": 0.8993768692016602, "learning_rate": 8.537059233519522e-05, "loss": 0.7744, "step": 78250 }, { "epoch": 0.4999808338550784, "grad_norm": 1.221891164779663, "learning_rate": 8.536704564938402e-05, "loss": 0.9663, "step": 78260 }, { "epoch": 0.5000447210048171, "grad_norm": 0.7886923551559448, "learning_rate": 8.536349860739608e-05, "loss": 0.9475, "step": 78270 }, { "epoch": 0.5001086081545558, "grad_norm": 0.8273355960845947, "learning_rate": 8.535995120926712e-05, "loss": 0.9991, "step": 78280 }, { "epoch": 0.5001724953042945, "grad_norm": 0.7041333317756653, "learning_rate": 8.535640345503285e-05, "loss": 0.7416, "step": 78290 }, { "epoch": 0.5002363824540332, "grad_norm": 0.994158148765564, "learning_rate": 8.535285534472901e-05, "loss": 0.802, "step": 78300 }, { "epoch": 0.5003002696037719, "grad_norm": 1.7603987455368042, "learning_rate": 8.534930687839134e-05, "loss": 0.948, "step": 78310 }, { "epoch": 0.5003641567535106, "grad_norm": 0.8774569034576416, "learning_rate": 8.534575805605555e-05, "loss": 0.9112, "step": 78320 }, { "epoch": 0.5004280439032494, "grad_norm": 1.1153593063354492, "learning_rate": 8.534220887775743e-05, "loss": 0.8952, "step": 78330 }, { "epoch": 0.500491931052988, "grad_norm": 0.6436009407043457, "learning_rate": 8.533865934353267e-05, "loss": 0.8265, "step": 78340 }, { "epoch": 0.5005558182027267, "grad_norm": 0.8798633217811584, "learning_rate": 8.533510945341704e-05, "loss": 0.876, "step": 78350 }, { "epoch": 0.5006197053524654, "grad_norm": 0.92572021484375, "learning_rate": 8.533155920744629e-05, "loss": 1.0156, "step": 78360 }, { "epoch": 0.5006835925022041, "grad_norm": 1.193281650543213, "learning_rate": 8.532800860565618e-05, "loss": 0.7637, "step": 78370 }, { "epoch": 0.5007474796519428, "grad_norm": 1.7722188234329224, "learning_rate": 8.532445764808243e-05, "loss": 0.8904, "step": 78380 }, { "epoch": 0.5008113668016815, "grad_norm": 1.1688928604125977, "learning_rate": 8.532090633476087e-05, "loss": 0.9443, "step": 78390 }, { "epoch": 0.5008752539514202, "grad_norm": 0.7432667016983032, "learning_rate": 8.531735466572722e-05, "loss": 0.8921, "step": 78400 }, { "epoch": 0.5009391411011589, "grad_norm": 0.5280702710151672, "learning_rate": 8.531380264101722e-05, "loss": 0.6771, "step": 78410 }, { "epoch": 0.5010030282508976, "grad_norm": 1.1904703378677368, "learning_rate": 8.531025026066672e-05, "loss": 0.9539, "step": 78420 }, { "epoch": 0.5010669154006363, "grad_norm": 1.0105900764465332, "learning_rate": 8.530669752471142e-05, "loss": 0.7645, "step": 78430 }, { "epoch": 0.501130802550375, "grad_norm": 1.2654132843017578, "learning_rate": 8.530314443318714e-05, "loss": 0.8891, "step": 78440 }, { "epoch": 0.5011946897001137, "grad_norm": 1.8313031196594238, "learning_rate": 8.529959098612966e-05, "loss": 0.8518, "step": 78450 }, { "epoch": 0.5012585768498524, "grad_norm": 0.9657493829727173, "learning_rate": 8.529603718357476e-05, "loss": 0.9087, "step": 78460 }, { "epoch": 0.5013224639995911, "grad_norm": 0.9630830883979797, "learning_rate": 8.529248302555824e-05, "loss": 0.9349, "step": 78470 }, { "epoch": 0.5013863511493298, "grad_norm": 0.7544282674789429, "learning_rate": 8.528892851211587e-05, "loss": 0.811, "step": 78480 }, { "epoch": 0.5014502382990685, "grad_norm": 1.854946255683899, "learning_rate": 8.528537364328346e-05, "loss": 1.142, "step": 78490 }, { "epoch": 0.5015141254488072, "grad_norm": 0.9642276763916016, "learning_rate": 8.528181841909681e-05, "loss": 0.8864, "step": 78500 }, { "epoch": 0.501578012598546, "grad_norm": 0.8221122026443481, "learning_rate": 8.527826283959173e-05, "loss": 0.9846, "step": 78510 }, { "epoch": 0.5016418997482847, "grad_norm": 1.0337133407592773, "learning_rate": 8.527470690480403e-05, "loss": 0.7898, "step": 78520 }, { "epoch": 0.5017057868980234, "grad_norm": 0.7045915126800537, "learning_rate": 8.527115061476951e-05, "loss": 0.9587, "step": 78530 }, { "epoch": 0.5017696740477621, "grad_norm": 0.8976203799247742, "learning_rate": 8.526759396952398e-05, "loss": 0.815, "step": 78540 }, { "epoch": 0.5018335611975008, "grad_norm": 1.2948665618896484, "learning_rate": 8.526403696910326e-05, "loss": 1.1365, "step": 78550 }, { "epoch": 0.5018974483472395, "grad_norm": 0.5973215699195862, "learning_rate": 8.52604796135432e-05, "loss": 0.934, "step": 78560 }, { "epoch": 0.5019613354969782, "grad_norm": 0.8393608927726746, "learning_rate": 8.52569219028796e-05, "loss": 0.9389, "step": 78570 }, { "epoch": 0.5020252226467168, "grad_norm": 0.8553054332733154, "learning_rate": 8.525336383714831e-05, "loss": 0.9821, "step": 78580 }, { "epoch": 0.5020891097964555, "grad_norm": 0.43800783157348633, "learning_rate": 8.524980541638513e-05, "loss": 0.7432, "step": 78590 }, { "epoch": 0.5021529969461942, "grad_norm": 0.6436516046524048, "learning_rate": 8.524624664062591e-05, "loss": 0.8488, "step": 78600 }, { "epoch": 0.5022168840959329, "grad_norm": 1.9890680313110352, "learning_rate": 8.524268750990649e-05, "loss": 0.9869, "step": 78610 }, { "epoch": 0.5022807712456716, "grad_norm": 0.5732369422912598, "learning_rate": 8.523912802426274e-05, "loss": 0.8985, "step": 78620 }, { "epoch": 0.5023446583954103, "grad_norm": 2.491802453994751, "learning_rate": 8.523556818373047e-05, "loss": 0.7785, "step": 78630 }, { "epoch": 0.502408545545149, "grad_norm": 0.7027126550674438, "learning_rate": 8.523200798834555e-05, "loss": 0.9466, "step": 78640 }, { "epoch": 0.5024724326948877, "grad_norm": 0.7287322282791138, "learning_rate": 8.522844743814382e-05, "loss": 0.7692, "step": 78650 }, { "epoch": 0.5025363198446264, "grad_norm": 0.9365010857582092, "learning_rate": 8.522488653316117e-05, "loss": 0.7661, "step": 78660 }, { "epoch": 0.5026002069943651, "grad_norm": 0.5464925765991211, "learning_rate": 8.522132527343342e-05, "loss": 0.6622, "step": 78670 }, { "epoch": 0.5026640941441038, "grad_norm": 1.0046019554138184, "learning_rate": 8.521776365899645e-05, "loss": 0.8033, "step": 78680 }, { "epoch": 0.5027279812938426, "grad_norm": 1.4342055320739746, "learning_rate": 8.521420168988615e-05, "loss": 0.6578, "step": 78690 }, { "epoch": 0.5027918684435813, "grad_norm": 0.5104334950447083, "learning_rate": 8.521063936613835e-05, "loss": 0.8798, "step": 78700 }, { "epoch": 0.50285575559332, "grad_norm": 0.5231984853744507, "learning_rate": 8.520707668778897e-05, "loss": 0.709, "step": 78710 }, { "epoch": 0.5029196427430587, "grad_norm": 1.10532546043396, "learning_rate": 8.520351365487387e-05, "loss": 0.7458, "step": 78720 }, { "epoch": 0.5029835298927974, "grad_norm": 1.1135833263397217, "learning_rate": 8.519995026742892e-05, "loss": 0.779, "step": 78730 }, { "epoch": 0.5030474170425361, "grad_norm": 1.3996037244796753, "learning_rate": 8.519638652549003e-05, "loss": 0.8194, "step": 78740 }, { "epoch": 0.5031113041922748, "grad_norm": 0.8986942172050476, "learning_rate": 8.519282242909307e-05, "loss": 0.876, "step": 78750 }, { "epoch": 0.5031751913420135, "grad_norm": 1.100974202156067, "learning_rate": 8.518925797827394e-05, "loss": 0.9528, "step": 78760 }, { "epoch": 0.5032390784917522, "grad_norm": 0.801201581954956, "learning_rate": 8.518569317306855e-05, "loss": 1.0414, "step": 78770 }, { "epoch": 0.5033029656414909, "grad_norm": 0.7082595825195312, "learning_rate": 8.518212801351278e-05, "loss": 0.9245, "step": 78780 }, { "epoch": 0.5033668527912296, "grad_norm": 0.708473265171051, "learning_rate": 8.517856249964254e-05, "loss": 1.1068, "step": 78790 }, { "epoch": 0.5034307399409683, "grad_norm": 0.6335508823394775, "learning_rate": 8.517499663149376e-05, "loss": 0.6662, "step": 78800 }, { "epoch": 0.503494627090707, "grad_norm": 0.6749662756919861, "learning_rate": 8.517143040910231e-05, "loss": 0.7987, "step": 78810 }, { "epoch": 0.5035585142404457, "grad_norm": 0.8133144974708557, "learning_rate": 8.516786383250415e-05, "loss": 0.8016, "step": 78820 }, { "epoch": 0.5036224013901843, "grad_norm": 2.017829418182373, "learning_rate": 8.516429690173516e-05, "loss": 0.9251, "step": 78830 }, { "epoch": 0.503686288539923, "grad_norm": 0.5370079278945923, "learning_rate": 8.516072961683128e-05, "loss": 0.8484, "step": 78840 }, { "epoch": 0.5037501756896617, "grad_norm": 0.9369492530822754, "learning_rate": 8.515716197782845e-05, "loss": 0.7588, "step": 78850 }, { "epoch": 0.5038140628394004, "grad_norm": 0.9964790344238281, "learning_rate": 8.515359398476257e-05, "loss": 1.0391, "step": 78860 }, { "epoch": 0.5038779499891392, "grad_norm": 0.9717357158660889, "learning_rate": 8.51500256376696e-05, "loss": 0.8133, "step": 78870 }, { "epoch": 0.5039418371388779, "grad_norm": 1.1114850044250488, "learning_rate": 8.514645693658545e-05, "loss": 1.0353, "step": 78880 }, { "epoch": 0.5040057242886166, "grad_norm": 0.9141243100166321, "learning_rate": 8.514288788154607e-05, "loss": 1.0811, "step": 78890 }, { "epoch": 0.5040696114383553, "grad_norm": 0.6969479322433472, "learning_rate": 8.513931847258741e-05, "loss": 0.7896, "step": 78900 }, { "epoch": 0.504133498588094, "grad_norm": 0.8812980651855469, "learning_rate": 8.513574870974542e-05, "loss": 0.9231, "step": 78910 }, { "epoch": 0.5041973857378327, "grad_norm": 0.9980469942092896, "learning_rate": 8.513217859305604e-05, "loss": 0.8142, "step": 78920 }, { "epoch": 0.5042612728875714, "grad_norm": 2.051957130432129, "learning_rate": 8.512860812255523e-05, "loss": 0.9061, "step": 78930 }, { "epoch": 0.5043251600373101, "grad_norm": 1.683716058731079, "learning_rate": 8.512503729827894e-05, "loss": 0.8771, "step": 78940 }, { "epoch": 0.5043890471870488, "grad_norm": 0.7868318557739258, "learning_rate": 8.512146612026314e-05, "loss": 0.8051, "step": 78950 }, { "epoch": 0.5044529343367875, "grad_norm": 0.5927671790122986, "learning_rate": 8.511789458854379e-05, "loss": 0.8834, "step": 78960 }, { "epoch": 0.5045168214865262, "grad_norm": 1.6531774997711182, "learning_rate": 8.511432270315685e-05, "loss": 0.7847, "step": 78970 }, { "epoch": 0.5045807086362649, "grad_norm": 1.921579360961914, "learning_rate": 8.511075046413832e-05, "loss": 0.9616, "step": 78980 }, { "epoch": 0.5046445957860036, "grad_norm": 0.9210075736045837, "learning_rate": 8.510717787152416e-05, "loss": 0.6717, "step": 78990 }, { "epoch": 0.5047084829357423, "grad_norm": 0.7043361663818359, "learning_rate": 8.510360492535033e-05, "loss": 0.8636, "step": 79000 }, { "epoch": 0.504772370085481, "grad_norm": 0.8452950716018677, "learning_rate": 8.510003162565283e-05, "loss": 1.0588, "step": 79010 }, { "epoch": 0.5048362572352197, "grad_norm": 0.9324773550033569, "learning_rate": 8.509645797246766e-05, "loss": 0.8968, "step": 79020 }, { "epoch": 0.5049001443849584, "grad_norm": 0.7496733069419861, "learning_rate": 8.50928839658308e-05, "loss": 0.9523, "step": 79030 }, { "epoch": 0.5049640315346972, "grad_norm": 1.1798431873321533, "learning_rate": 8.508930960577821e-05, "loss": 0.9494, "step": 79040 }, { "epoch": 0.5050279186844359, "grad_norm": 0.9668488502502441, "learning_rate": 8.508573489234594e-05, "loss": 0.8869, "step": 79050 }, { "epoch": 0.5050918058341746, "grad_norm": 0.746605634689331, "learning_rate": 8.508215982556996e-05, "loss": 0.8892, "step": 79060 }, { "epoch": 0.5051556929839132, "grad_norm": 0.7922160029411316, "learning_rate": 8.507858440548628e-05, "loss": 0.772, "step": 79070 }, { "epoch": 0.5052195801336519, "grad_norm": 0.7023123502731323, "learning_rate": 8.50750086321309e-05, "loss": 1.062, "step": 79080 }, { "epoch": 0.5052834672833906, "grad_norm": 0.7416033148765564, "learning_rate": 8.507143250553985e-05, "loss": 0.8482, "step": 79090 }, { "epoch": 0.5053473544331293, "grad_norm": 0.6974393725395203, "learning_rate": 8.506785602574914e-05, "loss": 1.0322, "step": 79100 }, { "epoch": 0.505411241582868, "grad_norm": 1.0407123565673828, "learning_rate": 8.506427919279478e-05, "loss": 0.7803, "step": 79110 }, { "epoch": 0.5054751287326067, "grad_norm": 0.6520995497703552, "learning_rate": 8.506070200671277e-05, "loss": 1.2658, "step": 79120 }, { "epoch": 0.5055390158823454, "grad_norm": 1.1513316631317139, "learning_rate": 8.505712446753918e-05, "loss": 0.8079, "step": 79130 }, { "epoch": 0.5056029030320841, "grad_norm": 0.7983292937278748, "learning_rate": 8.505354657531001e-05, "loss": 1.3388, "step": 79140 }, { "epoch": 0.5056667901818228, "grad_norm": 0.6515194177627563, "learning_rate": 8.50499683300613e-05, "loss": 0.7375, "step": 79150 }, { "epoch": 0.5057306773315615, "grad_norm": 0.7249539494514465, "learning_rate": 8.504638973182908e-05, "loss": 0.8181, "step": 79160 }, { "epoch": 0.5057945644813002, "grad_norm": 1.1405197381973267, "learning_rate": 8.504281078064942e-05, "loss": 0.8314, "step": 79170 }, { "epoch": 0.5058584516310389, "grad_norm": 0.5889720320701599, "learning_rate": 8.503923147655832e-05, "loss": 0.9283, "step": 79180 }, { "epoch": 0.5059223387807776, "grad_norm": 1.013061761856079, "learning_rate": 8.503565181959185e-05, "loss": 0.7729, "step": 79190 }, { "epoch": 0.5059862259305163, "grad_norm": 1.1202266216278076, "learning_rate": 8.503207180978604e-05, "loss": 0.888, "step": 79200 }, { "epoch": 0.506050113080255, "grad_norm": 1.2008094787597656, "learning_rate": 8.502849144717698e-05, "loss": 0.8544, "step": 79210 }, { "epoch": 0.5061140002299938, "grad_norm": 0.7154238224029541, "learning_rate": 8.50249107318007e-05, "loss": 1.0849, "step": 79220 }, { "epoch": 0.5061778873797325, "grad_norm": 0.9151634573936462, "learning_rate": 8.502132966369327e-05, "loss": 0.8898, "step": 79230 }, { "epoch": 0.5062417745294712, "grad_norm": 0.9326740503311157, "learning_rate": 8.501774824289076e-05, "loss": 0.7975, "step": 79240 }, { "epoch": 0.5063056616792099, "grad_norm": 0.9655689001083374, "learning_rate": 8.501416646942922e-05, "loss": 0.8229, "step": 79250 }, { "epoch": 0.5063695488289486, "grad_norm": NaN, "learning_rate": 8.50109425718202e-05, "loss": 0.929, "step": 79260 }, { "epoch": 0.5064334359786873, "grad_norm": 1.2509207725524902, "learning_rate": 8.50073601284059e-05, "loss": 0.8922, "step": 79270 }, { "epoch": 0.506497323128426, "grad_norm": 0.7241592407226562, "learning_rate": 8.50037773324372e-05, "loss": 1.0239, "step": 79280 }, { "epoch": 0.5065612102781647, "grad_norm": 0.7398717999458313, "learning_rate": 8.500019418395019e-05, "loss": 1.0812, "step": 79290 }, { "epoch": 0.5066250974279034, "grad_norm": 1.2325761318206787, "learning_rate": 8.499661068298093e-05, "loss": 0.9234, "step": 79300 }, { "epoch": 0.506688984577642, "grad_norm": 0.7128446102142334, "learning_rate": 8.499302682956554e-05, "loss": 0.7636, "step": 79310 }, { "epoch": 0.5067528717273807, "grad_norm": 1.0909960269927979, "learning_rate": 8.498944262374009e-05, "loss": 0.9345, "step": 79320 }, { "epoch": 0.5068167588771194, "grad_norm": 0.8504812121391296, "learning_rate": 8.498585806554069e-05, "loss": 1.0587, "step": 79330 }, { "epoch": 0.5068806460268581, "grad_norm": 1.0179625749588013, "learning_rate": 8.498227315500343e-05, "loss": 0.8948, "step": 79340 }, { "epoch": 0.5069445331765968, "grad_norm": 0.7913358807563782, "learning_rate": 8.497868789216439e-05, "loss": 0.9132, "step": 79350 }, { "epoch": 0.5070084203263355, "grad_norm": 2.097581148147583, "learning_rate": 8.497510227705972e-05, "loss": 1.0746, "step": 79360 }, { "epoch": 0.5070723074760742, "grad_norm": 0.8437251448631287, "learning_rate": 8.497151630972552e-05, "loss": 0.8626, "step": 79370 }, { "epoch": 0.5071361946258129, "grad_norm": 1.6225666999816895, "learning_rate": 8.496792999019789e-05, "loss": 0.9101, "step": 79380 }, { "epoch": 0.5072000817755516, "grad_norm": 1.98760986328125, "learning_rate": 8.496434331851295e-05, "loss": 0.8182, "step": 79390 }, { "epoch": 0.5072639689252904, "grad_norm": 0.8181973099708557, "learning_rate": 8.496075629470683e-05, "loss": 0.7777, "step": 79400 }, { "epoch": 0.5073278560750291, "grad_norm": 0.9031455516815186, "learning_rate": 8.495716891881564e-05, "loss": 1.0561, "step": 79410 }, { "epoch": 0.5073917432247678, "grad_norm": 1.2834783792495728, "learning_rate": 8.495358119087553e-05, "loss": 0.8807, "step": 79420 }, { "epoch": 0.5074556303745065, "grad_norm": 0.5575640201568604, "learning_rate": 8.494999311092262e-05, "loss": 0.8329, "step": 79430 }, { "epoch": 0.5075195175242452, "grad_norm": 1.2049697637557983, "learning_rate": 8.494640467899303e-05, "loss": 1.0383, "step": 79440 }, { "epoch": 0.5075834046739839, "grad_norm": 1.0265311002731323, "learning_rate": 8.494281589512292e-05, "loss": 0.8573, "step": 79450 }, { "epoch": 0.5076472918237226, "grad_norm": 1.0250693559646606, "learning_rate": 8.493922675934842e-05, "loss": 1.0297, "step": 79460 }, { "epoch": 0.5077111789734613, "grad_norm": 0.6825410723686218, "learning_rate": 8.493563727170569e-05, "loss": 0.7719, "step": 79470 }, { "epoch": 0.5077750661232, "grad_norm": 0.7861701250076294, "learning_rate": 8.493204743223084e-05, "loss": 0.915, "step": 79480 }, { "epoch": 0.5078389532729387, "grad_norm": 0.7970221638679504, "learning_rate": 8.492845724096008e-05, "loss": 0.7341, "step": 79490 }, { "epoch": 0.5079028404226774, "grad_norm": 1.6295416355133057, "learning_rate": 8.492486669792955e-05, "loss": 0.7568, "step": 79500 }, { "epoch": 0.5079667275724161, "grad_norm": 1.207197666168213, "learning_rate": 8.492127580317536e-05, "loss": 0.7609, "step": 79510 }, { "epoch": 0.5080306147221548, "grad_norm": 0.6028062105178833, "learning_rate": 8.491768455673373e-05, "loss": 0.8507, "step": 79520 }, { "epoch": 0.5080945018718935, "grad_norm": 0.6845399737358093, "learning_rate": 8.49140929586408e-05, "loss": 0.8778, "step": 79530 }, { "epoch": 0.5081583890216322, "grad_norm": 0.9547748565673828, "learning_rate": 8.491050100893276e-05, "loss": 0.7729, "step": 79540 }, { "epoch": 0.508222276171371, "grad_norm": 1.033980369567871, "learning_rate": 8.490690870764577e-05, "loss": 0.928, "step": 79550 }, { "epoch": 0.5082861633211095, "grad_norm": 0.9669222831726074, "learning_rate": 8.490331605481602e-05, "loss": 0.9523, "step": 79560 }, { "epoch": 0.5083500504708482, "grad_norm": 0.8368834853172302, "learning_rate": 8.489972305047968e-05, "loss": 1.0998, "step": 79570 }, { "epoch": 0.508413937620587, "grad_norm": 0.8119040727615356, "learning_rate": 8.489612969467292e-05, "loss": 1.0353, "step": 79580 }, { "epoch": 0.5084778247703257, "grad_norm": 0.9374289512634277, "learning_rate": 8.489253598743195e-05, "loss": 1.2263, "step": 79590 }, { "epoch": 0.5085417119200644, "grad_norm": 0.6595514416694641, "learning_rate": 8.488894192879297e-05, "loss": 0.9092, "step": 79600 }, { "epoch": 0.5086055990698031, "grad_norm": 0.5380666851997375, "learning_rate": 8.488534751879213e-05, "loss": 1.0118, "step": 79610 }, { "epoch": 0.5086694862195418, "grad_norm": 0.6525367498397827, "learning_rate": 8.488175275746568e-05, "loss": 0.9371, "step": 79620 }, { "epoch": 0.5087333733692805, "grad_norm": 0.5488191246986389, "learning_rate": 8.487815764484981e-05, "loss": 0.8618, "step": 79630 }, { "epoch": 0.5087972605190192, "grad_norm": 0.7757022380828857, "learning_rate": 8.487456218098071e-05, "loss": 0.9421, "step": 79640 }, { "epoch": 0.5088611476687579, "grad_norm": 0.7398278117179871, "learning_rate": 8.48709663658946e-05, "loss": 1.1401, "step": 79650 }, { "epoch": 0.5089250348184966, "grad_norm": 1.6941704750061035, "learning_rate": 8.486737019962769e-05, "loss": 0.8624, "step": 79660 }, { "epoch": 0.5089889219682353, "grad_norm": 0.8483586311340332, "learning_rate": 8.486377368221621e-05, "loss": 0.8825, "step": 79670 }, { "epoch": 0.509052809117974, "grad_norm": 0.8225073218345642, "learning_rate": 8.486017681369636e-05, "loss": 0.7361, "step": 79680 }, { "epoch": 0.5091166962677127, "grad_norm": 0.8197336196899414, "learning_rate": 8.485657959410436e-05, "loss": 0.9902, "step": 79690 }, { "epoch": 0.5091805834174514, "grad_norm": 0.6956250667572021, "learning_rate": 8.485298202347646e-05, "loss": 0.9947, "step": 79700 }, { "epoch": 0.5092444705671901, "grad_norm": 1.1216806173324585, "learning_rate": 8.484938410184888e-05, "loss": 0.7103, "step": 79710 }, { "epoch": 0.5093083577169288, "grad_norm": 1.101396083831787, "learning_rate": 8.484578582925784e-05, "loss": 0.7783, "step": 79720 }, { "epoch": 0.5093722448666675, "grad_norm": 1.2090519666671753, "learning_rate": 8.48421872057396e-05, "loss": 0.7809, "step": 79730 }, { "epoch": 0.5094361320164063, "grad_norm": 0.9379667043685913, "learning_rate": 8.48385882313304e-05, "loss": 1.0095, "step": 79740 }, { "epoch": 0.509500019166145, "grad_norm": 0.6880574822425842, "learning_rate": 8.483498890606647e-05, "loss": 0.7678, "step": 79750 }, { "epoch": 0.5095639063158837, "grad_norm": 0.9663302898406982, "learning_rate": 8.483138922998406e-05, "loss": 0.9895, "step": 79760 }, { "epoch": 0.5096277934656224, "grad_norm": 2.0903241634368896, "learning_rate": 8.482778920311942e-05, "loss": 0.9586, "step": 79770 }, { "epoch": 0.5096916806153611, "grad_norm": 0.723540723323822, "learning_rate": 8.482418882550882e-05, "loss": 0.7927, "step": 79780 }, { "epoch": 0.5097555677650998, "grad_norm": 0.6735635995864868, "learning_rate": 8.482058809718852e-05, "loss": 0.733, "step": 79790 }, { "epoch": 0.5098194549148384, "grad_norm": 0.7494048476219177, "learning_rate": 8.481698701819476e-05, "loss": 0.7265, "step": 79800 }, { "epoch": 0.5098833420645771, "grad_norm": 1.2474843263626099, "learning_rate": 8.481338558856383e-05, "loss": 0.9442, "step": 79810 }, { "epoch": 0.5099472292143158, "grad_norm": 1.0671770572662354, "learning_rate": 8.4809783808332e-05, "loss": 0.6449, "step": 79820 }, { "epoch": 0.5100111163640545, "grad_norm": 1.693997859954834, "learning_rate": 8.480618167753551e-05, "loss": 0.9382, "step": 79830 }, { "epoch": 0.5100750035137932, "grad_norm": 0.8211742639541626, "learning_rate": 8.480257919621067e-05, "loss": 0.8989, "step": 79840 }, { "epoch": 0.5101388906635319, "grad_norm": 0.6184179186820984, "learning_rate": 8.479897636439375e-05, "loss": 0.9222, "step": 79850 }, { "epoch": 0.5102027778132706, "grad_norm": 0.9833461046218872, "learning_rate": 8.479537318212103e-05, "loss": 0.8038, "step": 79860 }, { "epoch": 0.5102666649630093, "grad_norm": 1.01847243309021, "learning_rate": 8.479176964942879e-05, "loss": 1.0515, "step": 79870 }, { "epoch": 0.510330552112748, "grad_norm": 1.1789108514785767, "learning_rate": 8.478816576635334e-05, "loss": 0.7673, "step": 79880 }, { "epoch": 0.5103944392624867, "grad_norm": 1.949750542640686, "learning_rate": 8.478456153293096e-05, "loss": 0.8108, "step": 79890 }, { "epoch": 0.5104583264122254, "grad_norm": 1.040195345878601, "learning_rate": 8.478095694919797e-05, "loss": 0.9984, "step": 79900 }, { "epoch": 0.5105222135619641, "grad_norm": 0.8911735415458679, "learning_rate": 8.477735201519063e-05, "loss": 0.9404, "step": 79910 }, { "epoch": 0.5105861007117029, "grad_norm": 0.8057443499565125, "learning_rate": 8.477374673094526e-05, "loss": 0.638, "step": 79920 }, { "epoch": 0.5106499878614416, "grad_norm": 0.5853357911109924, "learning_rate": 8.477014109649822e-05, "loss": 0.8098, "step": 79930 }, { "epoch": 0.5107138750111803, "grad_norm": 0.8207983374595642, "learning_rate": 8.476653511188575e-05, "loss": 0.9, "step": 79940 }, { "epoch": 0.510777762160919, "grad_norm": 0.6729571223258972, "learning_rate": 8.47629287771442e-05, "loss": 0.8749, "step": 79950 }, { "epoch": 0.5108416493106577, "grad_norm": 0.5826616287231445, "learning_rate": 8.475932209230987e-05, "loss": 0.8363, "step": 79960 }, { "epoch": 0.5109055364603964, "grad_norm": 0.5043898224830627, "learning_rate": 8.475571505741912e-05, "loss": 0.7508, "step": 79970 }, { "epoch": 0.5109694236101351, "grad_norm": 0.9225212931632996, "learning_rate": 8.475210767250823e-05, "loss": 0.9501, "step": 79980 }, { "epoch": 0.5110333107598738, "grad_norm": 1.0718021392822266, "learning_rate": 8.474849993761357e-05, "loss": 0.8453, "step": 79990 }, { "epoch": 0.5110971979096125, "grad_norm": 0.7842211127281189, "learning_rate": 8.474489185277143e-05, "loss": 1.3727, "step": 80000 }, { "epoch": 0.5111610850593512, "grad_norm": 0.8479704260826111, "learning_rate": 8.474128341801819e-05, "loss": 1.1579, "step": 80010 }, { "epoch": 0.5112249722090899, "grad_norm": 0.736724317073822, "learning_rate": 8.473767463339018e-05, "loss": 0.8251, "step": 80020 }, { "epoch": 0.5112888593588286, "grad_norm": 0.6635915040969849, "learning_rate": 8.47340654989237e-05, "loss": 1.2262, "step": 80030 }, { "epoch": 0.5113527465085672, "grad_norm": 0.818091869354248, "learning_rate": 8.473045601465515e-05, "loss": 0.9825, "step": 80040 }, { "epoch": 0.5114166336583059, "grad_norm": 0.8209525346755981, "learning_rate": 8.472684618062085e-05, "loss": 1.1992, "step": 80050 }, { "epoch": 0.5114805208080446, "grad_norm": 0.6535345911979675, "learning_rate": 8.472323599685718e-05, "loss": 0.6442, "step": 80060 }, { "epoch": 0.5115444079577833, "grad_norm": 1.321568489074707, "learning_rate": 8.471962546340049e-05, "loss": 1.0123, "step": 80070 }, { "epoch": 0.511608295107522, "grad_norm": 1.0992311239242554, "learning_rate": 8.471601458028713e-05, "loss": 0.8926, "step": 80080 }, { "epoch": 0.5116721822572607, "grad_norm": 0.987280547618866, "learning_rate": 8.471240334755346e-05, "loss": 0.943, "step": 80090 }, { "epoch": 0.5117360694069994, "grad_norm": 0.7247947454452515, "learning_rate": 8.470879176523586e-05, "loss": 0.8531, "step": 80100 }, { "epoch": 0.5117999565567382, "grad_norm": 0.8526644706726074, "learning_rate": 8.470517983337071e-05, "loss": 0.7333, "step": 80110 }, { "epoch": 0.5118638437064769, "grad_norm": 1.081724762916565, "learning_rate": 8.470156755199436e-05, "loss": 0.9023, "step": 80120 }, { "epoch": 0.5119277308562156, "grad_norm": 0.9575611352920532, "learning_rate": 8.469795492114321e-05, "loss": 0.7949, "step": 80130 }, { "epoch": 0.5119916180059543, "grad_norm": 0.6004752516746521, "learning_rate": 8.469434194085364e-05, "loss": 1.0179, "step": 80140 }, { "epoch": 0.512055505155693, "grad_norm": 0.7859931588172913, "learning_rate": 8.469072861116202e-05, "loss": 0.9604, "step": 80150 }, { "epoch": 0.5121193923054317, "grad_norm": 0.9513803124427795, "learning_rate": 8.468711493210476e-05, "loss": 0.8357, "step": 80160 }, { "epoch": 0.5121832794551704, "grad_norm": 0.8474782109260559, "learning_rate": 8.468350090371825e-05, "loss": 0.9121, "step": 80170 }, { "epoch": 0.5122471666049091, "grad_norm": 0.49391424655914307, "learning_rate": 8.467988652603887e-05, "loss": 1.0967, "step": 80180 }, { "epoch": 0.5123110537546478, "grad_norm": 0.6341314911842346, "learning_rate": 8.467627179910304e-05, "loss": 1.1882, "step": 80190 }, { "epoch": 0.5123749409043865, "grad_norm": 0.5973122119903564, "learning_rate": 8.467265672294715e-05, "loss": 1.372, "step": 80200 }, { "epoch": 0.5124388280541252, "grad_norm": 1.0197665691375732, "learning_rate": 8.46690412976076e-05, "loss": 0.6034, "step": 80210 }, { "epoch": 0.5125027152038639, "grad_norm": 1.1325267553329468, "learning_rate": 8.466542552312083e-05, "loss": 0.9992, "step": 80220 }, { "epoch": 0.5125666023536026, "grad_norm": 1.2969529628753662, "learning_rate": 8.466180939952322e-05, "loss": 0.9412, "step": 80230 }, { "epoch": 0.5126304895033413, "grad_norm": 0.804654598236084, "learning_rate": 8.465819292685121e-05, "loss": 0.9241, "step": 80240 }, { "epoch": 0.51269437665308, "grad_norm": 0.5683889985084534, "learning_rate": 8.465457610514122e-05, "loss": 0.9131, "step": 80250 }, { "epoch": 0.5127582638028187, "grad_norm": 1.4431538581848145, "learning_rate": 8.465095893442965e-05, "loss": 0.8802, "step": 80260 }, { "epoch": 0.5128221509525575, "grad_norm": 0.7495303750038147, "learning_rate": 8.464734141475296e-05, "loss": 0.7763, "step": 80270 }, { "epoch": 0.512886038102296, "grad_norm": 1.0469660758972168, "learning_rate": 8.464372354614755e-05, "loss": 0.7827, "step": 80280 }, { "epoch": 0.5129499252520348, "grad_norm": 0.8818047046661377, "learning_rate": 8.46401053286499e-05, "loss": 0.7446, "step": 80290 }, { "epoch": 0.5130138124017735, "grad_norm": 0.610306441783905, "learning_rate": 8.463648676229641e-05, "loss": 0.9616, "step": 80300 }, { "epoch": 0.5130776995515122, "grad_norm": 1.0561434030532837, "learning_rate": 8.463286784712352e-05, "loss": 0.9341, "step": 80310 }, { "epoch": 0.5131415867012509, "grad_norm": 1.1245967149734497, "learning_rate": 8.46292485831677e-05, "loss": 1.0593, "step": 80320 }, { "epoch": 0.5132054738509896, "grad_norm": 0.8336319327354431, "learning_rate": 8.462562897046539e-05, "loss": 0.9832, "step": 80330 }, { "epoch": 0.5132693610007283, "grad_norm": 1.2860108613967896, "learning_rate": 8.462200900905304e-05, "loss": 1.2113, "step": 80340 }, { "epoch": 0.513333248150467, "grad_norm": 0.6594120860099792, "learning_rate": 8.46183886989671e-05, "loss": 1.0788, "step": 80350 }, { "epoch": 0.5133971353002057, "grad_norm": 1.1738802194595337, "learning_rate": 8.461476804024405e-05, "loss": 1.1394, "step": 80360 }, { "epoch": 0.5134610224499444, "grad_norm": 0.8349171280860901, "learning_rate": 8.461114703292032e-05, "loss": 0.9976, "step": 80370 }, { "epoch": 0.5135249095996831, "grad_norm": 0.9331271648406982, "learning_rate": 8.460752567703242e-05, "loss": 0.8871, "step": 80380 }, { "epoch": 0.5135887967494218, "grad_norm": 1.0842266082763672, "learning_rate": 8.460390397261679e-05, "loss": 0.8379, "step": 80390 }, { "epoch": 0.5136526838991605, "grad_norm": 0.8020588159561157, "learning_rate": 8.46002819197099e-05, "loss": 0.7943, "step": 80400 }, { "epoch": 0.5137165710488992, "grad_norm": 1.0201034545898438, "learning_rate": 8.459665951834825e-05, "loss": 0.8956, "step": 80410 }, { "epoch": 0.5137804581986379, "grad_norm": 0.710241436958313, "learning_rate": 8.459303676856829e-05, "loss": 1.1422, "step": 80420 }, { "epoch": 0.5138443453483766, "grad_norm": 1.129925012588501, "learning_rate": 8.458941367040654e-05, "loss": 1.0028, "step": 80430 }, { "epoch": 0.5139082324981153, "grad_norm": 0.9500714540481567, "learning_rate": 8.458579022389946e-05, "loss": 0.9935, "step": 80440 }, { "epoch": 0.513972119647854, "grad_norm": 1.4013770818710327, "learning_rate": 8.458216642908357e-05, "loss": 1.1331, "step": 80450 }, { "epoch": 0.5140360067975928, "grad_norm": 1.6361690759658813, "learning_rate": 8.457854228599533e-05, "loss": 0.8196, "step": 80460 }, { "epoch": 0.5140998939473315, "grad_norm": 1.773687481880188, "learning_rate": 8.457491779467124e-05, "loss": 0.6577, "step": 80470 }, { "epoch": 0.5141637810970702, "grad_norm": 1.1989527940750122, "learning_rate": 8.457129295514785e-05, "loss": 0.9754, "step": 80480 }, { "epoch": 0.5142276682468089, "grad_norm": 1.0061672925949097, "learning_rate": 8.456766776746161e-05, "loss": 0.7289, "step": 80490 }, { "epoch": 0.5142915553965476, "grad_norm": 0.5245055556297302, "learning_rate": 8.456404223164906e-05, "loss": 0.8355, "step": 80500 }, { "epoch": 0.5143554425462863, "grad_norm": 0.9344064593315125, "learning_rate": 8.45604163477467e-05, "loss": 0.9097, "step": 80510 }, { "epoch": 0.514419329696025, "grad_norm": 0.8581297993659973, "learning_rate": 8.455679011579104e-05, "loss": 0.6422, "step": 80520 }, { "epoch": 0.5144832168457636, "grad_norm": 1.0841580629348755, "learning_rate": 8.455316353581861e-05, "loss": 1.1547, "step": 80530 }, { "epoch": 0.5145471039955023, "grad_norm": 0.8380923867225647, "learning_rate": 8.454953660786594e-05, "loss": 1.1443, "step": 80540 }, { "epoch": 0.514610991145241, "grad_norm": 1.0253181457519531, "learning_rate": 8.454590933196953e-05, "loss": 1.0363, "step": 80550 }, { "epoch": 0.5146748782949797, "grad_norm": 0.8039796948432922, "learning_rate": 8.454228170816594e-05, "loss": 0.9947, "step": 80560 }, { "epoch": 0.5147387654447184, "grad_norm": 0.6157310605049133, "learning_rate": 8.453865373649168e-05, "loss": 0.8205, "step": 80570 }, { "epoch": 0.5148026525944571, "grad_norm": 1.2950266599655151, "learning_rate": 8.45350254169833e-05, "loss": 0.8778, "step": 80580 }, { "epoch": 0.5148665397441958, "grad_norm": 0.5074208974838257, "learning_rate": 8.453139674967735e-05, "loss": 0.9512, "step": 80590 }, { "epoch": 0.5149304268939345, "grad_norm": 0.974296510219574, "learning_rate": 8.452776773461035e-05, "loss": 0.7415, "step": 80600 }, { "epoch": 0.5149943140436732, "grad_norm": 0.8522329926490784, "learning_rate": 8.452413837181886e-05, "loss": 0.7619, "step": 80610 }, { "epoch": 0.515058201193412, "grad_norm": 0.7677290439605713, "learning_rate": 8.452050866133943e-05, "loss": 0.7501, "step": 80620 }, { "epoch": 0.5151220883431507, "grad_norm": 0.7231885194778442, "learning_rate": 8.451687860320862e-05, "loss": 0.8417, "step": 80630 }, { "epoch": 0.5151859754928894, "grad_norm": 1.0473037958145142, "learning_rate": 8.451324819746297e-05, "loss": 0.7961, "step": 80640 }, { "epoch": 0.5152498626426281, "grad_norm": 1.339667558670044, "learning_rate": 8.450961744413906e-05, "loss": 0.6476, "step": 80650 }, { "epoch": 0.5153137497923668, "grad_norm": 1.0341308116912842, "learning_rate": 8.450598634327342e-05, "loss": 0.7599, "step": 80660 }, { "epoch": 0.5153776369421055, "grad_norm": 0.5190713405609131, "learning_rate": 8.450235489490268e-05, "loss": 1.0512, "step": 80670 }, { "epoch": 0.5154415240918442, "grad_norm": 0.6918653845787048, "learning_rate": 8.449872309906338e-05, "loss": 0.8157, "step": 80680 }, { "epoch": 0.5155054112415829, "grad_norm": 0.6024577617645264, "learning_rate": 8.449509095579206e-05, "loss": 0.9064, "step": 80690 }, { "epoch": 0.5155692983913216, "grad_norm": 0.7624403238296509, "learning_rate": 8.449145846512536e-05, "loss": 0.7265, "step": 80700 }, { "epoch": 0.5156331855410603, "grad_norm": 1.1898252964019775, "learning_rate": 8.448782562709983e-05, "loss": 0.9, "step": 80710 }, { "epoch": 0.515697072690799, "grad_norm": 1.169190526008606, "learning_rate": 8.448419244175205e-05, "loss": 0.9871, "step": 80720 }, { "epoch": 0.5157609598405377, "grad_norm": 0.9798734188079834, "learning_rate": 8.448055890911863e-05, "loss": 0.9437, "step": 80730 }, { "epoch": 0.5158248469902764, "grad_norm": 0.6357259154319763, "learning_rate": 8.447692502923615e-05, "loss": 0.8561, "step": 80740 }, { "epoch": 0.5158887341400151, "grad_norm": 0.7946950793266296, "learning_rate": 8.447329080214119e-05, "loss": 0.9226, "step": 80750 }, { "epoch": 0.5159526212897538, "grad_norm": 1.21712064743042, "learning_rate": 8.446965622787038e-05, "loss": 0.7975, "step": 80760 }, { "epoch": 0.5160165084394924, "grad_norm": 0.6995162963867188, "learning_rate": 8.446602130646031e-05, "loss": 0.7762, "step": 80770 }, { "epoch": 0.5160803955892311, "grad_norm": 0.9936223030090332, "learning_rate": 8.44623860379476e-05, "loss": 0.8434, "step": 80780 }, { "epoch": 0.5161442827389698, "grad_norm": 0.8816124796867371, "learning_rate": 8.445875042236884e-05, "loss": 0.9124, "step": 80790 }, { "epoch": 0.5162081698887085, "grad_norm": 3.8590049743652344, "learning_rate": 8.445511445976064e-05, "loss": 0.9158, "step": 80800 }, { "epoch": 0.5162720570384473, "grad_norm": 0.8542178273200989, "learning_rate": 8.445147815015964e-05, "loss": 0.7654, "step": 80810 }, { "epoch": 0.516335944188186, "grad_norm": 1.375125527381897, "learning_rate": 8.444784149360245e-05, "loss": 0.8894, "step": 80820 }, { "epoch": 0.5163998313379247, "grad_norm": 0.7835062742233276, "learning_rate": 8.444420449012569e-05, "loss": 0.6714, "step": 80830 }, { "epoch": 0.5164637184876634, "grad_norm": 0.8339881896972656, "learning_rate": 8.4440567139766e-05, "loss": 0.9002, "step": 80840 }, { "epoch": 0.5165276056374021, "grad_norm": 0.8910601139068604, "learning_rate": 8.443692944256001e-05, "loss": 0.9653, "step": 80850 }, { "epoch": 0.5165914927871408, "grad_norm": 0.9393212795257568, "learning_rate": 8.443329139854433e-05, "loss": 0.9248, "step": 80860 }, { "epoch": 0.5166553799368795, "grad_norm": 0.8954446315765381, "learning_rate": 8.442965300775563e-05, "loss": 0.8944, "step": 80870 }, { "epoch": 0.5167192670866182, "grad_norm": 1.6217565536499023, "learning_rate": 8.442601427023054e-05, "loss": 0.9823, "step": 80880 }, { "epoch": 0.5167831542363569, "grad_norm": 0.6830261945724487, "learning_rate": 8.442237518600569e-05, "loss": 0.8189, "step": 80890 }, { "epoch": 0.5168470413860956, "grad_norm": 0.788052499294281, "learning_rate": 8.441873575511775e-05, "loss": 0.897, "step": 80900 }, { "epoch": 0.5169109285358343, "grad_norm": 0.8000684380531311, "learning_rate": 8.441509597760336e-05, "loss": 0.902, "step": 80910 }, { "epoch": 0.516974815685573, "grad_norm": 2.1170742511749268, "learning_rate": 8.441145585349918e-05, "loss": 0.7763, "step": 80920 }, { "epoch": 0.5170387028353117, "grad_norm": 0.7653173208236694, "learning_rate": 8.440781538284189e-05, "loss": 0.8674, "step": 80930 }, { "epoch": 0.5171025899850504, "grad_norm": 0.8054555654525757, "learning_rate": 8.44041745656681e-05, "loss": 1.0094, "step": 80940 }, { "epoch": 0.5171664771347891, "grad_norm": 0.9257411956787109, "learning_rate": 8.440053340201454e-05, "loss": 0.698, "step": 80950 }, { "epoch": 0.5172303642845278, "grad_norm": 0.7227391600608826, "learning_rate": 8.439689189191783e-05, "loss": 1.0385, "step": 80960 }, { "epoch": 0.5172942514342665, "grad_norm": 0.873188853263855, "learning_rate": 8.439325003541466e-05, "loss": 0.8549, "step": 80970 }, { "epoch": 0.5173581385840053, "grad_norm": 2.3998446464538574, "learning_rate": 8.438960783254171e-05, "loss": 1.0805, "step": 80980 }, { "epoch": 0.517422025733744, "grad_norm": 0.8006882071495056, "learning_rate": 8.438596528333567e-05, "loss": 1.0806, "step": 80990 }, { "epoch": 0.5174859128834827, "grad_norm": 0.723087728023529, "learning_rate": 8.438232238783319e-05, "loss": 0.9784, "step": 81000 }, { "epoch": 0.5175498000332213, "grad_norm": 0.805282473564148, "learning_rate": 8.437867914607099e-05, "loss": 0.9561, "step": 81010 }, { "epoch": 0.51761368718296, "grad_norm": 0.7623560428619385, "learning_rate": 8.437503555808575e-05, "loss": 0.9467, "step": 81020 }, { "epoch": 0.5176775743326987, "grad_norm": 0.8528174161911011, "learning_rate": 8.437139162391416e-05, "loss": 0.8532, "step": 81030 }, { "epoch": 0.5177414614824374, "grad_norm": 0.9997398257255554, "learning_rate": 8.436774734359292e-05, "loss": 0.9379, "step": 81040 }, { "epoch": 0.5178053486321761, "grad_norm": 0.682331383228302, "learning_rate": 8.436410271715873e-05, "loss": 0.9537, "step": 81050 }, { "epoch": 0.5178692357819148, "grad_norm": 0.829108476638794, "learning_rate": 8.436045774464831e-05, "loss": 0.6795, "step": 81060 }, { "epoch": 0.5179331229316535, "grad_norm": 0.5814771056175232, "learning_rate": 8.435681242609834e-05, "loss": 1.0538, "step": 81070 }, { "epoch": 0.5179970100813922, "grad_norm": 0.9756491184234619, "learning_rate": 8.435316676154557e-05, "loss": 0.7018, "step": 81080 }, { "epoch": 0.5180608972311309, "grad_norm": 0.49684908986091614, "learning_rate": 8.434952075102665e-05, "loss": 0.7389, "step": 81090 }, { "epoch": 0.5181247843808696, "grad_norm": 0.7988196015357971, "learning_rate": 8.434587439457837e-05, "loss": 0.9728, "step": 81100 }, { "epoch": 0.5181886715306083, "grad_norm": 0.7685717344284058, "learning_rate": 8.43422276922374e-05, "loss": 1.1467, "step": 81110 }, { "epoch": 0.518252558680347, "grad_norm": 1.4645850658416748, "learning_rate": 8.433858064404052e-05, "loss": 0.9914, "step": 81120 }, { "epoch": 0.5183164458300857, "grad_norm": 1.1191790103912354, "learning_rate": 8.433493325002439e-05, "loss": 1.0248, "step": 81130 }, { "epoch": 0.5183803329798244, "grad_norm": 1.087517499923706, "learning_rate": 8.43312855102258e-05, "loss": 0.8725, "step": 81140 }, { "epoch": 0.5184442201295631, "grad_norm": 0.9224085807800293, "learning_rate": 8.432763742468146e-05, "loss": 1.0876, "step": 81150 }, { "epoch": 0.5185081072793019, "grad_norm": 1.3059546947479248, "learning_rate": 8.432398899342811e-05, "loss": 1.003, "step": 81160 }, { "epoch": 0.5185719944290406, "grad_norm": 1.533253788948059, "learning_rate": 8.43203402165025e-05, "loss": 0.8566, "step": 81170 }, { "epoch": 0.5186358815787793, "grad_norm": 0.714002251625061, "learning_rate": 8.431669109394138e-05, "loss": 0.7685, "step": 81180 }, { "epoch": 0.518699768728518, "grad_norm": 0.8544260859489441, "learning_rate": 8.431304162578148e-05, "loss": 0.8297, "step": 81190 }, { "epoch": 0.5187636558782567, "grad_norm": 0.5248509049415588, "learning_rate": 8.430939181205957e-05, "loss": 1.0113, "step": 81200 }, { "epoch": 0.5188275430279954, "grad_norm": 0.8331204056739807, "learning_rate": 8.430574165281239e-05, "loss": 0.8771, "step": 81210 }, { "epoch": 0.5188914301777341, "grad_norm": 0.9952676296234131, "learning_rate": 8.430209114807675e-05, "loss": 0.9538, "step": 81220 }, { "epoch": 0.5189553173274728, "grad_norm": 5.568673133850098, "learning_rate": 8.429844029788933e-05, "loss": 1.1575, "step": 81230 }, { "epoch": 0.5190192044772115, "grad_norm": 0.9033558964729309, "learning_rate": 8.429478910228697e-05, "loss": 1.1018, "step": 81240 }, { "epoch": 0.5190830916269502, "grad_norm": 0.795211911201477, "learning_rate": 8.42911375613064e-05, "loss": 0.8717, "step": 81250 }, { "epoch": 0.5191469787766888, "grad_norm": 0.7301700115203857, "learning_rate": 8.428748567498443e-05, "loss": 0.8484, "step": 81260 }, { "epoch": 0.5192108659264275, "grad_norm": 0.9231024384498596, "learning_rate": 8.428383344335779e-05, "loss": 0.8684, "step": 81270 }, { "epoch": 0.5192747530761662, "grad_norm": 0.681303083896637, "learning_rate": 8.428018086646333e-05, "loss": 0.8345, "step": 81280 }, { "epoch": 0.5193386402259049, "grad_norm": 1.12642502784729, "learning_rate": 8.427652794433776e-05, "loss": 0.8428, "step": 81290 }, { "epoch": 0.5194025273756436, "grad_norm": 0.9355636239051819, "learning_rate": 8.42728746770179e-05, "loss": 0.7608, "step": 81300 }, { "epoch": 0.5194664145253823, "grad_norm": 1.2537355422973633, "learning_rate": 8.426922106454054e-05, "loss": 0.8054, "step": 81310 }, { "epoch": 0.519530301675121, "grad_norm": 0.8078314065933228, "learning_rate": 8.42655671069425e-05, "loss": 0.9239, "step": 81320 }, { "epoch": 0.5195941888248597, "grad_norm": 1.214921236038208, "learning_rate": 8.426191280426052e-05, "loss": 0.8623, "step": 81330 }, { "epoch": 0.5196580759745985, "grad_norm": 1.2319025993347168, "learning_rate": 8.425825815653145e-05, "loss": 0.8355, "step": 81340 }, { "epoch": 0.5197219631243372, "grad_norm": 0.8140376806259155, "learning_rate": 8.42546031637921e-05, "loss": 0.9517, "step": 81350 }, { "epoch": 0.5197858502740759, "grad_norm": 1.0668420791625977, "learning_rate": 8.425094782607925e-05, "loss": 0.8228, "step": 81360 }, { "epoch": 0.5198497374238146, "grad_norm": 0.855273962020874, "learning_rate": 8.424729214342972e-05, "loss": 0.8945, "step": 81370 }, { "epoch": 0.5199136245735533, "grad_norm": 1.8131141662597656, "learning_rate": 8.424363611588033e-05, "loss": 0.9209, "step": 81380 }, { "epoch": 0.519977511723292, "grad_norm": 0.7118239402770996, "learning_rate": 8.42399797434679e-05, "loss": 1.0615, "step": 81390 }, { "epoch": 0.5200413988730307, "grad_norm": 0.7992277145385742, "learning_rate": 8.423632302622926e-05, "loss": 0.8819, "step": 81400 }, { "epoch": 0.5201052860227694, "grad_norm": 0.9642464518547058, "learning_rate": 8.423266596420123e-05, "loss": 0.8717, "step": 81410 }, { "epoch": 0.5201691731725081, "grad_norm": 0.8006801605224609, "learning_rate": 8.422900855742062e-05, "loss": 0.8487, "step": 81420 }, { "epoch": 0.5202330603222468, "grad_norm": 0.7330396771430969, "learning_rate": 8.422535080592431e-05, "loss": 0.9406, "step": 81430 }, { "epoch": 0.5202969474719855, "grad_norm": 0.8267525434494019, "learning_rate": 8.422169270974909e-05, "loss": 0.8308, "step": 81440 }, { "epoch": 0.5203608346217242, "grad_norm": 1.518169641494751, "learning_rate": 8.421803426893182e-05, "loss": 0.9029, "step": 81450 }, { "epoch": 0.5204247217714629, "grad_norm": 1.128998041152954, "learning_rate": 8.421437548350935e-05, "loss": 0.8468, "step": 81460 }, { "epoch": 0.5204886089212016, "grad_norm": 1.051698088645935, "learning_rate": 8.42107163535185e-05, "loss": 1.12, "step": 81470 }, { "epoch": 0.5205524960709403, "grad_norm": 0.7762027382850647, "learning_rate": 8.420705687899616e-05, "loss": 0.6149, "step": 81480 }, { "epoch": 0.520616383220679, "grad_norm": 1.0097482204437256, "learning_rate": 8.420339705997915e-05, "loss": 0.7171, "step": 81490 }, { "epoch": 0.5206802703704176, "grad_norm": 1.062819242477417, "learning_rate": 8.419973689650436e-05, "loss": 0.9634, "step": 81500 }, { "epoch": 0.5207441575201563, "grad_norm": 0.9354637861251831, "learning_rate": 8.41960763886086e-05, "loss": 0.8755, "step": 81510 }, { "epoch": 0.520808044669895, "grad_norm": 0.5439125299453735, "learning_rate": 8.41924155363288e-05, "loss": 0.9437, "step": 81520 }, { "epoch": 0.5208719318196338, "grad_norm": 0.7927828431129456, "learning_rate": 8.418875433970177e-05, "loss": 0.9138, "step": 81530 }, { "epoch": 0.5209358189693725, "grad_norm": 1.90019953250885, "learning_rate": 8.418509279876444e-05, "loss": 1.1347, "step": 81540 }, { "epoch": 0.5209997061191112, "grad_norm": 1.1833665370941162, "learning_rate": 8.418179711756595e-05, "loss": 1.0368, "step": 81550 }, { "epoch": 0.5210635932688499, "grad_norm": 0.6297504305839539, "learning_rate": 8.417813492254057e-05, "loss": 0.7755, "step": 81560 }, { "epoch": 0.5211274804185886, "grad_norm": 1.1400600671768188, "learning_rate": 8.417447238331177e-05, "loss": 1.1425, "step": 81570 }, { "epoch": 0.5211913675683273, "grad_norm": 0.9770885705947876, "learning_rate": 8.41708094999165e-05, "loss": 0.8823, "step": 81580 }, { "epoch": 0.521255254718066, "grad_norm": 1.117124080657959, "learning_rate": 8.41671462723916e-05, "loss": 1.0206, "step": 81590 }, { "epoch": 0.5213191418678047, "grad_norm": 0.7140239477157593, "learning_rate": 8.416348270077399e-05, "loss": 0.9016, "step": 81600 }, { "epoch": 0.5213830290175434, "grad_norm": 0.8254780769348145, "learning_rate": 8.415981878510054e-05, "loss": 0.9917, "step": 81610 }, { "epoch": 0.5214469161672821, "grad_norm": 0.7922462821006775, "learning_rate": 8.415615452540817e-05, "loss": 0.7269, "step": 81620 }, { "epoch": 0.5215108033170208, "grad_norm": 0.5598194003105164, "learning_rate": 8.415248992173377e-05, "loss": 0.7201, "step": 81630 }, { "epoch": 0.5215746904667595, "grad_norm": 1.1841058731079102, "learning_rate": 8.414882497411424e-05, "loss": 0.9925, "step": 81640 }, { "epoch": 0.5216385776164982, "grad_norm": 1.0316505432128906, "learning_rate": 8.414515968258653e-05, "loss": 0.7948, "step": 81650 }, { "epoch": 0.5217024647662369, "grad_norm": 1.1518917083740234, "learning_rate": 8.41414940471875e-05, "loss": 1.0671, "step": 81660 }, { "epoch": 0.5217663519159756, "grad_norm": 1.1492241621017456, "learning_rate": 8.413782806795409e-05, "loss": 0.9031, "step": 81670 }, { "epoch": 0.5218302390657144, "grad_norm": 1.136619210243225, "learning_rate": 8.413416174492323e-05, "loss": 0.8399, "step": 81680 }, { "epoch": 0.5218941262154531, "grad_norm": 0.7834781408309937, "learning_rate": 8.413049507813182e-05, "loss": 1.0536, "step": 81690 }, { "epoch": 0.5219580133651918, "grad_norm": 0.7433455586433411, "learning_rate": 8.412682806761681e-05, "loss": 0.9202, "step": 81700 }, { "epoch": 0.5220219005149305, "grad_norm": 0.7488266229629517, "learning_rate": 8.41231607134151e-05, "loss": 1.1904, "step": 81710 }, { "epoch": 0.5220857876646692, "grad_norm": 1.112772822380066, "learning_rate": 8.411949301556365e-05, "loss": 0.8893, "step": 81720 }, { "epoch": 0.5221496748144079, "grad_norm": 1.1430962085723877, "learning_rate": 8.411582497409937e-05, "loss": 0.7654, "step": 81730 }, { "epoch": 0.5222135619641465, "grad_norm": 0.9723464250564575, "learning_rate": 8.411215658905925e-05, "loss": 0.8554, "step": 81740 }, { "epoch": 0.5222774491138852, "grad_norm": 0.7580609321594238, "learning_rate": 8.410848786048018e-05, "loss": 0.8596, "step": 81750 }, { "epoch": 0.5223413362636239, "grad_norm": 1.1168793439865112, "learning_rate": 8.410481878839914e-05, "loss": 0.798, "step": 81760 }, { "epoch": 0.5224052234133626, "grad_norm": 0.7281593084335327, "learning_rate": 8.410114937285308e-05, "loss": 0.9573, "step": 81770 }, { "epoch": 0.5224691105631013, "grad_norm": 0.8710981011390686, "learning_rate": 8.409747961387892e-05, "loss": 0.8257, "step": 81780 }, { "epoch": 0.52253299771284, "grad_norm": 1.258819818496704, "learning_rate": 8.409380951151364e-05, "loss": 0.7601, "step": 81790 }, { "epoch": 0.5225968848625787, "grad_norm": 0.5845881700515747, "learning_rate": 8.409013906579422e-05, "loss": 0.894, "step": 81800 }, { "epoch": 0.5226607720123174, "grad_norm": 0.7453858852386475, "learning_rate": 8.40864682767576e-05, "loss": 0.8617, "step": 81810 }, { "epoch": 0.5227246591620561, "grad_norm": 0.8870908617973328, "learning_rate": 8.408279714444076e-05, "loss": 0.9018, "step": 81820 }, { "epoch": 0.5227885463117948, "grad_norm": 0.7710734605789185, "learning_rate": 8.407912566888068e-05, "loss": 0.7743, "step": 81830 }, { "epoch": 0.5228524334615335, "grad_norm": 1.001556396484375, "learning_rate": 8.40754538501143e-05, "loss": 0.9576, "step": 81840 }, { "epoch": 0.5229163206112722, "grad_norm": 1.0256420373916626, "learning_rate": 8.407178168817862e-05, "loss": 0.9606, "step": 81850 }, { "epoch": 0.522980207761011, "grad_norm": 1.1523104906082153, "learning_rate": 8.406810918311063e-05, "loss": 0.8513, "step": 81860 }, { "epoch": 0.5230440949107497, "grad_norm": 0.8839610815048218, "learning_rate": 8.40644363349473e-05, "loss": 1.0848, "step": 81870 }, { "epoch": 0.5231079820604884, "grad_norm": 0.863746166229248, "learning_rate": 8.406076314372564e-05, "loss": 0.8318, "step": 81880 }, { "epoch": 0.5231718692102271, "grad_norm": 1.282516360282898, "learning_rate": 8.405708960948262e-05, "loss": 0.9989, "step": 81890 }, { "epoch": 0.5232357563599658, "grad_norm": 0.9695531129837036, "learning_rate": 8.405341573225524e-05, "loss": 1.042, "step": 81900 }, { "epoch": 0.5232996435097045, "grad_norm": 1.1501736640930176, "learning_rate": 8.40497415120805e-05, "loss": 0.9179, "step": 81910 }, { "epoch": 0.5233635306594432, "grad_norm": 0.8112602829933167, "learning_rate": 8.404606694899542e-05, "loss": 0.7767, "step": 81920 }, { "epoch": 0.5234274178091819, "grad_norm": 0.7559998631477356, "learning_rate": 8.404239204303698e-05, "loss": 0.8375, "step": 81930 }, { "epoch": 0.5234913049589206, "grad_norm": 0.6688728332519531, "learning_rate": 8.403871679424222e-05, "loss": 0.8205, "step": 81940 }, { "epoch": 0.5235551921086593, "grad_norm": 1.4303487539291382, "learning_rate": 8.403504120264811e-05, "loss": 1.0413, "step": 81950 }, { "epoch": 0.523619079258398, "grad_norm": 1.0139001607894897, "learning_rate": 8.403136526829171e-05, "loss": 0.7122, "step": 81960 }, { "epoch": 0.5236829664081367, "grad_norm": 1.1375806331634521, "learning_rate": 8.402768899121e-05, "loss": 0.9585, "step": 81970 }, { "epoch": 0.5237468535578753, "grad_norm": 0.801270604133606, "learning_rate": 8.402401237144005e-05, "loss": 1.0773, "step": 81980 }, { "epoch": 0.523810740707614, "grad_norm": 1.0210850238800049, "learning_rate": 8.402033540901884e-05, "loss": 0.9391, "step": 81990 }, { "epoch": 0.5238746278573527, "grad_norm": 0.44863033294677734, "learning_rate": 8.401665810398342e-05, "loss": 0.7008, "step": 82000 }, { "epoch": 0.5239385150070914, "grad_norm": 0.6878476738929749, "learning_rate": 8.401298045637083e-05, "loss": 1.0242, "step": 82010 }, { "epoch": 0.5240024021568301, "grad_norm": 1.221062421798706, "learning_rate": 8.40093024662181e-05, "loss": 0.6651, "step": 82020 }, { "epoch": 0.5240662893065688, "grad_norm": 0.8801291584968567, "learning_rate": 8.400562413356228e-05, "loss": 1.07, "step": 82030 }, { "epoch": 0.5241301764563076, "grad_norm": 1.5638190507888794, "learning_rate": 8.40019454584404e-05, "loss": 1.0253, "step": 82040 }, { "epoch": 0.5241940636060463, "grad_norm": 0.7046545147895813, "learning_rate": 8.399826644088951e-05, "loss": 1.0211, "step": 82050 }, { "epoch": 0.524257950755785, "grad_norm": 0.7261834740638733, "learning_rate": 8.399458708094668e-05, "loss": 1.0918, "step": 82060 }, { "epoch": 0.5243218379055237, "grad_norm": 0.7312687635421753, "learning_rate": 8.399090737864893e-05, "loss": 1.0264, "step": 82070 }, { "epoch": 0.5243857250552624, "grad_norm": 0.7334839105606079, "learning_rate": 8.398722733403335e-05, "loss": 0.6618, "step": 82080 }, { "epoch": 0.5244496122050011, "grad_norm": 1.9923456907272339, "learning_rate": 8.398354694713697e-05, "loss": 0.7325, "step": 82090 }, { "epoch": 0.5245134993547398, "grad_norm": 1.5566961765289307, "learning_rate": 8.397986621799688e-05, "loss": 1.0462, "step": 82100 }, { "epoch": 0.5245773865044785, "grad_norm": 0.5462529063224792, "learning_rate": 8.397618514665015e-05, "loss": 0.7883, "step": 82110 }, { "epoch": 0.5246412736542172, "grad_norm": 0.7034682631492615, "learning_rate": 8.397250373313383e-05, "loss": 0.98, "step": 82120 }, { "epoch": 0.5247051608039559, "grad_norm": 2.1927855014801025, "learning_rate": 8.396882197748501e-05, "loss": 1.0054, "step": 82130 }, { "epoch": 0.5247690479536946, "grad_norm": 0.724446713924408, "learning_rate": 8.396513987974078e-05, "loss": 0.8474, "step": 82140 }, { "epoch": 0.5248329351034333, "grad_norm": 1.2433834075927734, "learning_rate": 8.396145743993819e-05, "loss": 1.0282, "step": 82150 }, { "epoch": 0.524896822253172, "grad_norm": 0.5404759645462036, "learning_rate": 8.395777465811434e-05, "loss": 0.9458, "step": 82160 }, { "epoch": 0.5249607094029107, "grad_norm": 0.5961291790008545, "learning_rate": 8.395409153430633e-05, "loss": 0.8767, "step": 82170 }, { "epoch": 0.5250245965526494, "grad_norm": 1.5657272338867188, "learning_rate": 8.395040806855125e-05, "loss": 0.8355, "step": 82180 }, { "epoch": 0.5250884837023881, "grad_norm": 0.761735737323761, "learning_rate": 8.394672426088618e-05, "loss": 1.1087, "step": 82190 }, { "epoch": 0.5251523708521268, "grad_norm": 0.904757022857666, "learning_rate": 8.394304011134822e-05, "loss": 1.1173, "step": 82200 }, { "epoch": 0.5252162580018656, "grad_norm": 1.890184998512268, "learning_rate": 8.39393556199745e-05, "loss": 0.7589, "step": 82210 }, { "epoch": 0.5252801451516043, "grad_norm": 0.8762137293815613, "learning_rate": 8.39356707868021e-05, "loss": 0.9525, "step": 82220 }, { "epoch": 0.5253440323013429, "grad_norm": 1.153976321220398, "learning_rate": 8.393198561186814e-05, "loss": 1.0239, "step": 82230 }, { "epoch": 0.5254079194510816, "grad_norm": 1.3719924688339233, "learning_rate": 8.392830009520972e-05, "loss": 0.8364, "step": 82240 }, { "epoch": 0.5254718066008203, "grad_norm": 0.9596297144889832, "learning_rate": 8.392461423686397e-05, "loss": 0.8316, "step": 82250 }, { "epoch": 0.525535693750559, "grad_norm": 0.8172164559364319, "learning_rate": 8.392092803686801e-05, "loss": 0.8146, "step": 82260 }, { "epoch": 0.5255995809002977, "grad_norm": 0.7319055795669556, "learning_rate": 8.391724149525895e-05, "loss": 0.8911, "step": 82270 }, { "epoch": 0.5256634680500364, "grad_norm": 0.949073314666748, "learning_rate": 8.391355461207393e-05, "loss": 0.8981, "step": 82280 }, { "epoch": 0.5257273551997751, "grad_norm": 2.1071205139160156, "learning_rate": 8.390986738735007e-05, "loss": 0.882, "step": 82290 }, { "epoch": 0.5257912423495138, "grad_norm": 0.9172298908233643, "learning_rate": 8.390617982112452e-05, "loss": 1.0809, "step": 82300 }, { "epoch": 0.5258551294992525, "grad_norm": 1.3219941854476929, "learning_rate": 8.390249191343442e-05, "loss": 0.8637, "step": 82310 }, { "epoch": 0.5259190166489912, "grad_norm": 0.8916542530059814, "learning_rate": 8.389880366431687e-05, "loss": 1.1391, "step": 82320 }, { "epoch": 0.5259829037987299, "grad_norm": 0.6826764941215515, "learning_rate": 8.389511507380905e-05, "loss": 0.814, "step": 82330 }, { "epoch": 0.5260467909484686, "grad_norm": 1.7593846321105957, "learning_rate": 8.389142614194809e-05, "loss": 1.1427, "step": 82340 }, { "epoch": 0.5261106780982073, "grad_norm": 0.4655475318431854, "learning_rate": 8.388773686877117e-05, "loss": 0.6992, "step": 82350 }, { "epoch": 0.526174565247946, "grad_norm": 1.185663104057312, "learning_rate": 8.38840472543154e-05, "loss": 0.8237, "step": 82360 }, { "epoch": 0.5262384523976847, "grad_norm": 0.8329865336418152, "learning_rate": 8.388035729861797e-05, "loss": 0.9379, "step": 82370 }, { "epoch": 0.5263023395474234, "grad_norm": 0.5856214761734009, "learning_rate": 8.387666700171603e-05, "loss": 0.7349, "step": 82380 }, { "epoch": 0.5263662266971622, "grad_norm": 0.8730786442756653, "learning_rate": 8.387297636364675e-05, "loss": 0.8201, "step": 82390 }, { "epoch": 0.5264301138469009, "grad_norm": 0.7338623404502869, "learning_rate": 8.38692853844473e-05, "loss": 0.9384, "step": 82400 }, { "epoch": 0.5264940009966396, "grad_norm": 0.6729745268821716, "learning_rate": 8.386559406415481e-05, "loss": 0.8973, "step": 82410 }, { "epoch": 0.5265578881463783, "grad_norm": 1.3873519897460938, "learning_rate": 8.386190240280652e-05, "loss": 0.836, "step": 82420 }, { "epoch": 0.526621775296117, "grad_norm": 0.808180570602417, "learning_rate": 8.385821040043958e-05, "loss": 0.8696, "step": 82430 }, { "epoch": 0.5266856624458557, "grad_norm": 1.6018669605255127, "learning_rate": 8.385451805709116e-05, "loss": 0.7054, "step": 82440 }, { "epoch": 0.5267495495955944, "grad_norm": 0.9540040493011475, "learning_rate": 8.385082537279846e-05, "loss": 0.9149, "step": 82450 }, { "epoch": 0.5268134367453331, "grad_norm": 1.0204883813858032, "learning_rate": 8.384713234759866e-05, "loss": 0.8372, "step": 82460 }, { "epoch": 0.5268773238950717, "grad_norm": 0.6929607391357422, "learning_rate": 8.384343898152896e-05, "loss": 0.7188, "step": 82470 }, { "epoch": 0.5269412110448104, "grad_norm": 0.7308977246284485, "learning_rate": 8.383974527462655e-05, "loss": 0.8171, "step": 82480 }, { "epoch": 0.5270050981945491, "grad_norm": 0.9977597594261169, "learning_rate": 8.383605122692861e-05, "loss": 0.7189, "step": 82490 }, { "epoch": 0.5270689853442878, "grad_norm": 1.0899478197097778, "learning_rate": 8.383235683847238e-05, "loss": 1.0767, "step": 82500 }, { "epoch": 0.5271328724940265, "grad_norm": 1.763649582862854, "learning_rate": 8.382866210929506e-05, "loss": 0.9618, "step": 82510 }, { "epoch": 0.5271967596437652, "grad_norm": 0.8483105897903442, "learning_rate": 8.382496703943382e-05, "loss": 0.9302, "step": 82520 }, { "epoch": 0.5272606467935039, "grad_norm": 1.4405713081359863, "learning_rate": 8.38212716289259e-05, "loss": 1.0092, "step": 82530 }, { "epoch": 0.5273245339432426, "grad_norm": 0.8310216069221497, "learning_rate": 8.381757587780853e-05, "loss": 0.9264, "step": 82540 }, { "epoch": 0.5273884210929813, "grad_norm": 1.1414271593093872, "learning_rate": 8.381387978611892e-05, "loss": 0.8157, "step": 82550 }, { "epoch": 0.52745230824272, "grad_norm": 1.1843761205673218, "learning_rate": 8.381018335389428e-05, "loss": 1.0638, "step": 82560 }, { "epoch": 0.5275161953924588, "grad_norm": 0.6531765460968018, "learning_rate": 8.380648658117186e-05, "loss": 0.8616, "step": 82570 }, { "epoch": 0.5275800825421975, "grad_norm": 0.6731355786323547, "learning_rate": 8.380278946798883e-05, "loss": 0.7789, "step": 82580 }, { "epoch": 0.5276439696919362, "grad_norm": 0.7285798788070679, "learning_rate": 8.37990920143825e-05, "loss": 0.8369, "step": 82590 }, { "epoch": 0.5277078568416749, "grad_norm": 1.3306784629821777, "learning_rate": 8.379539422039006e-05, "loss": 0.9274, "step": 82600 }, { "epoch": 0.5277717439914136, "grad_norm": 0.7392432689666748, "learning_rate": 8.379169608604877e-05, "loss": 0.7246, "step": 82610 }, { "epoch": 0.5278356311411523, "grad_norm": 1.119094967842102, "learning_rate": 8.378799761139587e-05, "loss": 0.8628, "step": 82620 }, { "epoch": 0.527899518290891, "grad_norm": 0.5163264274597168, "learning_rate": 8.378429879646859e-05, "loss": 0.6895, "step": 82630 }, { "epoch": 0.5279634054406297, "grad_norm": 0.7841934561729431, "learning_rate": 8.378059964130421e-05, "loss": 0.9658, "step": 82640 }, { "epoch": 0.5280272925903684, "grad_norm": 0.6107231378555298, "learning_rate": 8.377690014593996e-05, "loss": 0.8572, "step": 82650 }, { "epoch": 0.5280911797401071, "grad_norm": 0.839893102645874, "learning_rate": 8.377320031041309e-05, "loss": 1.003, "step": 82660 }, { "epoch": 0.5281550668898458, "grad_norm": 0.8523367643356323, "learning_rate": 8.37695001347609e-05, "loss": 0.8451, "step": 82670 }, { "epoch": 0.5282189540395845, "grad_norm": 0.8957772850990295, "learning_rate": 8.37657996190206e-05, "loss": 1.1725, "step": 82680 }, { "epoch": 0.5282828411893232, "grad_norm": 1.1203519105911255, "learning_rate": 8.376209876322952e-05, "loss": 0.7363, "step": 82690 }, { "epoch": 0.5283467283390619, "grad_norm": 0.440065860748291, "learning_rate": 8.375839756742487e-05, "loss": 0.9615, "step": 82700 }, { "epoch": 0.5284106154888005, "grad_norm": 0.8723582029342651, "learning_rate": 8.375469603164397e-05, "loss": 1.0116, "step": 82710 }, { "epoch": 0.5284745026385392, "grad_norm": 0.7620988488197327, "learning_rate": 8.375099415592406e-05, "loss": 0.74, "step": 82720 }, { "epoch": 0.5285383897882779, "grad_norm": 0.6935875415802002, "learning_rate": 8.374729194030245e-05, "loss": 0.6662, "step": 82730 }, { "epoch": 0.5286022769380166, "grad_norm": 0.8621264696121216, "learning_rate": 8.374358938481641e-05, "loss": 0.8284, "step": 82740 }, { "epoch": 0.5286661640877554, "grad_norm": 1.0655511617660522, "learning_rate": 8.373988648950324e-05, "loss": 0.8265, "step": 82750 }, { "epoch": 0.5287300512374941, "grad_norm": 0.6569475531578064, "learning_rate": 8.373618325440022e-05, "loss": 0.8138, "step": 82760 }, { "epoch": 0.5287939383872328, "grad_norm": 0.7676059007644653, "learning_rate": 8.373247967954465e-05, "loss": 0.798, "step": 82770 }, { "epoch": 0.5288578255369715, "grad_norm": 0.5665331482887268, "learning_rate": 8.372877576497383e-05, "loss": 1.0496, "step": 82780 }, { "epoch": 0.5289217126867102, "grad_norm": 0.7789902687072754, "learning_rate": 8.372507151072506e-05, "loss": 0.9221, "step": 82790 }, { "epoch": 0.5289855998364489, "grad_norm": 0.6825060248374939, "learning_rate": 8.372136691683563e-05, "loss": 0.7601, "step": 82800 }, { "epoch": 0.5290494869861876, "grad_norm": 0.8659316897392273, "learning_rate": 8.371766198334288e-05, "loss": 0.763, "step": 82810 }, { "epoch": 0.5291133741359263, "grad_norm": 0.7778592109680176, "learning_rate": 8.371395671028409e-05, "loss": 0.8137, "step": 82820 }, { "epoch": 0.529177261285665, "grad_norm": 0.7770895957946777, "learning_rate": 8.37102510976966e-05, "loss": 1.0609, "step": 82830 }, { "epoch": 0.5292411484354037, "grad_norm": 0.8733034729957581, "learning_rate": 8.370654514561771e-05, "loss": 0.598, "step": 82840 }, { "epoch": 0.5293050355851424, "grad_norm": 0.7436967492103577, "learning_rate": 8.370283885408474e-05, "loss": 0.8707, "step": 82850 }, { "epoch": 0.5293689227348811, "grad_norm": 0.6810287237167358, "learning_rate": 8.369913222313504e-05, "loss": 0.831, "step": 82860 }, { "epoch": 0.5294328098846198, "grad_norm": 0.6513703465461731, "learning_rate": 8.369542525280593e-05, "loss": 1.0104, "step": 82870 }, { "epoch": 0.5294966970343585, "grad_norm": 0.7047162055969238, "learning_rate": 8.369171794313473e-05, "loss": 0.93, "step": 82880 }, { "epoch": 0.5295605841840972, "grad_norm": 1.145445704460144, "learning_rate": 8.368801029415878e-05, "loss": 0.777, "step": 82890 }, { "epoch": 0.5296244713338359, "grad_norm": 0.8920283317565918, "learning_rate": 8.368430230591542e-05, "loss": 0.7874, "step": 82900 }, { "epoch": 0.5296883584835747, "grad_norm": 1.2382694482803345, "learning_rate": 8.3680593978442e-05, "loss": 0.9681, "step": 82910 }, { "epoch": 0.5297522456333134, "grad_norm": 1.1094623804092407, "learning_rate": 8.367688531177586e-05, "loss": 1.0037, "step": 82920 }, { "epoch": 0.5298161327830521, "grad_norm": 0.6824793815612793, "learning_rate": 8.367317630595434e-05, "loss": 0.8448, "step": 82930 }, { "epoch": 0.5298800199327908, "grad_norm": 0.7402679324150085, "learning_rate": 8.366946696101483e-05, "loss": 0.8678, "step": 82940 }, { "epoch": 0.5299439070825295, "grad_norm": 0.8517834544181824, "learning_rate": 8.366575727699464e-05, "loss": 0.9731, "step": 82950 }, { "epoch": 0.5300077942322681, "grad_norm": 0.645380437374115, "learning_rate": 8.366204725393114e-05, "loss": 0.7756, "step": 82960 }, { "epoch": 0.5300716813820068, "grad_norm": 1.7040249109268188, "learning_rate": 8.365833689186172e-05, "loss": 0.8345, "step": 82970 }, { "epoch": 0.5301355685317455, "grad_norm": 1.2551957368850708, "learning_rate": 8.365462619082372e-05, "loss": 1.1466, "step": 82980 }, { "epoch": 0.5301994556814842, "grad_norm": 0.596899688243866, "learning_rate": 8.365091515085452e-05, "loss": 0.9004, "step": 82990 }, { "epoch": 0.5302633428312229, "grad_norm": 0.7382088899612427, "learning_rate": 8.36472037719915e-05, "loss": 0.9212, "step": 83000 }, { "epoch": 0.5303272299809616, "grad_norm": 0.587727963924408, "learning_rate": 8.364349205427203e-05, "loss": 1.1075, "step": 83010 }, { "epoch": 0.5303911171307003, "grad_norm": 0.5298671126365662, "learning_rate": 8.363977999773347e-05, "loss": 0.8375, "step": 83020 }, { "epoch": 0.530455004280439, "grad_norm": 0.7597865462303162, "learning_rate": 8.363606760241323e-05, "loss": 1.0665, "step": 83030 }, { "epoch": 0.5305188914301777, "grad_norm": 1.4384864568710327, "learning_rate": 8.363235486834871e-05, "loss": 0.8882, "step": 83040 }, { "epoch": 0.5305827785799164, "grad_norm": 1.0824155807495117, "learning_rate": 8.362864179557726e-05, "loss": 0.9262, "step": 83050 }, { "epoch": 0.5306466657296551, "grad_norm": 0.9911159873008728, "learning_rate": 8.36249283841363e-05, "loss": 1.0327, "step": 83060 }, { "epoch": 0.5307105528793938, "grad_norm": 1.1133580207824707, "learning_rate": 8.362121463406323e-05, "loss": 0.9494, "step": 83070 }, { "epoch": 0.5307744400291325, "grad_norm": 1.960688591003418, "learning_rate": 8.361750054539544e-05, "loss": 0.9712, "step": 83080 }, { "epoch": 0.5308383271788712, "grad_norm": 0.710477888584137, "learning_rate": 8.361378611817033e-05, "loss": 0.913, "step": 83090 }, { "epoch": 0.53090221432861, "grad_norm": 0.9821794629096985, "learning_rate": 8.36100713524253e-05, "loss": 0.9127, "step": 83100 }, { "epoch": 0.5309661014783487, "grad_norm": 0.5997344851493835, "learning_rate": 8.360635624819778e-05, "loss": 0.8929, "step": 83110 }, { "epoch": 0.5310299886280874, "grad_norm": 1.1449615955352783, "learning_rate": 8.36026408055252e-05, "loss": 0.5898, "step": 83120 }, { "epoch": 0.5310938757778261, "grad_norm": 1.3157292604446411, "learning_rate": 8.359892502444494e-05, "loss": 0.8881, "step": 83130 }, { "epoch": 0.5311577629275648, "grad_norm": 0.6515958905220032, "learning_rate": 8.359520890499443e-05, "loss": 0.8024, "step": 83140 }, { "epoch": 0.5312216500773035, "grad_norm": 0.8860677480697632, "learning_rate": 8.359149244721112e-05, "loss": 0.8348, "step": 83150 }, { "epoch": 0.5312855372270422, "grad_norm": 0.8517594933509827, "learning_rate": 8.358777565113242e-05, "loss": 0.8221, "step": 83160 }, { "epoch": 0.5313494243767809, "grad_norm": 0.9374033808708191, "learning_rate": 8.358405851679574e-05, "loss": 0.7633, "step": 83170 }, { "epoch": 0.5314133115265196, "grad_norm": 0.885487973690033, "learning_rate": 8.358034104423857e-05, "loss": 0.926, "step": 83180 }, { "epoch": 0.5314771986762583, "grad_norm": 0.6758369207382202, "learning_rate": 8.357662323349828e-05, "loss": 0.9676, "step": 83190 }, { "epoch": 0.5315410858259969, "grad_norm": 1.0564520359039307, "learning_rate": 8.357290508461238e-05, "loss": 1.1299, "step": 83200 }, { "epoch": 0.5316049729757356, "grad_norm": 0.7590727210044861, "learning_rate": 8.356918659761826e-05, "loss": 0.7102, "step": 83210 }, { "epoch": 0.5316688601254743, "grad_norm": 1.157811164855957, "learning_rate": 8.356546777255339e-05, "loss": 1.3055, "step": 83220 }, { "epoch": 0.531732747275213, "grad_norm": 0.7595437169075012, "learning_rate": 8.356174860945521e-05, "loss": 0.6526, "step": 83230 }, { "epoch": 0.5317966344249517, "grad_norm": 1.0199005603790283, "learning_rate": 8.355802910836122e-05, "loss": 0.8271, "step": 83240 }, { "epoch": 0.5318605215746904, "grad_norm": 0.9487647414207458, "learning_rate": 8.355430926930882e-05, "loss": 1.0003, "step": 83250 }, { "epoch": 0.5319244087244291, "grad_norm": 0.8482071161270142, "learning_rate": 8.35505890923355e-05, "loss": 1.0448, "step": 83260 }, { "epoch": 0.5319882958741678, "grad_norm": 0.7448784708976746, "learning_rate": 8.354686857747872e-05, "loss": 1.0787, "step": 83270 }, { "epoch": 0.5320521830239066, "grad_norm": 0.6928815245628357, "learning_rate": 8.354314772477596e-05, "loss": 0.7474, "step": 83280 }, { "epoch": 0.5321160701736453, "grad_norm": 0.7215262651443481, "learning_rate": 8.353942653426468e-05, "loss": 0.8376, "step": 83290 }, { "epoch": 0.532179957323384, "grad_norm": 1.3700790405273438, "learning_rate": 8.353570500598235e-05, "loss": 0.9953, "step": 83300 }, { "epoch": 0.5322438444731227, "grad_norm": 1.3022416830062866, "learning_rate": 8.353198313996649e-05, "loss": 0.874, "step": 83310 }, { "epoch": 0.5323077316228614, "grad_norm": 0.7596305012702942, "learning_rate": 8.352826093625453e-05, "loss": 0.6174, "step": 83320 }, { "epoch": 0.5323716187726001, "grad_norm": 0.7313193082809448, "learning_rate": 8.352453839488397e-05, "loss": 1.105, "step": 83330 }, { "epoch": 0.5324355059223388, "grad_norm": 0.6660478711128235, "learning_rate": 8.35208155158923e-05, "loss": 1.0361, "step": 83340 }, { "epoch": 0.5324993930720775, "grad_norm": 1.1651302576065063, "learning_rate": 8.351709229931704e-05, "loss": 0.8399, "step": 83350 }, { "epoch": 0.5325632802218162, "grad_norm": 1.0588157176971436, "learning_rate": 8.351336874519564e-05, "loss": 1.0247, "step": 83360 }, { "epoch": 0.5326271673715549, "grad_norm": 1.3601503372192383, "learning_rate": 8.350964485356562e-05, "loss": 0.9979, "step": 83370 }, { "epoch": 0.5326910545212936, "grad_norm": 1.1261094808578491, "learning_rate": 8.350592062446451e-05, "loss": 0.793, "step": 83380 }, { "epoch": 0.5327549416710323, "grad_norm": 0.7241072654724121, "learning_rate": 8.35021960579298e-05, "loss": 0.965, "step": 83390 }, { "epoch": 0.532818828820771, "grad_norm": 0.7780799865722656, "learning_rate": 8.349847115399896e-05, "loss": 1.0567, "step": 83400 }, { "epoch": 0.5328827159705097, "grad_norm": 0.7408662438392639, "learning_rate": 8.349474591270957e-05, "loss": 0.782, "step": 83410 }, { "epoch": 0.5329466031202484, "grad_norm": 1.189795732498169, "learning_rate": 8.349102033409907e-05, "loss": 0.8716, "step": 83420 }, { "epoch": 0.5330104902699871, "grad_norm": 0.7125329375267029, "learning_rate": 8.348729441820505e-05, "loss": 0.9386, "step": 83430 }, { "epoch": 0.5330743774197257, "grad_norm": 0.9453898668289185, "learning_rate": 8.3483568165065e-05, "loss": 0.9899, "step": 83440 }, { "epoch": 0.5331382645694644, "grad_norm": 0.7429458498954773, "learning_rate": 8.347984157471645e-05, "loss": 0.893, "step": 83450 }, { "epoch": 0.5332021517192032, "grad_norm": 0.758669376373291, "learning_rate": 8.347611464719694e-05, "loss": 0.9558, "step": 83460 }, { "epoch": 0.5332660388689419, "grad_norm": 0.7475212216377258, "learning_rate": 8.347238738254399e-05, "loss": 0.7202, "step": 83470 }, { "epoch": 0.5333299260186806, "grad_norm": 1.387135624885559, "learning_rate": 8.346865978079512e-05, "loss": 0.6861, "step": 83480 }, { "epoch": 0.5333938131684193, "grad_norm": 0.9755001068115234, "learning_rate": 8.346493184198792e-05, "loss": 0.8146, "step": 83490 }, { "epoch": 0.533457700318158, "grad_norm": 0.9414482712745667, "learning_rate": 8.346120356615989e-05, "loss": 0.9161, "step": 83500 }, { "epoch": 0.5335215874678967, "grad_norm": 0.7464240193367004, "learning_rate": 8.34574749533486e-05, "loss": 0.9186, "step": 83510 }, { "epoch": 0.5335854746176354, "grad_norm": 1.2203441858291626, "learning_rate": 8.34537460035916e-05, "loss": 0.8786, "step": 83520 }, { "epoch": 0.5336493617673741, "grad_norm": 1.0309191942214966, "learning_rate": 8.345001671692641e-05, "loss": 0.7864, "step": 83530 }, { "epoch": 0.5337132489171128, "grad_norm": 1.0020480155944824, "learning_rate": 8.344628709339063e-05, "loss": 0.858, "step": 83540 }, { "epoch": 0.5337771360668515, "grad_norm": 0.7191622257232666, "learning_rate": 8.344255713302181e-05, "loss": 0.699, "step": 83550 }, { "epoch": 0.5338410232165902, "grad_norm": 0.4417421817779541, "learning_rate": 8.343882683585748e-05, "loss": 0.8309, "step": 83560 }, { "epoch": 0.5339049103663289, "grad_norm": 0.5455567240715027, "learning_rate": 8.343509620193526e-05, "loss": 0.7101, "step": 83570 }, { "epoch": 0.5339687975160676, "grad_norm": 0.7480769753456116, "learning_rate": 8.343136523129269e-05, "loss": 1.0079, "step": 83580 }, { "epoch": 0.5340326846658063, "grad_norm": 0.569848895072937, "learning_rate": 8.342763392396735e-05, "loss": 0.9228, "step": 83590 }, { "epoch": 0.534096571815545, "grad_norm": 0.8300278782844543, "learning_rate": 8.342390227999683e-05, "loss": 0.8459, "step": 83600 }, { "epoch": 0.5341604589652837, "grad_norm": 0.7378689050674438, "learning_rate": 8.342017029941868e-05, "loss": 0.9019, "step": 83610 }, { "epoch": 0.5342243461150225, "grad_norm": 1.1345140933990479, "learning_rate": 8.34164379822705e-05, "loss": 0.7472, "step": 83620 }, { "epoch": 0.5342882332647612, "grad_norm": 0.5428297519683838, "learning_rate": 8.341270532858989e-05, "loss": 0.7232, "step": 83630 }, { "epoch": 0.5343521204144999, "grad_norm": 0.8249925971031189, "learning_rate": 8.340897233841443e-05, "loss": 0.785, "step": 83640 }, { "epoch": 0.5344160075642386, "grad_norm": 0.9514716863632202, "learning_rate": 8.340523901178173e-05, "loss": 0.7077, "step": 83650 }, { "epoch": 0.5344798947139773, "grad_norm": 1.2342941761016846, "learning_rate": 8.340150534872934e-05, "loss": 0.7654, "step": 83660 }, { "epoch": 0.534543781863716, "grad_norm": 0.7578923106193542, "learning_rate": 8.339777134929492e-05, "loss": 0.8597, "step": 83670 }, { "epoch": 0.5346076690134546, "grad_norm": 0.7773808836936951, "learning_rate": 8.339403701351604e-05, "loss": 0.9918, "step": 83680 }, { "epoch": 0.5346715561631933, "grad_norm": 1.0507415533065796, "learning_rate": 8.339030234143032e-05, "loss": 0.7686, "step": 83690 }, { "epoch": 0.534735443312932, "grad_norm": 0.6321387887001038, "learning_rate": 8.338656733307537e-05, "loss": 0.9376, "step": 83700 }, { "epoch": 0.5347993304626707, "grad_norm": 0.705500066280365, "learning_rate": 8.33828319884888e-05, "loss": 0.9138, "step": 83710 }, { "epoch": 0.5348632176124094, "grad_norm": 0.745877206325531, "learning_rate": 8.337909630770824e-05, "loss": 1.2483, "step": 83720 }, { "epoch": 0.5349271047621481, "grad_norm": 0.9086830615997314, "learning_rate": 8.337536029077129e-05, "loss": 0.6154, "step": 83730 }, { "epoch": 0.5349909919118868, "grad_norm": 1.130573034286499, "learning_rate": 8.337162393771559e-05, "loss": 0.8188, "step": 83740 }, { "epoch": 0.5350548790616255, "grad_norm": 0.8201401829719543, "learning_rate": 8.336788724857878e-05, "loss": 1.1955, "step": 83750 }, { "epoch": 0.5351187662113642, "grad_norm": 1.0572373867034912, "learning_rate": 8.336415022339847e-05, "loss": 1.0602, "step": 83760 }, { "epoch": 0.5351826533611029, "grad_norm": 0.4251170754432678, "learning_rate": 8.33604128622123e-05, "loss": 0.7514, "step": 83770 }, { "epoch": 0.5352465405108416, "grad_norm": 1.0447115898132324, "learning_rate": 8.335667516505791e-05, "loss": 0.9743, "step": 83780 }, { "epoch": 0.5353104276605803, "grad_norm": 0.8478367924690247, "learning_rate": 8.335293713197296e-05, "loss": 1.123, "step": 83790 }, { "epoch": 0.535374314810319, "grad_norm": 0.8603829145431519, "learning_rate": 8.334919876299507e-05, "loss": 0.9385, "step": 83800 }, { "epoch": 0.5354382019600578, "grad_norm": 0.719473659992218, "learning_rate": 8.334546005816188e-05, "loss": 0.8173, "step": 83810 }, { "epoch": 0.5355020891097965, "grad_norm": 1.2602207660675049, "learning_rate": 8.334172101751108e-05, "loss": 1.4649, "step": 83820 }, { "epoch": 0.5355659762595352, "grad_norm": 1.0611252784729004, "learning_rate": 8.33379816410803e-05, "loss": 0.922, "step": 83830 }, { "epoch": 0.5356298634092739, "grad_norm": 0.7457683682441711, "learning_rate": 8.33342419289072e-05, "loss": 0.9272, "step": 83840 }, { "epoch": 0.5356937505590126, "grad_norm": 1.1285589933395386, "learning_rate": 8.333050188102944e-05, "loss": 0.9133, "step": 83850 }, { "epoch": 0.5357576377087513, "grad_norm": 1.1243196725845337, "learning_rate": 8.33267614974847e-05, "loss": 0.6372, "step": 83860 }, { "epoch": 0.53582152485849, "grad_norm": 1.4428707361221313, "learning_rate": 8.332302077831065e-05, "loss": 0.9235, "step": 83870 }, { "epoch": 0.5358854120082287, "grad_norm": 0.7449108362197876, "learning_rate": 8.331927972354492e-05, "loss": 0.9763, "step": 83880 }, { "epoch": 0.5359492991579674, "grad_norm": 0.6374861001968384, "learning_rate": 8.331553833322526e-05, "loss": 0.7703, "step": 83890 }, { "epoch": 0.5360131863077061, "grad_norm": 1.0096155405044556, "learning_rate": 8.331179660738927e-05, "loss": 0.8562, "step": 83900 }, { "epoch": 0.5360770734574448, "grad_norm": 0.7320453524589539, "learning_rate": 8.330805454607468e-05, "loss": 0.666, "step": 83910 }, { "epoch": 0.5361409606071835, "grad_norm": 0.8086037635803223, "learning_rate": 8.330431214931917e-05, "loss": 0.8849, "step": 83920 }, { "epoch": 0.5362048477569221, "grad_norm": 2.057863235473633, "learning_rate": 8.330056941716043e-05, "loss": 0.9219, "step": 83930 }, { "epoch": 0.5362687349066608, "grad_norm": 0.6205108761787415, "learning_rate": 8.329682634963614e-05, "loss": 1.0129, "step": 83940 }, { "epoch": 0.5363326220563995, "grad_norm": 1.2019091844558716, "learning_rate": 8.3293082946784e-05, "loss": 1.1293, "step": 83950 }, { "epoch": 0.5363965092061382, "grad_norm": 0.6992289423942566, "learning_rate": 8.328933920864172e-05, "loss": 0.6816, "step": 83960 }, { "epoch": 0.536460396355877, "grad_norm": 0.8456112742424011, "learning_rate": 8.328559513524699e-05, "loss": 0.9054, "step": 83970 }, { "epoch": 0.5365242835056157, "grad_norm": 0.6546765565872192, "learning_rate": 8.328185072663752e-05, "loss": 0.8627, "step": 83980 }, { "epoch": 0.5365881706553544, "grad_norm": 0.9863765835762024, "learning_rate": 8.327810598285102e-05, "loss": 1.2343, "step": 83990 }, { "epoch": 0.5366520578050931, "grad_norm": 0.8402466773986816, "learning_rate": 8.32743609039252e-05, "loss": 0.8839, "step": 84000 }, { "epoch": 0.5367159449548318, "grad_norm": 0.5946282744407654, "learning_rate": 8.327061548989778e-05, "loss": 0.8401, "step": 84010 }, { "epoch": 0.5367798321045705, "grad_norm": 0.8258355855941772, "learning_rate": 8.32668697408065e-05, "loss": 1.094, "step": 84020 }, { "epoch": 0.5368437192543092, "grad_norm": 0.779899537563324, "learning_rate": 8.326312365668905e-05, "loss": 1.0426, "step": 84030 }, { "epoch": 0.5369076064040479, "grad_norm": 0.9077179431915283, "learning_rate": 8.325937723758314e-05, "loss": 0.9158, "step": 84040 }, { "epoch": 0.5369714935537866, "grad_norm": 1.139228105545044, "learning_rate": 8.325563048352655e-05, "loss": 0.8906, "step": 84050 }, { "epoch": 0.5370353807035253, "grad_norm": 0.8066197037696838, "learning_rate": 8.3251883394557e-05, "loss": 0.8026, "step": 84060 }, { "epoch": 0.537099267853264, "grad_norm": 0.8473499417304993, "learning_rate": 8.32481359707122e-05, "loss": 0.5615, "step": 84070 }, { "epoch": 0.5371631550030027, "grad_norm": 1.1238465309143066, "learning_rate": 8.324438821202992e-05, "loss": 0.814, "step": 84080 }, { "epoch": 0.5372270421527414, "grad_norm": 0.8760488629341125, "learning_rate": 8.324064011854789e-05, "loss": 0.8522, "step": 84090 }, { "epoch": 0.5372909293024801, "grad_norm": 1.5137993097305298, "learning_rate": 8.323689169030384e-05, "loss": 0.7777, "step": 84100 }, { "epoch": 0.5373548164522188, "grad_norm": 1.2992900609970093, "learning_rate": 8.323314292733556e-05, "loss": 0.8892, "step": 84110 }, { "epoch": 0.5374187036019575, "grad_norm": 0.8411065936088562, "learning_rate": 8.322939382968077e-05, "loss": 0.8524, "step": 84120 }, { "epoch": 0.5374825907516962, "grad_norm": 0.8992130160331726, "learning_rate": 8.322564439737723e-05, "loss": 0.8281, "step": 84130 }, { "epoch": 0.537546477901435, "grad_norm": 0.5751587152481079, "learning_rate": 8.322189463046271e-05, "loss": 0.749, "step": 84140 }, { "epoch": 0.5376103650511737, "grad_norm": 0.6489611268043518, "learning_rate": 8.321814452897498e-05, "loss": 0.9997, "step": 84150 }, { "epoch": 0.5376742522009124, "grad_norm": 0.6058949828147888, "learning_rate": 8.321439409295179e-05, "loss": 1.159, "step": 84160 }, { "epoch": 0.537738139350651, "grad_norm": 0.779172420501709, "learning_rate": 8.321064332243091e-05, "loss": 0.8733, "step": 84170 }, { "epoch": 0.5378020265003897, "grad_norm": 0.8683562278747559, "learning_rate": 8.320689221745012e-05, "loss": 0.7102, "step": 84180 }, { "epoch": 0.5378659136501284, "grad_norm": 0.6446613073348999, "learning_rate": 8.32031407780472e-05, "loss": 0.6436, "step": 84190 }, { "epoch": 0.5379298007998671, "grad_norm": 0.7266974449157715, "learning_rate": 8.319938900425994e-05, "loss": 0.8872, "step": 84200 }, { "epoch": 0.5379936879496058, "grad_norm": 0.8739939332008362, "learning_rate": 8.319563689612611e-05, "loss": 0.8188, "step": 84210 }, { "epoch": 0.5380575750993445, "grad_norm": 1.239883542060852, "learning_rate": 8.319188445368349e-05, "loss": 0.8107, "step": 84220 }, { "epoch": 0.5381214622490832, "grad_norm": 1.1432856321334839, "learning_rate": 8.318813167696987e-05, "loss": 0.7252, "step": 84230 }, { "epoch": 0.5381853493988219, "grad_norm": 1.283229947090149, "learning_rate": 8.318437856602306e-05, "loss": 0.8599, "step": 84240 }, { "epoch": 0.5382492365485606, "grad_norm": 1.238756775856018, "learning_rate": 8.318062512088087e-05, "loss": 0.939, "step": 84250 }, { "epoch": 0.5383131236982993, "grad_norm": 0.9360271096229553, "learning_rate": 8.317687134158106e-05, "loss": 0.9372, "step": 84260 }, { "epoch": 0.538377010848038, "grad_norm": 0.6929467916488647, "learning_rate": 8.317311722816145e-05, "loss": 0.8145, "step": 84270 }, { "epoch": 0.5384408979977767, "grad_norm": 1.410101294517517, "learning_rate": 8.316936278065986e-05, "loss": 0.6732, "step": 84280 }, { "epoch": 0.5385047851475154, "grad_norm": 1.029524803161621, "learning_rate": 8.316560799911408e-05, "loss": 1.0576, "step": 84290 }, { "epoch": 0.5385686722972541, "grad_norm": 1.0988661050796509, "learning_rate": 8.316185288356194e-05, "loss": 0.7547, "step": 84300 }, { "epoch": 0.5386325594469928, "grad_norm": 0.8414357304573059, "learning_rate": 8.315809743404126e-05, "loss": 0.9667, "step": 84310 }, { "epoch": 0.5386964465967315, "grad_norm": 0.6246783137321472, "learning_rate": 8.315434165058983e-05, "loss": 0.7647, "step": 84320 }, { "epoch": 0.5387603337464703, "grad_norm": 0.7971277236938477, "learning_rate": 8.315058553324551e-05, "loss": 1.2018, "step": 84330 }, { "epoch": 0.538824220896209, "grad_norm": 0.7713975310325623, "learning_rate": 8.314682908204612e-05, "loss": 0.9313, "step": 84340 }, { "epoch": 0.5388881080459477, "grad_norm": 0.6083114147186279, "learning_rate": 8.314307229702949e-05, "loss": 0.8577, "step": 84350 }, { "epoch": 0.5389519951956864, "grad_norm": 1.0030479431152344, "learning_rate": 8.313931517823344e-05, "loss": 0.88, "step": 84360 }, { "epoch": 0.5390158823454251, "grad_norm": 0.9634591341018677, "learning_rate": 8.313555772569581e-05, "loss": 1.137, "step": 84370 }, { "epoch": 0.5390797694951638, "grad_norm": 0.6758565902709961, "learning_rate": 8.313179993945445e-05, "loss": 0.8548, "step": 84380 }, { "epoch": 0.5391436566449025, "grad_norm": 1.2440674304962158, "learning_rate": 8.312804181954721e-05, "loss": 0.9583, "step": 84390 }, { "epoch": 0.5392075437946412, "grad_norm": 0.7709629535675049, "learning_rate": 8.312428336601193e-05, "loss": 0.9656, "step": 84400 }, { "epoch": 0.5392714309443798, "grad_norm": 0.8080304265022278, "learning_rate": 8.312052457888646e-05, "loss": 0.8182, "step": 84410 }, { "epoch": 0.5393353180941185, "grad_norm": 0.7901466488838196, "learning_rate": 8.311676545820865e-05, "loss": 0.8039, "step": 84420 }, { "epoch": 0.5393992052438572, "grad_norm": 0.6051963567733765, "learning_rate": 8.311338196444268e-05, "loss": 0.9002, "step": 84430 }, { "epoch": 0.5394630923935959, "grad_norm": 0.8808472156524658, "learning_rate": 8.310962221011971e-05, "loss": 0.8492, "step": 84440 }, { "epoch": 0.5395269795433346, "grad_norm": 0.7070138454437256, "learning_rate": 8.310586212235423e-05, "loss": 1.134, "step": 84450 }, { "epoch": 0.5395908666930733, "grad_norm": 0.7789306640625, "learning_rate": 8.310210170118406e-05, "loss": 0.7914, "step": 84460 }, { "epoch": 0.539654753842812, "grad_norm": 0.9255892634391785, "learning_rate": 8.30983409466471e-05, "loss": 0.8324, "step": 84470 }, { "epoch": 0.5397186409925507, "grad_norm": 1.0117281675338745, "learning_rate": 8.309457985878122e-05, "loss": 0.9676, "step": 84480 }, { "epoch": 0.5397825281422894, "grad_norm": 0.7408267855644226, "learning_rate": 8.309081843762428e-05, "loss": 0.829, "step": 84490 }, { "epoch": 0.5398464152920281, "grad_norm": 0.6966201663017273, "learning_rate": 8.308705668321417e-05, "loss": 0.9113, "step": 84500 }, { "epoch": 0.5399103024417669, "grad_norm": 0.7605626583099365, "learning_rate": 8.308329459558877e-05, "loss": 0.9392, "step": 84510 }, { "epoch": 0.5399741895915056, "grad_norm": 0.7314460277557373, "learning_rate": 8.307953217478599e-05, "loss": 0.7721, "step": 84520 }, { "epoch": 0.5400380767412443, "grad_norm": 0.8111374974250793, "learning_rate": 8.30757694208437e-05, "loss": 0.6695, "step": 84530 }, { "epoch": 0.540101963890983, "grad_norm": 0.7169995903968811, "learning_rate": 8.307200633379978e-05, "loss": 0.8237, "step": 84540 }, { "epoch": 0.5401658510407217, "grad_norm": 0.8992086052894592, "learning_rate": 8.306824291369216e-05, "loss": 0.7942, "step": 84550 }, { "epoch": 0.5402297381904604, "grad_norm": 0.5550522804260254, "learning_rate": 8.306447916055871e-05, "loss": 0.8189, "step": 84560 }, { "epoch": 0.5402936253401991, "grad_norm": 1.1253445148468018, "learning_rate": 8.306071507443737e-05, "loss": 0.8835, "step": 84570 }, { "epoch": 0.5403575124899378, "grad_norm": 1.120518684387207, "learning_rate": 8.305695065536602e-05, "loss": 1.099, "step": 84580 }, { "epoch": 0.5404213996396765, "grad_norm": 1.3610060214996338, "learning_rate": 8.305318590338258e-05, "loss": 0.9345, "step": 84590 }, { "epoch": 0.5404852867894152, "grad_norm": 0.8917859792709351, "learning_rate": 8.304942081852496e-05, "loss": 1.2491, "step": 84600 }, { "epoch": 0.5405491739391539, "grad_norm": 0.734668493270874, "learning_rate": 8.304565540083107e-05, "loss": 1.0179, "step": 84610 }, { "epoch": 0.5406130610888926, "grad_norm": 0.808816134929657, "learning_rate": 8.304188965033885e-05, "loss": 0.9507, "step": 84620 }, { "epoch": 0.5406769482386313, "grad_norm": 0.8101891875267029, "learning_rate": 8.303812356708622e-05, "loss": 0.7707, "step": 84630 }, { "epoch": 0.54074083538837, "grad_norm": 0.5614955425262451, "learning_rate": 8.303435715111111e-05, "loss": 0.9146, "step": 84640 }, { "epoch": 0.5408047225381087, "grad_norm": 1.0607208013534546, "learning_rate": 8.303059040245144e-05, "loss": 0.8684, "step": 84650 }, { "epoch": 0.5408686096878473, "grad_norm": 0.9058458805084229, "learning_rate": 8.302682332114515e-05, "loss": 0.7029, "step": 84660 }, { "epoch": 0.540932496837586, "grad_norm": 0.9082807898521423, "learning_rate": 8.302305590723016e-05, "loss": 0.8539, "step": 84670 }, { "epoch": 0.5409963839873247, "grad_norm": 0.8213421702384949, "learning_rate": 8.301928816074445e-05, "loss": 0.9783, "step": 84680 }, { "epoch": 0.5410602711370635, "grad_norm": 0.7759522795677185, "learning_rate": 8.301552008172593e-05, "loss": 0.6989, "step": 84690 }, { "epoch": 0.5411241582868022, "grad_norm": 0.574531078338623, "learning_rate": 8.301175167021256e-05, "loss": 0.9258, "step": 84700 }, { "epoch": 0.5411880454365409, "grad_norm": 0.8771001100540161, "learning_rate": 8.300798292624228e-05, "loss": 1.0307, "step": 84710 }, { "epoch": 0.5412519325862796, "grad_norm": 1.086178183555603, "learning_rate": 8.300421384985309e-05, "loss": 1.1992, "step": 84720 }, { "epoch": 0.5413158197360183, "grad_norm": 1.1887942552566528, "learning_rate": 8.300044444108288e-05, "loss": 0.7615, "step": 84730 }, { "epoch": 0.541379706885757, "grad_norm": 0.909010648727417, "learning_rate": 8.299667469996966e-05, "loss": 0.9202, "step": 84740 }, { "epoch": 0.5414435940354957, "grad_norm": 0.6186991930007935, "learning_rate": 8.299290462655138e-05, "loss": 0.7071, "step": 84750 }, { "epoch": 0.5415074811852344, "grad_norm": 0.7226212620735168, "learning_rate": 8.2989134220866e-05, "loss": 1.0302, "step": 84760 }, { "epoch": 0.5415713683349731, "grad_norm": 1.6351087093353271, "learning_rate": 8.298536348295152e-05, "loss": 0.7616, "step": 84770 }, { "epoch": 0.5416352554847118, "grad_norm": 2.3202216625213623, "learning_rate": 8.298159241284587e-05, "loss": 0.8703, "step": 84780 }, { "epoch": 0.5416991426344505, "grad_norm": 0.5504477620124817, "learning_rate": 8.297782101058706e-05, "loss": 1.0846, "step": 84790 }, { "epoch": 0.5417630297841892, "grad_norm": 0.846871554851532, "learning_rate": 8.297404927621306e-05, "loss": 0.9876, "step": 84800 }, { "epoch": 0.5418269169339279, "grad_norm": 0.9501508474349976, "learning_rate": 8.297027720976185e-05, "loss": 0.779, "step": 84810 }, { "epoch": 0.5418908040836666, "grad_norm": 0.6770570278167725, "learning_rate": 8.296650481127144e-05, "loss": 0.741, "step": 84820 }, { "epoch": 0.5419546912334053, "grad_norm": 1.0204015970230103, "learning_rate": 8.296273208077981e-05, "loss": 0.8651, "step": 84830 }, { "epoch": 0.542018578383144, "grad_norm": 1.2423951625823975, "learning_rate": 8.295895901832493e-05, "loss": 0.9325, "step": 84840 }, { "epoch": 0.5420824655328828, "grad_norm": 1.19427490234375, "learning_rate": 8.295518562394484e-05, "loss": 0.9283, "step": 84850 }, { "epoch": 0.5421463526826215, "grad_norm": 0.9197470545768738, "learning_rate": 8.29514118976775e-05, "loss": 0.7334, "step": 84860 }, { "epoch": 0.5422102398323602, "grad_norm": 1.0136433839797974, "learning_rate": 8.294763783956096e-05, "loss": 0.6607, "step": 84870 }, { "epoch": 0.5422741269820989, "grad_norm": 1.1331266164779663, "learning_rate": 8.294386344963319e-05, "loss": 0.6727, "step": 84880 }, { "epoch": 0.5423380141318376, "grad_norm": 1.1634505987167358, "learning_rate": 8.294008872793222e-05, "loss": 1.0549, "step": 84890 }, { "epoch": 0.5424019012815762, "grad_norm": 0.885592520236969, "learning_rate": 8.293631367449605e-05, "loss": 0.9175, "step": 84900 }, { "epoch": 0.5424657884313149, "grad_norm": 0.7307121753692627, "learning_rate": 8.293253828936271e-05, "loss": 0.8359, "step": 84910 }, { "epoch": 0.5425296755810536, "grad_norm": 1.0684562921524048, "learning_rate": 8.292876257257022e-05, "loss": 0.9552, "step": 84920 }, { "epoch": 0.5425935627307923, "grad_norm": 0.9303468465805054, "learning_rate": 8.29249865241566e-05, "loss": 0.8814, "step": 84930 }, { "epoch": 0.542657449880531, "grad_norm": 1.3686809539794922, "learning_rate": 8.292121014415987e-05, "loss": 1.0071, "step": 84940 }, { "epoch": 0.5427213370302697, "grad_norm": 0.9795172214508057, "learning_rate": 8.29174334326181e-05, "loss": 0.8684, "step": 84950 }, { "epoch": 0.5427852241800084, "grad_norm": 0.7319976687431335, "learning_rate": 8.29136563895693e-05, "loss": 0.8488, "step": 84960 }, { "epoch": 0.5428491113297471, "grad_norm": 0.7034667730331421, "learning_rate": 8.290987901505148e-05, "loss": 0.8596, "step": 84970 }, { "epoch": 0.5429129984794858, "grad_norm": 1.3945845365524292, "learning_rate": 8.290610130910272e-05, "loss": 0.759, "step": 84980 }, { "epoch": 0.5429768856292245, "grad_norm": 0.878729522228241, "learning_rate": 8.290232327176104e-05, "loss": 1.0464, "step": 84990 }, { "epoch": 0.5430407727789632, "grad_norm": 1.176857590675354, "learning_rate": 8.289854490306453e-05, "loss": 0.9618, "step": 85000 }, { "epoch": 0.5431046599287019, "grad_norm": 1.061789870262146, "learning_rate": 8.289476620305118e-05, "loss": 0.7039, "step": 85010 }, { "epoch": 0.5431685470784406, "grad_norm": 1.1933741569519043, "learning_rate": 8.289098717175909e-05, "loss": 0.9763, "step": 85020 }, { "epoch": 0.5432324342281794, "grad_norm": 1.391781210899353, "learning_rate": 8.28872078092263e-05, "loss": 1.044, "step": 85030 }, { "epoch": 0.5432963213779181, "grad_norm": 0.5679248571395874, "learning_rate": 8.288342811549088e-05, "loss": 1.1061, "step": 85040 }, { "epoch": 0.5433602085276568, "grad_norm": 0.892066240310669, "learning_rate": 8.28796480905909e-05, "loss": 0.8354, "step": 85050 }, { "epoch": 0.5434240956773955, "grad_norm": 0.6071507930755615, "learning_rate": 8.28758677345644e-05, "loss": 1.047, "step": 85060 }, { "epoch": 0.5434879828271342, "grad_norm": 0.8333146572113037, "learning_rate": 8.287208704744946e-05, "loss": 0.7995, "step": 85070 }, { "epoch": 0.5435518699768729, "grad_norm": 1.3047791719436646, "learning_rate": 8.28683060292842e-05, "loss": 1.1147, "step": 85080 }, { "epoch": 0.5436157571266116, "grad_norm": 0.8263481259346008, "learning_rate": 8.286452468010664e-05, "loss": 0.9913, "step": 85090 }, { "epoch": 0.5436796442763503, "grad_norm": 0.7588023543357849, "learning_rate": 8.28607429999549e-05, "loss": 0.8798, "step": 85100 }, { "epoch": 0.543743531426089, "grad_norm": 0.6401307582855225, "learning_rate": 8.285696098886704e-05, "loss": 1.1625, "step": 85110 }, { "epoch": 0.5438074185758277, "grad_norm": 1.0735725164413452, "learning_rate": 8.285317864688116e-05, "loss": 0.836, "step": 85120 }, { "epoch": 0.5438713057255664, "grad_norm": 0.8113425970077515, "learning_rate": 8.284939597403533e-05, "loss": 0.7685, "step": 85130 }, { "epoch": 0.543935192875305, "grad_norm": 0.911358654499054, "learning_rate": 8.28456129703677e-05, "loss": 0.9308, "step": 85140 }, { "epoch": 0.5439990800250437, "grad_norm": 1.186699390411377, "learning_rate": 8.284182963591631e-05, "loss": 0.8727, "step": 85150 }, { "epoch": 0.5440629671747824, "grad_norm": 1.335977554321289, "learning_rate": 8.283804597071928e-05, "loss": 1.0234, "step": 85160 }, { "epoch": 0.5441268543245211, "grad_norm": 1.1093186140060425, "learning_rate": 8.283426197481473e-05, "loss": 1.1973, "step": 85170 }, { "epoch": 0.5441907414742598, "grad_norm": 0.4914005994796753, "learning_rate": 8.283047764824075e-05, "loss": 0.9727, "step": 85180 }, { "epoch": 0.5442546286239985, "grad_norm": 0.5826841592788696, "learning_rate": 8.282669299103544e-05, "loss": 0.9037, "step": 85190 }, { "epoch": 0.5443185157737372, "grad_norm": 1.0560849905014038, "learning_rate": 8.282290800323697e-05, "loss": 1.0382, "step": 85200 }, { "epoch": 0.544382402923476, "grad_norm": 0.6486173272132874, "learning_rate": 8.28191226848834e-05, "loss": 0.913, "step": 85210 }, { "epoch": 0.5444462900732147, "grad_norm": 1.390495777130127, "learning_rate": 8.281533703601288e-05, "loss": 0.9475, "step": 85220 }, { "epoch": 0.5445101772229534, "grad_norm": 0.9394730925559998, "learning_rate": 8.28115510566635e-05, "loss": 0.8956, "step": 85230 }, { "epoch": 0.5445740643726921, "grad_norm": 1.8664871454238892, "learning_rate": 8.280776474687343e-05, "loss": 0.8971, "step": 85240 }, { "epoch": 0.5446379515224308, "grad_norm": 1.337372899055481, "learning_rate": 8.28039781066808e-05, "loss": 0.9751, "step": 85250 }, { "epoch": 0.5447018386721695, "grad_norm": 0.7601255178451538, "learning_rate": 8.280019113612371e-05, "loss": 0.8855, "step": 85260 }, { "epoch": 0.5447657258219082, "grad_norm": 0.9285007119178772, "learning_rate": 8.279640383524034e-05, "loss": 0.6376, "step": 85270 }, { "epoch": 0.5448296129716469, "grad_norm": 0.9922348260879517, "learning_rate": 8.279261620406881e-05, "loss": 1.2103, "step": 85280 }, { "epoch": 0.5448935001213856, "grad_norm": 1.2273433208465576, "learning_rate": 8.278882824264726e-05, "loss": 0.7563, "step": 85290 }, { "epoch": 0.5449573872711243, "grad_norm": 1.0333365201950073, "learning_rate": 8.278503995101383e-05, "loss": 1.0593, "step": 85300 }, { "epoch": 0.545021274420863, "grad_norm": 0.8431949615478516, "learning_rate": 8.278125132920669e-05, "loss": 1.0156, "step": 85310 }, { "epoch": 0.5450851615706017, "grad_norm": 0.8159672617912292, "learning_rate": 8.277746237726401e-05, "loss": 0.8822, "step": 85320 }, { "epoch": 0.5451490487203404, "grad_norm": 0.8089435696601868, "learning_rate": 8.27736730952239e-05, "loss": 0.9965, "step": 85330 }, { "epoch": 0.5452129358700791, "grad_norm": 1.1019364595413208, "learning_rate": 8.276988348312456e-05, "loss": 0.6972, "step": 85340 }, { "epoch": 0.5452768230198178, "grad_norm": 0.755990207195282, "learning_rate": 8.276609354100414e-05, "loss": 0.941, "step": 85350 }, { "epoch": 0.5453407101695565, "grad_norm": 1.2020833492279053, "learning_rate": 8.276230326890081e-05, "loss": 0.8955, "step": 85360 }, { "epoch": 0.5454045973192952, "grad_norm": 0.9152243733406067, "learning_rate": 8.275851266685276e-05, "loss": 0.9033, "step": 85370 }, { "epoch": 0.545468484469034, "grad_norm": 0.9686945080757141, "learning_rate": 8.275472173489814e-05, "loss": 0.967, "step": 85380 }, { "epoch": 0.5455323716187725, "grad_norm": 0.7973108887672424, "learning_rate": 8.275093047307511e-05, "loss": 0.9112, "step": 85390 }, { "epoch": 0.5455962587685113, "grad_norm": 0.8229728937149048, "learning_rate": 8.27471388814219e-05, "loss": 0.9667, "step": 85400 }, { "epoch": 0.54566014591825, "grad_norm": 2.0562517642974854, "learning_rate": 8.274334695997668e-05, "loss": 0.8901, "step": 85410 }, { "epoch": 0.5457240330679887, "grad_norm": 0.8347399830818176, "learning_rate": 8.273955470877762e-05, "loss": 0.6852, "step": 85420 }, { "epoch": 0.5457879202177274, "grad_norm": 0.8529654741287231, "learning_rate": 8.273576212786292e-05, "loss": 0.7934, "step": 85430 }, { "epoch": 0.5458518073674661, "grad_norm": 1.5173248052597046, "learning_rate": 8.273196921727075e-05, "loss": 1.0872, "step": 85440 }, { "epoch": 0.5459156945172048, "grad_norm": 0.9968129396438599, "learning_rate": 8.272817597703936e-05, "loss": 0.831, "step": 85450 }, { "epoch": 0.5459795816669435, "grad_norm": 0.980364978313446, "learning_rate": 8.272438240720692e-05, "loss": 0.993, "step": 85460 }, { "epoch": 0.5460434688166822, "grad_norm": 0.8081022500991821, "learning_rate": 8.272058850781164e-05, "loss": 0.8859, "step": 85470 }, { "epoch": 0.5461073559664209, "grad_norm": 0.5837751030921936, "learning_rate": 8.271679427889172e-05, "loss": 0.8914, "step": 85480 }, { "epoch": 0.5461712431161596, "grad_norm": 1.0083229541778564, "learning_rate": 8.271299972048538e-05, "loss": 1.0343, "step": 85490 }, { "epoch": 0.5462351302658983, "grad_norm": 1.3048707246780396, "learning_rate": 8.270920483263082e-05, "loss": 1.014, "step": 85500 }, { "epoch": 0.546299017415637, "grad_norm": 0.6506006121635437, "learning_rate": 8.27054096153663e-05, "loss": 0.941, "step": 85510 }, { "epoch": 0.5463629045653757, "grad_norm": 0.6416545510292053, "learning_rate": 8.270161406872998e-05, "loss": 1.053, "step": 85520 }, { "epoch": 0.5464267917151144, "grad_norm": 0.9509826302528381, "learning_rate": 8.269781819276015e-05, "loss": 1.2414, "step": 85530 }, { "epoch": 0.5464906788648531, "grad_norm": 0.8895853757858276, "learning_rate": 8.269402198749496e-05, "loss": 0.7903, "step": 85540 }, { "epoch": 0.5465545660145918, "grad_norm": 0.7762950658798218, "learning_rate": 8.269022545297272e-05, "loss": 1.1589, "step": 85550 }, { "epoch": 0.5466184531643306, "grad_norm": 1.1687923669815063, "learning_rate": 8.268642858923161e-05, "loss": 0.7478, "step": 85560 }, { "epoch": 0.5466823403140693, "grad_norm": 0.8369365930557251, "learning_rate": 8.268263139630989e-05, "loss": 0.7321, "step": 85570 }, { "epoch": 0.546746227463808, "grad_norm": 1.429382085800171, "learning_rate": 8.26788338742458e-05, "loss": 0.9934, "step": 85580 }, { "epoch": 0.5468101146135467, "grad_norm": 0.798319935798645, "learning_rate": 8.267503602307758e-05, "loss": 1.0856, "step": 85590 }, { "epoch": 0.5468740017632854, "grad_norm": 0.8369146585464478, "learning_rate": 8.267123784284348e-05, "loss": 1.1573, "step": 85600 }, { "epoch": 0.5469378889130241, "grad_norm": 0.8850108981132507, "learning_rate": 8.266743933358176e-05, "loss": 0.9553, "step": 85610 }, { "epoch": 0.5470017760627628, "grad_norm": 1.494284987449646, "learning_rate": 8.266364049533065e-05, "loss": 0.8582, "step": 85620 }, { "epoch": 0.5470656632125014, "grad_norm": 1.0392504930496216, "learning_rate": 8.265984132812843e-05, "loss": 0.9366, "step": 85630 }, { "epoch": 0.5471295503622401, "grad_norm": 1.1216447353363037, "learning_rate": 8.265604183201335e-05, "loss": 0.7129, "step": 85640 }, { "epoch": 0.5471934375119788, "grad_norm": 0.9301803708076477, "learning_rate": 8.265224200702368e-05, "loss": 0.6782, "step": 85650 }, { "epoch": 0.5472573246617175, "grad_norm": 0.8470297455787659, "learning_rate": 8.264844185319767e-05, "loss": 0.9429, "step": 85660 }, { "epoch": 0.5473212118114562, "grad_norm": 0.5491040945053101, "learning_rate": 8.264464137057361e-05, "loss": 0.896, "step": 85670 }, { "epoch": 0.5473850989611949, "grad_norm": 2.296058416366577, "learning_rate": 8.264084055918979e-05, "loss": 0.9733, "step": 85680 }, { "epoch": 0.5474489861109336, "grad_norm": 0.6744887828826904, "learning_rate": 8.263703941908445e-05, "loss": 0.8849, "step": 85690 }, { "epoch": 0.5475128732606723, "grad_norm": 0.819434404373169, "learning_rate": 8.26332379502959e-05, "loss": 0.9268, "step": 85700 }, { "epoch": 0.547576760410411, "grad_norm": 0.725577175617218, "learning_rate": 8.26294361528624e-05, "loss": 0.7676, "step": 85710 }, { "epoch": 0.5476406475601497, "grad_norm": 0.8266330361366272, "learning_rate": 8.262563402682226e-05, "loss": 0.7853, "step": 85720 }, { "epoch": 0.5477045347098884, "grad_norm": 0.8885651230812073, "learning_rate": 8.262183157221375e-05, "loss": 0.9026, "step": 85730 }, { "epoch": 0.5477684218596272, "grad_norm": 0.5924326777458191, "learning_rate": 8.261802878907518e-05, "loss": 0.8313, "step": 85740 }, { "epoch": 0.5478323090093659, "grad_norm": 0.5639459490776062, "learning_rate": 8.261422567744484e-05, "loss": 0.8063, "step": 85750 }, { "epoch": 0.5478961961591046, "grad_norm": 1.0976332426071167, "learning_rate": 8.261042223736101e-05, "loss": 0.7802, "step": 85760 }, { "epoch": 0.5479600833088433, "grad_norm": 0.8617295622825623, "learning_rate": 8.260661846886205e-05, "loss": 0.8656, "step": 85770 }, { "epoch": 0.548023970458582, "grad_norm": 0.8581666946411133, "learning_rate": 8.260281437198622e-05, "loss": 0.84, "step": 85780 }, { "epoch": 0.5480878576083207, "grad_norm": 1.351036548614502, "learning_rate": 8.259900994677185e-05, "loss": 0.7633, "step": 85790 }, { "epoch": 0.5481517447580594, "grad_norm": 0.803022563457489, "learning_rate": 8.259520519325725e-05, "loss": 0.8321, "step": 85800 }, { "epoch": 0.5482156319077981, "grad_norm": 1.7103533744812012, "learning_rate": 8.259140011148073e-05, "loss": 1.2177, "step": 85810 }, { "epoch": 0.5482795190575368, "grad_norm": 0.7438388466835022, "learning_rate": 8.258759470148061e-05, "loss": 0.7835, "step": 85820 }, { "epoch": 0.5483434062072755, "grad_norm": 0.5970612168312073, "learning_rate": 8.258378896329521e-05, "loss": 0.8907, "step": 85830 }, { "epoch": 0.5484072933570142, "grad_norm": 1.2347373962402344, "learning_rate": 8.257998289696289e-05, "loss": 0.7738, "step": 85840 }, { "epoch": 0.5484711805067529, "grad_norm": 0.8404068350791931, "learning_rate": 8.257617650252194e-05, "loss": 0.9716, "step": 85850 }, { "epoch": 0.5485350676564916, "grad_norm": 0.6082412600517273, "learning_rate": 8.257236978001071e-05, "loss": 0.8301, "step": 85860 }, { "epoch": 0.5485989548062302, "grad_norm": 1.7358567714691162, "learning_rate": 8.256856272946756e-05, "loss": 0.8836, "step": 85870 }, { "epoch": 0.5486628419559689, "grad_norm": 1.0207961797714233, "learning_rate": 8.256475535093077e-05, "loss": 0.8748, "step": 85880 }, { "epoch": 0.5487267291057076, "grad_norm": 0.6568586826324463, "learning_rate": 8.256094764443876e-05, "loss": 0.8192, "step": 85890 }, { "epoch": 0.5487906162554463, "grad_norm": 1.5999755859375, "learning_rate": 8.255713961002981e-05, "loss": 0.8331, "step": 85900 }, { "epoch": 0.548854503405185, "grad_norm": 0.7200731635093689, "learning_rate": 8.255333124774231e-05, "loss": 0.9032, "step": 85910 }, { "epoch": 0.5489183905549238, "grad_norm": 0.9791352152824402, "learning_rate": 8.254952255761458e-05, "loss": 1.2814, "step": 85920 }, { "epoch": 0.5489822777046625, "grad_norm": 0.7664794921875, "learning_rate": 8.254571353968504e-05, "loss": 0.9206, "step": 85930 }, { "epoch": 0.5490461648544012, "grad_norm": 0.8306074738502502, "learning_rate": 8.254190419399197e-05, "loss": 1.034, "step": 85940 }, { "epoch": 0.5491100520041399, "grad_norm": 0.9220788478851318, "learning_rate": 8.25380945205738e-05, "loss": 1.0287, "step": 85950 }, { "epoch": 0.5491739391538786, "grad_norm": 0.8465439081192017, "learning_rate": 8.253428451946885e-05, "loss": 0.6528, "step": 85960 }, { "epoch": 0.5492378263036173, "grad_norm": 1.0008618831634521, "learning_rate": 8.253047419071551e-05, "loss": 1.0279, "step": 85970 }, { "epoch": 0.549301713453356, "grad_norm": 1.0628058910369873, "learning_rate": 8.252666353435217e-05, "loss": 1.0874, "step": 85980 }, { "epoch": 0.5493656006030947, "grad_norm": 0.6217834949493408, "learning_rate": 8.252285255041717e-05, "loss": 0.9456, "step": 85990 }, { "epoch": 0.5494294877528334, "grad_norm": 0.46333763003349304, "learning_rate": 8.251904123894892e-05, "loss": 0.8225, "step": 86000 }, { "epoch": 0.5494933749025721, "grad_norm": 0.8038020730018616, "learning_rate": 8.251522959998577e-05, "loss": 0.8062, "step": 86010 }, { "epoch": 0.5495572620523108, "grad_norm": 1.191167950630188, "learning_rate": 8.251141763356614e-05, "loss": 0.8599, "step": 86020 }, { "epoch": 0.5496211492020495, "grad_norm": 0.8158173561096191, "learning_rate": 8.25076053397284e-05, "loss": 0.9257, "step": 86030 }, { "epoch": 0.5496850363517882, "grad_norm": 0.7656988501548767, "learning_rate": 8.250379271851098e-05, "loss": 0.7153, "step": 86040 }, { "epoch": 0.5497489235015269, "grad_norm": 0.7358280420303345, "learning_rate": 8.249997976995223e-05, "loss": 1.0252, "step": 86050 }, { "epoch": 0.5498128106512656, "grad_norm": 1.4587607383728027, "learning_rate": 8.249616649409057e-05, "loss": 0.8815, "step": 86060 }, { "epoch": 0.5498766978010043, "grad_norm": 0.657911479473114, "learning_rate": 8.24923528909644e-05, "loss": 1.1022, "step": 86070 }, { "epoch": 0.549940584950743, "grad_norm": 0.8566390872001648, "learning_rate": 8.248853896061213e-05, "loss": 0.7702, "step": 86080 }, { "epoch": 0.5500044721004818, "grad_norm": 0.9400178790092468, "learning_rate": 8.248472470307216e-05, "loss": 0.811, "step": 86090 }, { "epoch": 0.5500683592502205, "grad_norm": 1.0126426219940186, "learning_rate": 8.24809101183829e-05, "loss": 0.8001, "step": 86100 }, { "epoch": 0.5501322463999591, "grad_norm": 1.513357162475586, "learning_rate": 8.24770952065828e-05, "loss": 0.9136, "step": 86110 }, { "epoch": 0.5501961335496978, "grad_norm": 1.1465846300125122, "learning_rate": 8.247327996771024e-05, "loss": 0.8718, "step": 86120 }, { "epoch": 0.5502600206994365, "grad_norm": 0.9992212057113647, "learning_rate": 8.246946440180365e-05, "loss": 0.7195, "step": 86130 }, { "epoch": 0.5503239078491752, "grad_norm": 0.9585177302360535, "learning_rate": 8.246564850890148e-05, "loss": 0.8262, "step": 86140 }, { "epoch": 0.5503877949989139, "grad_norm": 0.9354924559593201, "learning_rate": 8.246183228904212e-05, "loss": 1.3068, "step": 86150 }, { "epoch": 0.5504516821486526, "grad_norm": 0.7876535058021545, "learning_rate": 8.245801574226403e-05, "loss": 0.9991, "step": 86160 }, { "epoch": 0.5505155692983913, "grad_norm": 1.089166522026062, "learning_rate": 8.245419886860566e-05, "loss": 0.8906, "step": 86170 }, { "epoch": 0.55057945644813, "grad_norm": 2.4468488693237305, "learning_rate": 8.245038166810543e-05, "loss": 0.9857, "step": 86180 }, { "epoch": 0.5506433435978687, "grad_norm": 0.6112414598464966, "learning_rate": 8.244656414080176e-05, "loss": 0.8398, "step": 86190 }, { "epoch": 0.5507072307476074, "grad_norm": 0.5781684517860413, "learning_rate": 8.244274628673314e-05, "loss": 0.8127, "step": 86200 }, { "epoch": 0.5507711178973461, "grad_norm": 1.1223702430725098, "learning_rate": 8.243892810593798e-05, "loss": 0.8847, "step": 86210 }, { "epoch": 0.5508350050470848, "grad_norm": 0.6717120409011841, "learning_rate": 8.243510959845478e-05, "loss": 0.8302, "step": 86220 }, { "epoch": 0.5508988921968235, "grad_norm": 0.8732984662055969, "learning_rate": 8.243129076432193e-05, "loss": 0.6463, "step": 86230 }, { "epoch": 0.5509627793465622, "grad_norm": 0.8502248525619507, "learning_rate": 8.242747160357796e-05, "loss": 0.8256, "step": 86240 }, { "epoch": 0.5510266664963009, "grad_norm": 0.9782050251960754, "learning_rate": 8.242365211626127e-05, "loss": 0.9004, "step": 86250 }, { "epoch": 0.5510905536460396, "grad_norm": 0.48341086506843567, "learning_rate": 8.241983230241037e-05, "loss": 0.7873, "step": 86260 }, { "epoch": 0.5511544407957784, "grad_norm": 1.0804799795150757, "learning_rate": 8.241601216206369e-05, "loss": 1.1629, "step": 86270 }, { "epoch": 0.5512183279455171, "grad_norm": 0.8438146114349365, "learning_rate": 8.241219169525973e-05, "loss": 0.7865, "step": 86280 }, { "epoch": 0.5512822150952558, "grad_norm": 0.9812383055686951, "learning_rate": 8.240837090203696e-05, "loss": 0.8253, "step": 86290 }, { "epoch": 0.5513461022449945, "grad_norm": 1.204162836074829, "learning_rate": 8.240454978243387e-05, "loss": 1.0205, "step": 86300 }, { "epoch": 0.5514099893947332, "grad_norm": 1.9019670486450195, "learning_rate": 8.240072833648894e-05, "loss": 0.9302, "step": 86310 }, { "epoch": 0.5514738765444719, "grad_norm": 0.8653010129928589, "learning_rate": 8.239690656424062e-05, "loss": 0.7381, "step": 86320 }, { "epoch": 0.5515377636942106, "grad_norm": 0.9398866891860962, "learning_rate": 8.239308446572742e-05, "loss": 0.9952, "step": 86330 }, { "epoch": 0.5516016508439493, "grad_norm": 1.2343336343765259, "learning_rate": 8.238926204098787e-05, "loss": 0.9225, "step": 86340 }, { "epoch": 0.551665537993688, "grad_norm": 0.6204321384429932, "learning_rate": 8.23854392900604e-05, "loss": 0.8943, "step": 86350 }, { "epoch": 0.5517294251434266, "grad_norm": 1.226536512374878, "learning_rate": 8.238161621298355e-05, "loss": 0.7769, "step": 86360 }, { "epoch": 0.5517933122931653, "grad_norm": 1.1433632373809814, "learning_rate": 8.23777928097958e-05, "loss": 0.9487, "step": 86370 }, { "epoch": 0.551857199442904, "grad_norm": 1.344122052192688, "learning_rate": 8.237396908053567e-05, "loss": 0.7983, "step": 86380 }, { "epoch": 0.5519210865926427, "grad_norm": 0.6882800459861755, "learning_rate": 8.237014502524168e-05, "loss": 0.9736, "step": 86390 }, { "epoch": 0.5519849737423814, "grad_norm": 0.9313778877258301, "learning_rate": 8.236632064395231e-05, "loss": 0.7895, "step": 86400 }, { "epoch": 0.5520488608921201, "grad_norm": 1.4271520376205444, "learning_rate": 8.236249593670609e-05, "loss": 0.9795, "step": 86410 }, { "epoch": 0.5521127480418588, "grad_norm": 2.3434298038482666, "learning_rate": 8.235867090354153e-05, "loss": 0.9954, "step": 86420 }, { "epoch": 0.5521766351915975, "grad_norm": 3.081796169281006, "learning_rate": 8.235484554449718e-05, "loss": 0.8829, "step": 86430 }, { "epoch": 0.5522405223413362, "grad_norm": 1.0064252614974976, "learning_rate": 8.235101985961154e-05, "loss": 0.8981, "step": 86440 }, { "epoch": 0.552304409491075, "grad_norm": 1.3928056955337524, "learning_rate": 8.234719384892314e-05, "loss": 1.0775, "step": 86450 }, { "epoch": 0.5523682966408137, "grad_norm": 1.0833989381790161, "learning_rate": 8.23433675124705e-05, "loss": 0.782, "step": 86460 }, { "epoch": 0.5524321837905524, "grad_norm": 0.7659380435943604, "learning_rate": 8.233954085029219e-05, "loss": 0.9672, "step": 86470 }, { "epoch": 0.5524960709402911, "grad_norm": 0.788266658782959, "learning_rate": 8.23357138624267e-05, "loss": 0.7397, "step": 86480 }, { "epoch": 0.5525599580900298, "grad_norm": 0.8975238800048828, "learning_rate": 8.233188654891262e-05, "loss": 0.9158, "step": 86490 }, { "epoch": 0.5526238452397685, "grad_norm": 1.3429840803146362, "learning_rate": 8.232805890978845e-05, "loss": 1.0506, "step": 86500 }, { "epoch": 0.5526877323895072, "grad_norm": 1.5317329168319702, "learning_rate": 8.232423094509278e-05, "loss": 0.9425, "step": 86510 }, { "epoch": 0.5527516195392459, "grad_norm": 0.8036388754844666, "learning_rate": 8.232040265486413e-05, "loss": 0.9453, "step": 86520 }, { "epoch": 0.5528155066889846, "grad_norm": 0.8902758359909058, "learning_rate": 8.231657403914107e-05, "loss": 0.7362, "step": 86530 }, { "epoch": 0.5528793938387233, "grad_norm": 2.19069242477417, "learning_rate": 8.231274509796215e-05, "loss": 0.9963, "step": 86540 }, { "epoch": 0.552943280988462, "grad_norm": 0.8316643238067627, "learning_rate": 8.230891583136593e-05, "loss": 0.9057, "step": 86550 }, { "epoch": 0.5530071681382007, "grad_norm": 1.3300632238388062, "learning_rate": 8.230508623939097e-05, "loss": 0.9182, "step": 86560 }, { "epoch": 0.5530710552879394, "grad_norm": 0.6165077686309814, "learning_rate": 8.230125632207585e-05, "loss": 0.7956, "step": 86570 }, { "epoch": 0.5531349424376781, "grad_norm": 0.7122068405151367, "learning_rate": 8.229742607945915e-05, "loss": 0.8135, "step": 86580 }, { "epoch": 0.5531988295874168, "grad_norm": 1.0047301054000854, "learning_rate": 8.229359551157941e-05, "loss": 0.7511, "step": 86590 }, { "epoch": 0.5532627167371554, "grad_norm": 0.6250356435775757, "learning_rate": 8.228976461847522e-05, "loss": 0.9118, "step": 86600 }, { "epoch": 0.5533266038868941, "grad_norm": 0.7042723894119263, "learning_rate": 8.228593340018518e-05, "loss": 1.1653, "step": 86610 }, { "epoch": 0.5533904910366328, "grad_norm": 1.037340760231018, "learning_rate": 8.228210185674784e-05, "loss": 0.9482, "step": 86620 }, { "epoch": 0.5534543781863716, "grad_norm": 0.9349649548530579, "learning_rate": 8.227826998820183e-05, "loss": 0.7801, "step": 86630 }, { "epoch": 0.5535182653361103, "grad_norm": 0.9645569920539856, "learning_rate": 8.227443779458572e-05, "loss": 0.8324, "step": 86640 }, { "epoch": 0.553582152485849, "grad_norm": 0.991255521774292, "learning_rate": 8.227060527593808e-05, "loss": 0.8347, "step": 86650 }, { "epoch": 0.5536460396355877, "grad_norm": 0.6694484353065491, "learning_rate": 8.226677243229753e-05, "loss": 0.8864, "step": 86660 }, { "epoch": 0.5537099267853264, "grad_norm": 0.9695293307304382, "learning_rate": 8.226293926370268e-05, "loss": 0.8119, "step": 86670 }, { "epoch": 0.5537738139350651, "grad_norm": 0.9662237763404846, "learning_rate": 8.22591057701921e-05, "loss": 1.2784, "step": 86680 }, { "epoch": 0.5538377010848038, "grad_norm": 0.8255831003189087, "learning_rate": 8.225527195180442e-05, "loss": 0.7241, "step": 86690 }, { "epoch": 0.5539015882345425, "grad_norm": 1.059599757194519, "learning_rate": 8.225143780857827e-05, "loss": 0.9431, "step": 86700 }, { "epoch": 0.5539654753842812, "grad_norm": 0.7622451186180115, "learning_rate": 8.224760334055222e-05, "loss": 0.8522, "step": 86710 }, { "epoch": 0.5540293625340199, "grad_norm": 0.5998595952987671, "learning_rate": 8.22437685477649e-05, "loss": 0.7598, "step": 86720 }, { "epoch": 0.5540932496837586, "grad_norm": 0.9817114472389221, "learning_rate": 8.223993343025496e-05, "loss": 0.8681, "step": 86730 }, { "epoch": 0.5541571368334973, "grad_norm": 0.8455765843391418, "learning_rate": 8.223609798806097e-05, "loss": 1.0724, "step": 86740 }, { "epoch": 0.554221023983236, "grad_norm": 0.9038847088813782, "learning_rate": 8.22322622212216e-05, "loss": 0.8695, "step": 86750 }, { "epoch": 0.5542849111329747, "grad_norm": 1.3185269832611084, "learning_rate": 8.222842612977545e-05, "loss": 0.8196, "step": 86760 }, { "epoch": 0.5543487982827134, "grad_norm": 1.6181766986846924, "learning_rate": 8.22245897137612e-05, "loss": 0.9941, "step": 86770 }, { "epoch": 0.5544126854324521, "grad_norm": 0.7883678674697876, "learning_rate": 8.222075297321742e-05, "loss": 0.9108, "step": 86780 }, { "epoch": 0.5544765725821909, "grad_norm": 1.2159056663513184, "learning_rate": 8.221691590818281e-05, "loss": 0.8442, "step": 86790 }, { "epoch": 0.5545404597319296, "grad_norm": 0.6376563310623169, "learning_rate": 8.221307851869597e-05, "loss": 0.8837, "step": 86800 }, { "epoch": 0.5546043468816683, "grad_norm": 0.8084425926208496, "learning_rate": 8.220924080479558e-05, "loss": 0.9176, "step": 86810 }, { "epoch": 0.554668234031407, "grad_norm": 0.9043588042259216, "learning_rate": 8.220540276652024e-05, "loss": 1.3496, "step": 86820 }, { "epoch": 0.5547321211811457, "grad_norm": 0.6929380893707275, "learning_rate": 8.220156440390865e-05, "loss": 0.8788, "step": 86830 }, { "epoch": 0.5547960083308843, "grad_norm": 0.8745282888412476, "learning_rate": 8.219772571699945e-05, "loss": 0.6351, "step": 86840 }, { "epoch": 0.554859895480623, "grad_norm": 0.9505758285522461, "learning_rate": 8.21938867058313e-05, "loss": 0.7184, "step": 86850 }, { "epoch": 0.5549237826303617, "grad_norm": 1.9422472715377808, "learning_rate": 8.219004737044285e-05, "loss": 0.915, "step": 86860 }, { "epoch": 0.5549876697801004, "grad_norm": 1.590299367904663, "learning_rate": 8.218620771087277e-05, "loss": 0.8802, "step": 86870 }, { "epoch": 0.5550515569298391, "grad_norm": 0.7018561959266663, "learning_rate": 8.218236772715972e-05, "loss": 0.8559, "step": 86880 }, { "epoch": 0.5551154440795778, "grad_norm": 1.1416157484054565, "learning_rate": 8.217852741934242e-05, "loss": 0.792, "step": 86890 }, { "epoch": 0.5551793312293165, "grad_norm": 0.9537761807441711, "learning_rate": 8.21746867874595e-05, "loss": 1.0657, "step": 86900 }, { "epoch": 0.5552432183790552, "grad_norm": 2.785801887512207, "learning_rate": 8.217084583154964e-05, "loss": 1.0421, "step": 86910 }, { "epoch": 0.5553071055287939, "grad_norm": 1.0733259916305542, "learning_rate": 8.216700455165152e-05, "loss": 1.1064, "step": 86920 }, { "epoch": 0.5553709926785326, "grad_norm": 0.9249892830848694, "learning_rate": 8.216316294780386e-05, "loss": 0.6912, "step": 86930 }, { "epoch": 0.5554348798282713, "grad_norm": 1.2269346714019775, "learning_rate": 8.215932102004531e-05, "loss": 0.8911, "step": 86940 }, { "epoch": 0.55549876697801, "grad_norm": 1.0599247217178345, "learning_rate": 8.215547876841459e-05, "loss": 0.9636, "step": 86950 }, { "epoch": 0.5555626541277487, "grad_norm": 1.5449506044387817, "learning_rate": 8.215163619295036e-05, "loss": 0.9123, "step": 86960 }, { "epoch": 0.5556265412774875, "grad_norm": 0.9864984154701233, "learning_rate": 8.214779329369134e-05, "loss": 0.7596, "step": 86970 }, { "epoch": 0.5556904284272262, "grad_norm": 0.9634939432144165, "learning_rate": 8.214395007067624e-05, "loss": 0.9034, "step": 86980 }, { "epoch": 0.5557543155769649, "grad_norm": 1.1555999517440796, "learning_rate": 8.214010652394376e-05, "loss": 0.7741, "step": 86990 }, { "epoch": 0.5558182027267036, "grad_norm": 1.121670126914978, "learning_rate": 8.213626265353259e-05, "loss": 0.926, "step": 87000 }, { "epoch": 0.5558820898764423, "grad_norm": 2.0978503227233887, "learning_rate": 8.213241845948145e-05, "loss": 0.7041, "step": 87010 }, { "epoch": 0.555945977026181, "grad_norm": 0.9516173601150513, "learning_rate": 8.212857394182906e-05, "loss": 1.0637, "step": 87020 }, { "epoch": 0.5560098641759197, "grad_norm": 1.092417597770691, "learning_rate": 8.212472910061415e-05, "loss": 0.8465, "step": 87030 }, { "epoch": 0.5560737513256584, "grad_norm": 0.8060597777366638, "learning_rate": 8.212088393587543e-05, "loss": 1.2128, "step": 87040 }, { "epoch": 0.5561376384753971, "grad_norm": 0.8627819418907166, "learning_rate": 8.21170384476516e-05, "loss": 0.9167, "step": 87050 }, { "epoch": 0.5562015256251358, "grad_norm": 0.9988081455230713, "learning_rate": 8.211319263598142e-05, "loss": 1.0736, "step": 87060 }, { "epoch": 0.5562654127748745, "grad_norm": 0.8938575387001038, "learning_rate": 8.210934650090361e-05, "loss": 0.9065, "step": 87070 }, { "epoch": 0.5563292999246132, "grad_norm": 0.968144953250885, "learning_rate": 8.210550004245688e-05, "loss": 1.1044, "step": 87080 }, { "epoch": 0.5563931870743518, "grad_norm": 0.6786410212516785, "learning_rate": 8.210165326068001e-05, "loss": 0.849, "step": 87090 }, { "epoch": 0.5564570742240905, "grad_norm": 0.6535912156105042, "learning_rate": 8.209780615561172e-05, "loss": 0.942, "step": 87100 }, { "epoch": 0.5565209613738292, "grad_norm": 0.8034776449203491, "learning_rate": 8.209395872729074e-05, "loss": 0.8953, "step": 87110 }, { "epoch": 0.5565848485235679, "grad_norm": 1.0414639711380005, "learning_rate": 8.209011097575584e-05, "loss": 1.1798, "step": 87120 }, { "epoch": 0.5566487356733066, "grad_norm": 0.8047749996185303, "learning_rate": 8.208626290104577e-05, "loss": 0.9456, "step": 87130 }, { "epoch": 0.5567126228230453, "grad_norm": 0.904681384563446, "learning_rate": 8.208241450319925e-05, "loss": 0.8932, "step": 87140 }, { "epoch": 0.556776509972784, "grad_norm": 0.9343833327293396, "learning_rate": 8.207856578225508e-05, "loss": 0.9492, "step": 87150 }, { "epoch": 0.5568403971225228, "grad_norm": 0.9821017384529114, "learning_rate": 8.207471673825199e-05, "loss": 0.8225, "step": 87160 }, { "epoch": 0.5569042842722615, "grad_norm": 0.9593321681022644, "learning_rate": 8.207086737122876e-05, "loss": 1.0382, "step": 87170 }, { "epoch": 0.5569681714220002, "grad_norm": 1.1232051849365234, "learning_rate": 8.206701768122415e-05, "loss": 0.8715, "step": 87180 }, { "epoch": 0.5570320585717389, "grad_norm": 0.7729601860046387, "learning_rate": 8.206316766827692e-05, "loss": 0.9942, "step": 87190 }, { "epoch": 0.5570959457214776, "grad_norm": 1.0262385606765747, "learning_rate": 8.205931733242586e-05, "loss": 1.0279, "step": 87200 }, { "epoch": 0.5571598328712163, "grad_norm": 0.9253116846084595, "learning_rate": 8.205546667370975e-05, "loss": 1.1048, "step": 87210 }, { "epoch": 0.557223720020955, "grad_norm": 1.2374264001846313, "learning_rate": 8.205161569216735e-05, "loss": 0.7685, "step": 87220 }, { "epoch": 0.5572876071706937, "grad_norm": 0.7475976347923279, "learning_rate": 8.204776438783745e-05, "loss": 0.9137, "step": 87230 }, { "epoch": 0.5573514943204324, "grad_norm": 0.5616995096206665, "learning_rate": 8.204391276075882e-05, "loss": 1.0628, "step": 87240 }, { "epoch": 0.5574153814701711, "grad_norm": 0.816423773765564, "learning_rate": 8.20400608109703e-05, "loss": 0.9533, "step": 87250 }, { "epoch": 0.5574792686199098, "grad_norm": 1.1088494062423706, "learning_rate": 8.203620853851062e-05, "loss": 1.115, "step": 87260 }, { "epoch": 0.5575431557696485, "grad_norm": 0.9146358370780945, "learning_rate": 8.203235594341862e-05, "loss": 0.8036, "step": 87270 }, { "epoch": 0.5576070429193872, "grad_norm": 0.7417098879814148, "learning_rate": 8.202850302573308e-05, "loss": 0.8677, "step": 87280 }, { "epoch": 0.5576709300691259, "grad_norm": 1.2907207012176514, "learning_rate": 8.202464978549281e-05, "loss": 0.8702, "step": 87290 }, { "epoch": 0.5577348172188646, "grad_norm": 0.6193691492080688, "learning_rate": 8.202079622273662e-05, "loss": 0.6505, "step": 87300 }, { "epoch": 0.5577987043686033, "grad_norm": 0.8418506979942322, "learning_rate": 8.20169423375033e-05, "loss": 1.0309, "step": 87310 }, { "epoch": 0.557862591518342, "grad_norm": 0.8040059208869934, "learning_rate": 8.201308812983165e-05, "loss": 0.8816, "step": 87320 }, { "epoch": 0.5579264786680806, "grad_norm": 0.9239259958267212, "learning_rate": 8.200923359976055e-05, "loss": 1.2638, "step": 87330 }, { "epoch": 0.5579903658178194, "grad_norm": 1.0745997428894043, "learning_rate": 8.200537874732876e-05, "loss": 0.839, "step": 87340 }, { "epoch": 0.5580542529675581, "grad_norm": 0.9547412395477295, "learning_rate": 8.200152357257511e-05, "loss": 1.0117, "step": 87350 }, { "epoch": 0.5581181401172968, "grad_norm": 0.6735034584999084, "learning_rate": 8.199766807553843e-05, "loss": 1.0254, "step": 87360 }, { "epoch": 0.5581820272670355, "grad_norm": 0.9394139051437378, "learning_rate": 8.199381225625755e-05, "loss": 0.9061, "step": 87370 }, { "epoch": 0.5582459144167742, "grad_norm": 0.6589183211326599, "learning_rate": 8.198995611477132e-05, "loss": 0.9975, "step": 87380 }, { "epoch": 0.5583098015665129, "grad_norm": 0.8695818185806274, "learning_rate": 8.198609965111854e-05, "loss": 0.8602, "step": 87390 }, { "epoch": 0.5583736887162516, "grad_norm": 0.8587558269500732, "learning_rate": 8.198224286533807e-05, "loss": 1.041, "step": 87400 }, { "epoch": 0.5584375758659903, "grad_norm": 0.8694300651550293, "learning_rate": 8.197838575746874e-05, "loss": 1.0423, "step": 87410 }, { "epoch": 0.558501463015729, "grad_norm": 1.0709831714630127, "learning_rate": 8.197452832754939e-05, "loss": 1.1133, "step": 87420 }, { "epoch": 0.5585653501654677, "grad_norm": 0.8248537182807922, "learning_rate": 8.19706705756189e-05, "loss": 0.8726, "step": 87430 }, { "epoch": 0.5586292373152064, "grad_norm": 2.853898525238037, "learning_rate": 8.196681250171609e-05, "loss": 0.9757, "step": 87440 }, { "epoch": 0.5586931244649451, "grad_norm": 2.0060508251190186, "learning_rate": 8.196295410587982e-05, "loss": 1.1241, "step": 87450 }, { "epoch": 0.5587570116146838, "grad_norm": 1.0533684492111206, "learning_rate": 8.195909538814895e-05, "loss": 0.746, "step": 87460 }, { "epoch": 0.5588208987644225, "grad_norm": 0.6444224715232849, "learning_rate": 8.195523634856234e-05, "loss": 0.8998, "step": 87470 }, { "epoch": 0.5588847859141612, "grad_norm": 0.8431418538093567, "learning_rate": 8.195137698715887e-05, "loss": 0.9075, "step": 87480 }, { "epoch": 0.5589486730639, "grad_norm": 5.22327995300293, "learning_rate": 8.194751730397738e-05, "loss": 0.8413, "step": 87490 }, { "epoch": 0.5590125602136387, "grad_norm": 0.5308577418327332, "learning_rate": 8.194365729905675e-05, "loss": 0.7192, "step": 87500 }, { "epoch": 0.5590764473633774, "grad_norm": 1.6924850940704346, "learning_rate": 8.193979697243586e-05, "loss": 0.9143, "step": 87510 }, { "epoch": 0.5591403345131161, "grad_norm": 0.574246883392334, "learning_rate": 8.193593632415358e-05, "loss": 0.9482, "step": 87520 }, { "epoch": 0.5592042216628548, "grad_norm": 0.572245180606842, "learning_rate": 8.19320753542488e-05, "loss": 0.6802, "step": 87530 }, { "epoch": 0.5592681088125935, "grad_norm": 1.4606465101242065, "learning_rate": 8.192821406276039e-05, "loss": 1.3266, "step": 87540 }, { "epoch": 0.5593319959623322, "grad_norm": 0.8305387496948242, "learning_rate": 8.192435244972725e-05, "loss": 0.8321, "step": 87550 }, { "epoch": 0.5593958831120709, "grad_norm": 0.7311245799064636, "learning_rate": 8.192049051518826e-05, "loss": 0.8271, "step": 87560 }, { "epoch": 0.5594597702618095, "grad_norm": 0.702063798904419, "learning_rate": 8.19166282591823e-05, "loss": 0.9991, "step": 87570 }, { "epoch": 0.5595236574115482, "grad_norm": 0.699548602104187, "learning_rate": 8.19127656817483e-05, "loss": 0.9318, "step": 87580 }, { "epoch": 0.5595875445612869, "grad_norm": 1.65463387966156, "learning_rate": 8.190890278292513e-05, "loss": 1.1159, "step": 87590 }, { "epoch": 0.5596514317110256, "grad_norm": 0.7709631323814392, "learning_rate": 8.190503956275171e-05, "loss": 0.9198, "step": 87600 }, { "epoch": 0.5597153188607643, "grad_norm": 0.7824264168739319, "learning_rate": 8.190117602126694e-05, "loss": 0.7632, "step": 87610 }, { "epoch": 0.559779206010503, "grad_norm": 0.6739460825920105, "learning_rate": 8.189731215850973e-05, "loss": 1.0088, "step": 87620 }, { "epoch": 0.5598430931602417, "grad_norm": 0.6374611258506775, "learning_rate": 8.189344797451898e-05, "loss": 0.8637, "step": 87630 }, { "epoch": 0.5599069803099804, "grad_norm": 0.7015722990036011, "learning_rate": 8.188958346933361e-05, "loss": 0.7372, "step": 87640 }, { "epoch": 0.5599708674597191, "grad_norm": 0.9866139888763428, "learning_rate": 8.188571864299257e-05, "loss": 0.8335, "step": 87650 }, { "epoch": 0.5600347546094578, "grad_norm": 1.009989619255066, "learning_rate": 8.188185349553474e-05, "loss": 0.8123, "step": 87660 }, { "epoch": 0.5600986417591965, "grad_norm": 0.5342085957527161, "learning_rate": 8.187798802699909e-05, "loss": 0.772, "step": 87670 }, { "epoch": 0.5601625289089353, "grad_norm": 0.8871537446975708, "learning_rate": 8.18741222374245e-05, "loss": 0.8139, "step": 87680 }, { "epoch": 0.560226416058674, "grad_norm": 0.8497464656829834, "learning_rate": 8.187025612684993e-05, "loss": 0.8549, "step": 87690 }, { "epoch": 0.5602903032084127, "grad_norm": 1.7199604511260986, "learning_rate": 8.18663896953143e-05, "loss": 1.0503, "step": 87700 }, { "epoch": 0.5603541903581514, "grad_norm": 0.686752200126648, "learning_rate": 8.186252294285656e-05, "loss": 0.7221, "step": 87710 }, { "epoch": 0.5604180775078901, "grad_norm": 0.6970003247261047, "learning_rate": 8.185865586951567e-05, "loss": 0.6405, "step": 87720 }, { "epoch": 0.5604819646576288, "grad_norm": 0.8833878040313721, "learning_rate": 8.185478847533052e-05, "loss": 0.742, "step": 87730 }, { "epoch": 0.5605458518073675, "grad_norm": 2.5486907958984375, "learning_rate": 8.185092076034012e-05, "loss": 0.7818, "step": 87740 }, { "epoch": 0.5606097389571062, "grad_norm": 0.5542274713516235, "learning_rate": 8.184705272458338e-05, "loss": 0.8269, "step": 87750 }, { "epoch": 0.5606736261068449, "grad_norm": 0.9123031497001648, "learning_rate": 8.184318436809927e-05, "loss": 0.9681, "step": 87760 }, { "epoch": 0.5607375132565836, "grad_norm": 1.2762703895568848, "learning_rate": 8.183931569092676e-05, "loss": 0.992, "step": 87770 }, { "epoch": 0.5608014004063223, "grad_norm": 0.723517656326294, "learning_rate": 8.183544669310477e-05, "loss": 0.7665, "step": 87780 }, { "epoch": 0.560865287556061, "grad_norm": 0.5567760467529297, "learning_rate": 8.183157737467229e-05, "loss": 0.8054, "step": 87790 }, { "epoch": 0.5609291747057997, "grad_norm": 1.262997031211853, "learning_rate": 8.182770773566833e-05, "loss": 0.9354, "step": 87800 }, { "epoch": 0.5609930618555383, "grad_norm": 0.7385443449020386, "learning_rate": 8.182383777613177e-05, "loss": 1.0054, "step": 87810 }, { "epoch": 0.561056949005277, "grad_norm": 0.8645491600036621, "learning_rate": 8.181996749610166e-05, "loss": 1.0764, "step": 87820 }, { "epoch": 0.5611208361550157, "grad_norm": 1.3922362327575684, "learning_rate": 8.181609689561693e-05, "loss": 1.2028, "step": 87830 }, { "epoch": 0.5611847233047544, "grad_norm": 1.1848599910736084, "learning_rate": 8.181222597471658e-05, "loss": 0.7765, "step": 87840 }, { "epoch": 0.5612486104544931, "grad_norm": 1.078633189201355, "learning_rate": 8.18083547334396e-05, "loss": 0.793, "step": 87850 }, { "epoch": 0.5613124976042319, "grad_norm": 0.8541363477706909, "learning_rate": 8.180448317182498e-05, "loss": 0.6918, "step": 87860 }, { "epoch": 0.5613763847539706, "grad_norm": 1.5451418161392212, "learning_rate": 8.180061128991168e-05, "loss": 0.9104, "step": 87870 }, { "epoch": 0.5614402719037093, "grad_norm": 0.9246516823768616, "learning_rate": 8.179673908773872e-05, "loss": 0.8337, "step": 87880 }, { "epoch": 0.561504159053448, "grad_norm": 1.042832851409912, "learning_rate": 8.179286656534511e-05, "loss": 1.1324, "step": 87890 }, { "epoch": 0.5615680462031867, "grad_norm": 0.7726474404335022, "learning_rate": 8.17889937227698e-05, "loss": 1.1108, "step": 87900 }, { "epoch": 0.5616319333529254, "grad_norm": 0.8662251234054565, "learning_rate": 8.178512056005184e-05, "loss": 0.6078, "step": 87910 }, { "epoch": 0.5616958205026641, "grad_norm": 0.7089243531227112, "learning_rate": 8.178124707723021e-05, "loss": 0.8552, "step": 87920 }, { "epoch": 0.5617597076524028, "grad_norm": 1.1423563957214355, "learning_rate": 8.177737327434393e-05, "loss": 0.8504, "step": 87930 }, { "epoch": 0.5618235948021415, "grad_norm": 0.934367299079895, "learning_rate": 8.1773499151432e-05, "loss": 1.1178, "step": 87940 }, { "epoch": 0.5618874819518802, "grad_norm": 1.198276162147522, "learning_rate": 8.176962470853346e-05, "loss": 0.9645, "step": 87950 }, { "epoch": 0.5619513691016189, "grad_norm": 0.8779740929603577, "learning_rate": 8.176574994568731e-05, "loss": 0.9631, "step": 87960 }, { "epoch": 0.5620152562513576, "grad_norm": 1.0242273807525635, "learning_rate": 8.176187486293258e-05, "loss": 0.8799, "step": 87970 }, { "epoch": 0.5620791434010963, "grad_norm": 0.8855654001235962, "learning_rate": 8.17579994603083e-05, "loss": 0.8782, "step": 87980 }, { "epoch": 0.562143030550835, "grad_norm": 0.9518892168998718, "learning_rate": 8.175412373785346e-05, "loss": 0.9524, "step": 87990 }, { "epoch": 0.5622069177005737, "grad_norm": 1.5354324579238892, "learning_rate": 8.175024769560714e-05, "loss": 0.8593, "step": 88000 }, { "epoch": 0.5622708048503124, "grad_norm": 0.7242043018341064, "learning_rate": 8.174637133360837e-05, "loss": 0.7903, "step": 88010 }, { "epoch": 0.5623346920000512, "grad_norm": 1.2966783046722412, "learning_rate": 8.174249465189615e-05, "loss": 0.9194, "step": 88020 }, { "epoch": 0.5623985791497899, "grad_norm": 0.7880471348762512, "learning_rate": 8.173861765050956e-05, "loss": 0.8203, "step": 88030 }, { "epoch": 0.5624624662995286, "grad_norm": 0.8181769251823425, "learning_rate": 8.173474032948764e-05, "loss": 0.8065, "step": 88040 }, { "epoch": 0.5625263534492673, "grad_norm": 1.0602998733520508, "learning_rate": 8.173086268886943e-05, "loss": 1.0891, "step": 88050 }, { "epoch": 0.5625902405990059, "grad_norm": 0.9757769107818604, "learning_rate": 8.172698472869398e-05, "loss": 0.7662, "step": 88060 }, { "epoch": 0.5626541277487446, "grad_norm": 0.7748228907585144, "learning_rate": 8.172310644900035e-05, "loss": 1.0889, "step": 88070 }, { "epoch": 0.5627180148984833, "grad_norm": 0.6546492576599121, "learning_rate": 8.171922784982757e-05, "loss": 0.7745, "step": 88080 }, { "epoch": 0.562781902048222, "grad_norm": 0.9595057368278503, "learning_rate": 8.171534893121476e-05, "loss": 0.7195, "step": 88090 }, { "epoch": 0.5628457891979607, "grad_norm": 1.4880831241607666, "learning_rate": 8.171146969320091e-05, "loss": 0.7606, "step": 88100 }, { "epoch": 0.5629096763476994, "grad_norm": 0.6960776448249817, "learning_rate": 8.17079781059329e-05, "loss": 0.8305, "step": 88110 }, { "epoch": 0.5629735634974381, "grad_norm": 0.6546306610107422, "learning_rate": 8.17040982611648e-05, "loss": 0.9146, "step": 88120 }, { "epoch": 0.5630374506471768, "grad_norm": 0.9104867577552795, "learning_rate": 8.170021809710901e-05, "loss": 0.8428, "step": 88130 }, { "epoch": 0.5631013377969155, "grad_norm": 0.8855934143066406, "learning_rate": 8.169633761380459e-05, "loss": 1.1035, "step": 88140 }, { "epoch": 0.5631652249466542, "grad_norm": 0.8834415674209595, "learning_rate": 8.169245681129063e-05, "loss": 0.7963, "step": 88150 }, { "epoch": 0.5632291120963929, "grad_norm": 1.0610204935073853, "learning_rate": 8.168857568960621e-05, "loss": 0.8703, "step": 88160 }, { "epoch": 0.5632929992461316, "grad_norm": 0.7726658582687378, "learning_rate": 8.168469424879041e-05, "loss": 0.7914, "step": 88170 }, { "epoch": 0.5633568863958703, "grad_norm": 0.835917592048645, "learning_rate": 8.168081248888236e-05, "loss": 1.1727, "step": 88180 }, { "epoch": 0.563420773545609, "grad_norm": 0.6268913149833679, "learning_rate": 8.16769304099211e-05, "loss": 0.8944, "step": 88190 }, { "epoch": 0.5634846606953477, "grad_norm": 0.7659708857536316, "learning_rate": 8.167304801194574e-05, "loss": 0.6793, "step": 88200 }, { "epoch": 0.5635485478450865, "grad_norm": 0.8323150873184204, "learning_rate": 8.166916529499539e-05, "loss": 1.0036, "step": 88210 }, { "epoch": 0.5636124349948252, "grad_norm": 0.8650678396224976, "learning_rate": 8.166528225910915e-05, "loss": 1.0035, "step": 88220 }, { "epoch": 0.5636763221445639, "grad_norm": 1.1237086057662964, "learning_rate": 8.166139890432612e-05, "loss": 0.863, "step": 88230 }, { "epoch": 0.5637402092943026, "grad_norm": 1.4714837074279785, "learning_rate": 8.165751523068541e-05, "loss": 0.7476, "step": 88240 }, { "epoch": 0.5638040964440413, "grad_norm": 1.561540126800537, "learning_rate": 8.165363123822613e-05, "loss": 0.8476, "step": 88250 }, { "epoch": 0.56386798359378, "grad_norm": 1.166987657546997, "learning_rate": 8.164974692698741e-05, "loss": 0.7393, "step": 88260 }, { "epoch": 0.5639318707435187, "grad_norm": 0.8641453385353088, "learning_rate": 8.164586229700837e-05, "loss": 0.9397, "step": 88270 }, { "epoch": 0.5639957578932574, "grad_norm": 0.8461394906044006, "learning_rate": 8.164197734832811e-05, "loss": 0.9712, "step": 88280 }, { "epoch": 0.5640596450429961, "grad_norm": 1.124748945236206, "learning_rate": 8.163809208098573e-05, "loss": 0.9336, "step": 88290 }, { "epoch": 0.5641235321927347, "grad_norm": 0.7399802803993225, "learning_rate": 8.163420649502044e-05, "loss": 0.8218, "step": 88300 }, { "epoch": 0.5641874193424734, "grad_norm": 0.5592508316040039, "learning_rate": 8.163032059047129e-05, "loss": 1.0124, "step": 88310 }, { "epoch": 0.5642513064922121, "grad_norm": 0.7099899649620056, "learning_rate": 8.162643436737747e-05, "loss": 1.0729, "step": 88320 }, { "epoch": 0.5643151936419508, "grad_norm": 0.8268618583679199, "learning_rate": 8.162254782577807e-05, "loss": 0.9777, "step": 88330 }, { "epoch": 0.5643790807916895, "grad_norm": 1.0088896751403809, "learning_rate": 8.161866096571229e-05, "loss": 0.7944, "step": 88340 }, { "epoch": 0.5644429679414282, "grad_norm": 1.0741251707077026, "learning_rate": 8.161477378721922e-05, "loss": 0.6418, "step": 88350 }, { "epoch": 0.5645068550911669, "grad_norm": 0.7108768224716187, "learning_rate": 8.161088629033802e-05, "loss": 0.792, "step": 88360 }, { "epoch": 0.5645707422409056, "grad_norm": 0.7690078020095825, "learning_rate": 8.160699847510787e-05, "loss": 1.0006, "step": 88370 }, { "epoch": 0.5646346293906443, "grad_norm": 0.9260159134864807, "learning_rate": 8.160311034156788e-05, "loss": 0.836, "step": 88380 }, { "epoch": 0.564698516540383, "grad_norm": 0.8775709867477417, "learning_rate": 8.159922188975724e-05, "loss": 1.0681, "step": 88390 }, { "epoch": 0.5647624036901218, "grad_norm": 5.725255489349365, "learning_rate": 8.159533311971509e-05, "loss": 0.8525, "step": 88400 }, { "epoch": 0.5648262908398605, "grad_norm": 0.7472132444381714, "learning_rate": 8.15914440314806e-05, "loss": 0.8077, "step": 88410 }, { "epoch": 0.5648901779895992, "grad_norm": 0.733696460723877, "learning_rate": 8.158755462509294e-05, "loss": 0.9897, "step": 88420 }, { "epoch": 0.5649540651393379, "grad_norm": 0.7739579081535339, "learning_rate": 8.15836649005913e-05, "loss": 0.9822, "step": 88430 }, { "epoch": 0.5650179522890766, "grad_norm": 0.7440978288650513, "learning_rate": 8.157977485801481e-05, "loss": 0.9445, "step": 88440 }, { "epoch": 0.5650818394388153, "grad_norm": 1.1585737466812134, "learning_rate": 8.157588449740268e-05, "loss": 0.9039, "step": 88450 }, { "epoch": 0.565145726588554, "grad_norm": 0.6025387048721313, "learning_rate": 8.157199381879406e-05, "loss": 1.0429, "step": 88460 }, { "epoch": 0.5652096137382927, "grad_norm": 1.27161705493927, "learning_rate": 8.156810282222815e-05, "loss": 0.9338, "step": 88470 }, { "epoch": 0.5652735008880314, "grad_norm": 1.4997930526733398, "learning_rate": 8.156421150774413e-05, "loss": 0.9103, "step": 88480 }, { "epoch": 0.5653373880377701, "grad_norm": 1.060680866241455, "learning_rate": 8.156031987538121e-05, "loss": 0.8186, "step": 88490 }, { "epoch": 0.5654012751875088, "grad_norm": 0.8524025082588196, "learning_rate": 8.155642792517854e-05, "loss": 0.6961, "step": 88500 }, { "epoch": 0.5654651623372475, "grad_norm": 0.7399099469184875, "learning_rate": 8.155253565717538e-05, "loss": 0.9526, "step": 88510 }, { "epoch": 0.5655290494869862, "grad_norm": 0.5844240784645081, "learning_rate": 8.154864307141086e-05, "loss": 0.7829, "step": 88520 }, { "epoch": 0.5655929366367249, "grad_norm": 1.2243051528930664, "learning_rate": 8.154475016792422e-05, "loss": 1.1276, "step": 88530 }, { "epoch": 0.5656568237864635, "grad_norm": 0.8951583504676819, "learning_rate": 8.154085694675465e-05, "loss": 0.5625, "step": 88540 }, { "epoch": 0.5657207109362022, "grad_norm": 0.9766507148742676, "learning_rate": 8.153696340794137e-05, "loss": 0.8022, "step": 88550 }, { "epoch": 0.565784598085941, "grad_norm": 1.5120043754577637, "learning_rate": 8.153306955152358e-05, "loss": 1.0359, "step": 88560 }, { "epoch": 0.5658484852356797, "grad_norm": 0.8103669881820679, "learning_rate": 8.15291753775405e-05, "loss": 0.9112, "step": 88570 }, { "epoch": 0.5659123723854184, "grad_norm": 0.9393633604049683, "learning_rate": 8.152528088603136e-05, "loss": 1.0055, "step": 88580 }, { "epoch": 0.5659762595351571, "grad_norm": 0.7244747281074524, "learning_rate": 8.152138607703534e-05, "loss": 0.9209, "step": 88590 }, { "epoch": 0.5660401466848958, "grad_norm": 0.9815077185630798, "learning_rate": 8.151749095059172e-05, "loss": 1.0403, "step": 88600 }, { "epoch": 0.5661040338346345, "grad_norm": 0.7395929098129272, "learning_rate": 8.151359550673968e-05, "loss": 0.8846, "step": 88610 }, { "epoch": 0.5661679209843732, "grad_norm": 1.4598946571350098, "learning_rate": 8.150969974551848e-05, "loss": 0.8897, "step": 88620 }, { "epoch": 0.5662318081341119, "grad_norm": 1.4247801303863525, "learning_rate": 8.150580366696734e-05, "loss": 0.8266, "step": 88630 }, { "epoch": 0.5662956952838506, "grad_norm": 0.9204801321029663, "learning_rate": 8.150190727112551e-05, "loss": 0.9737, "step": 88640 }, { "epoch": 0.5663595824335893, "grad_norm": 0.9166013598442078, "learning_rate": 8.149801055803222e-05, "loss": 0.7175, "step": 88650 }, { "epoch": 0.566423469583328, "grad_norm": 0.6634111404418945, "learning_rate": 8.149411352772672e-05, "loss": 0.8279, "step": 88660 }, { "epoch": 0.5664873567330667, "grad_norm": 0.7929425835609436, "learning_rate": 8.149021618024823e-05, "loss": 0.938, "step": 88670 }, { "epoch": 0.5665512438828054, "grad_norm": 0.834792971611023, "learning_rate": 8.148631851563602e-05, "loss": 0.9483, "step": 88680 }, { "epoch": 0.5666151310325441, "grad_norm": 0.9581144452095032, "learning_rate": 8.148242053392937e-05, "loss": 0.8688, "step": 88690 }, { "epoch": 0.5666790181822828, "grad_norm": 1.2036710977554321, "learning_rate": 8.147852223516747e-05, "loss": 0.76, "step": 88700 }, { "epoch": 0.5667429053320215, "grad_norm": 0.9013402462005615, "learning_rate": 8.147462361938965e-05, "loss": 0.7514, "step": 88710 }, { "epoch": 0.5668067924817602, "grad_norm": 1.2912942171096802, "learning_rate": 8.147072468663514e-05, "loss": 0.934, "step": 88720 }, { "epoch": 0.566870679631499, "grad_norm": 0.8134447336196899, "learning_rate": 8.146682543694318e-05, "loss": 1.0641, "step": 88730 }, { "epoch": 0.5669345667812377, "grad_norm": 1.032091736793518, "learning_rate": 8.14629258703531e-05, "loss": 0.9206, "step": 88740 }, { "epoch": 0.5669984539309764, "grad_norm": 0.8850687146186829, "learning_rate": 8.145902598690411e-05, "loss": 1.1147, "step": 88750 }, { "epoch": 0.5670623410807151, "grad_norm": 1.0859354734420776, "learning_rate": 8.145512578663553e-05, "loss": 1.1992, "step": 88760 }, { "epoch": 0.5671262282304538, "grad_norm": 1.4345630407333374, "learning_rate": 8.14512252695866e-05, "loss": 0.7953, "step": 88770 }, { "epoch": 0.5671901153801925, "grad_norm": 0.8256250023841858, "learning_rate": 8.144732443579664e-05, "loss": 0.8639, "step": 88780 }, { "epoch": 0.5672540025299311, "grad_norm": 1.0019294023513794, "learning_rate": 8.14434232853049e-05, "loss": 0.9049, "step": 88790 }, { "epoch": 0.5673178896796698, "grad_norm": 0.7967015504837036, "learning_rate": 8.14395218181507e-05, "loss": 1.0006, "step": 88800 }, { "epoch": 0.5673817768294085, "grad_norm": 0.729104220867157, "learning_rate": 8.143562003437331e-05, "loss": 0.724, "step": 88810 }, { "epoch": 0.5674456639791472, "grad_norm": 1.0281226634979248, "learning_rate": 8.143171793401204e-05, "loss": 0.7041, "step": 88820 }, { "epoch": 0.5675095511288859, "grad_norm": 0.5173068046569824, "learning_rate": 8.142781551710617e-05, "loss": 0.7518, "step": 88830 }, { "epoch": 0.5675734382786246, "grad_norm": 1.003184199333191, "learning_rate": 8.1423912783695e-05, "loss": 0.9193, "step": 88840 }, { "epoch": 0.5676373254283633, "grad_norm": 0.8439940214157104, "learning_rate": 8.142000973381787e-05, "loss": 0.874, "step": 88850 }, { "epoch": 0.567701212578102, "grad_norm": 1.0235188007354736, "learning_rate": 8.141610636751405e-05, "loss": 1.3299, "step": 88860 }, { "epoch": 0.5677650997278407, "grad_norm": 1.1160435676574707, "learning_rate": 8.141220268482284e-05, "loss": 0.9362, "step": 88870 }, { "epoch": 0.5678289868775794, "grad_norm": 0.9153828620910645, "learning_rate": 8.140829868578359e-05, "loss": 1.0991, "step": 88880 }, { "epoch": 0.5678928740273181, "grad_norm": 0.7839625477790833, "learning_rate": 8.140439437043558e-05, "loss": 0.8622, "step": 88890 }, { "epoch": 0.5679567611770568, "grad_norm": 0.9344704151153564, "learning_rate": 8.140048973881817e-05, "loss": 0.9697, "step": 88900 }, { "epoch": 0.5680206483267956, "grad_norm": 1.2923128604888916, "learning_rate": 8.139697529998467e-05, "loss": 0.9158, "step": 88910 }, { "epoch": 0.5680845354765343, "grad_norm": 0.7165302038192749, "learning_rate": 8.139307006756369e-05, "loss": 0.9479, "step": 88920 }, { "epoch": 0.568148422626273, "grad_norm": 0.9380423426628113, "learning_rate": 8.138916451898734e-05, "loss": 1.0259, "step": 88930 }, { "epoch": 0.5682123097760117, "grad_norm": 0.9337356686592102, "learning_rate": 8.138525865429494e-05, "loss": 0.7998, "step": 88940 }, { "epoch": 0.5682761969257504, "grad_norm": 1.1728187799453735, "learning_rate": 8.138135247352586e-05, "loss": 0.7984, "step": 88950 }, { "epoch": 0.5683400840754891, "grad_norm": 0.7271674871444702, "learning_rate": 8.137744597671938e-05, "loss": 0.8608, "step": 88960 }, { "epoch": 0.5684039712252278, "grad_norm": 0.8207966089248657, "learning_rate": 8.137353916391488e-05, "loss": 0.8993, "step": 88970 }, { "epoch": 0.5684678583749665, "grad_norm": 0.8019614815711975, "learning_rate": 8.136963203515173e-05, "loss": 0.9974, "step": 88980 }, { "epoch": 0.5685317455247052, "grad_norm": 0.8686451315879822, "learning_rate": 8.136572459046921e-05, "loss": 0.7662, "step": 88990 }, { "epoch": 0.5685956326744439, "grad_norm": 0.7033054232597351, "learning_rate": 8.136181682990673e-05, "loss": 0.9693, "step": 89000 }, { "epoch": 0.5686595198241826, "grad_norm": 0.5672471523284912, "learning_rate": 8.135790875350361e-05, "loss": 0.8768, "step": 89010 }, { "epoch": 0.5687234069739213, "grad_norm": 0.8341061472892761, "learning_rate": 8.135400036129923e-05, "loss": 0.8212, "step": 89020 }, { "epoch": 0.5687872941236599, "grad_norm": 0.6905550956726074, "learning_rate": 8.135009165333294e-05, "loss": 0.8526, "step": 89030 }, { "epoch": 0.5688511812733986, "grad_norm": 1.169189453125, "learning_rate": 8.134618262964409e-05, "loss": 0.8095, "step": 89040 }, { "epoch": 0.5689150684231373, "grad_norm": 0.8723841309547424, "learning_rate": 8.134227329027208e-05, "loss": 0.934, "step": 89050 }, { "epoch": 0.568978955572876, "grad_norm": 0.7888420224189758, "learning_rate": 8.133836363525626e-05, "loss": 0.8145, "step": 89060 }, { "epoch": 0.5690428427226147, "grad_norm": 0.774649441242218, "learning_rate": 8.133445366463601e-05, "loss": 0.8815, "step": 89070 }, { "epoch": 0.5691067298723534, "grad_norm": 0.6284635066986084, "learning_rate": 8.13305433784507e-05, "loss": 0.8557, "step": 89080 }, { "epoch": 0.5691706170220922, "grad_norm": 0.747380793094635, "learning_rate": 8.132663277673971e-05, "loss": 1.0254, "step": 89090 }, { "epoch": 0.5692345041718309, "grad_norm": 0.6164722442626953, "learning_rate": 8.132272185954243e-05, "loss": 0.8931, "step": 89100 }, { "epoch": 0.5692983913215696, "grad_norm": 1.5792529582977295, "learning_rate": 8.131881062689823e-05, "loss": 1.1431, "step": 89110 }, { "epoch": 0.5693622784713083, "grad_norm": 0.8760352730751038, "learning_rate": 8.131489907884653e-05, "loss": 1.0246, "step": 89120 }, { "epoch": 0.569426165621047, "grad_norm": 1.4259626865386963, "learning_rate": 8.13109872154267e-05, "loss": 0.7266, "step": 89130 }, { "epoch": 0.5694900527707857, "grad_norm": 1.4645694494247437, "learning_rate": 8.130707503667814e-05, "loss": 0.645, "step": 89140 }, { "epoch": 0.5695539399205244, "grad_norm": 1.1326792240142822, "learning_rate": 8.130316254264024e-05, "loss": 1.1472, "step": 89150 }, { "epoch": 0.5696178270702631, "grad_norm": 0.9139853715896606, "learning_rate": 8.129924973335243e-05, "loss": 1.025, "step": 89160 }, { "epoch": 0.5696817142200018, "grad_norm": 0.5680515766143799, "learning_rate": 8.129533660885407e-05, "loss": 0.9172, "step": 89170 }, { "epoch": 0.5697456013697405, "grad_norm": 0.7290217876434326, "learning_rate": 8.129142316918463e-05, "loss": 0.9759, "step": 89180 }, { "epoch": 0.5698094885194792, "grad_norm": 0.5047475099563599, "learning_rate": 8.128750941438346e-05, "loss": 0.9244, "step": 89190 }, { "epoch": 0.5698733756692179, "grad_norm": 1.2016278505325317, "learning_rate": 8.128359534449002e-05, "loss": 0.6997, "step": 89200 }, { "epoch": 0.5699372628189566, "grad_norm": 1.542084813117981, "learning_rate": 8.127968095954371e-05, "loss": 0.8523, "step": 89210 }, { "epoch": 0.5700011499686953, "grad_norm": 0.9595821499824524, "learning_rate": 8.127576625958394e-05, "loss": 0.9706, "step": 89220 }, { "epoch": 0.570065037118434, "grad_norm": 0.6322153210639954, "learning_rate": 8.127185124465016e-05, "loss": 0.7701, "step": 89230 }, { "epoch": 0.5701289242681727, "grad_norm": 1.8529796600341797, "learning_rate": 8.126793591478177e-05, "loss": 0.877, "step": 89240 }, { "epoch": 0.5701928114179114, "grad_norm": 0.6426035761833191, "learning_rate": 8.126402027001822e-05, "loss": 0.9249, "step": 89250 }, { "epoch": 0.5702566985676502, "grad_norm": 0.822106659412384, "learning_rate": 8.126010431039895e-05, "loss": 0.7386, "step": 89260 }, { "epoch": 0.5703205857173888, "grad_norm": 1.577788233757019, "learning_rate": 8.125618803596338e-05, "loss": 0.9725, "step": 89270 }, { "epoch": 0.5703844728671275, "grad_norm": 0.9563323259353638, "learning_rate": 8.125227144675096e-05, "loss": 0.6633, "step": 89280 }, { "epoch": 0.5704483600168662, "grad_norm": 0.5990996956825256, "learning_rate": 8.12483545428011e-05, "loss": 0.969, "step": 89290 }, { "epoch": 0.5705122471666049, "grad_norm": 0.8185253739356995, "learning_rate": 8.124443732415331e-05, "loss": 0.8085, "step": 89300 }, { "epoch": 0.5705761343163436, "grad_norm": 1.3149315118789673, "learning_rate": 8.124051979084699e-05, "loss": 1.0496, "step": 89310 }, { "epoch": 0.5706400214660823, "grad_norm": 0.9711151123046875, "learning_rate": 8.123660194292162e-05, "loss": 0.8151, "step": 89320 }, { "epoch": 0.570703908615821, "grad_norm": 0.8611701726913452, "learning_rate": 8.123268378041664e-05, "loss": 1.007, "step": 89330 }, { "epoch": 0.5707677957655597, "grad_norm": 0.8310745358467102, "learning_rate": 8.122876530337151e-05, "loss": 1.0438, "step": 89340 }, { "epoch": 0.5708316829152984, "grad_norm": 1.0816930532455444, "learning_rate": 8.12248465118257e-05, "loss": 0.7444, "step": 89350 }, { "epoch": 0.5708955700650371, "grad_norm": 0.9173716902732849, "learning_rate": 8.122092740581867e-05, "loss": 1.183, "step": 89360 }, { "epoch": 0.5709594572147758, "grad_norm": 1.3235098123550415, "learning_rate": 8.121700798538989e-05, "loss": 0.9047, "step": 89370 }, { "epoch": 0.5710233443645145, "grad_norm": 0.7508029341697693, "learning_rate": 8.121308825057882e-05, "loss": 1.086, "step": 89380 }, { "epoch": 0.5710872315142532, "grad_norm": 0.840752124786377, "learning_rate": 8.120916820142498e-05, "loss": 1.1446, "step": 89390 }, { "epoch": 0.5711511186639919, "grad_norm": 1.024584412574768, "learning_rate": 8.12052478379678e-05, "loss": 1.057, "step": 89400 }, { "epoch": 0.5712150058137306, "grad_norm": 0.9996415972709656, "learning_rate": 8.120132716024678e-05, "loss": 0.9998, "step": 89410 }, { "epoch": 0.5712788929634693, "grad_norm": 0.9148262143135071, "learning_rate": 8.11974061683014e-05, "loss": 1.3289, "step": 89420 }, { "epoch": 0.571342780113208, "grad_norm": 1.0694867372512817, "learning_rate": 8.119348486217116e-05, "loss": 0.9789, "step": 89430 }, { "epoch": 0.5714066672629468, "grad_norm": 0.6613714694976807, "learning_rate": 8.118956324189553e-05, "loss": 1.0028, "step": 89440 }, { "epoch": 0.5714705544126855, "grad_norm": 0.981604814529419, "learning_rate": 8.1185641307514e-05, "loss": 0.9502, "step": 89450 }, { "epoch": 0.5715344415624242, "grad_norm": 1.0735020637512207, "learning_rate": 8.118171905906611e-05, "loss": 0.7949, "step": 89460 }, { "epoch": 0.5715983287121629, "grad_norm": 0.7923375368118286, "learning_rate": 8.117779649659132e-05, "loss": 0.7809, "step": 89470 }, { "epoch": 0.5716622158619016, "grad_norm": 1.0053727626800537, "learning_rate": 8.117387362012915e-05, "loss": 0.772, "step": 89480 }, { "epoch": 0.5717261030116403, "grad_norm": 0.8714725375175476, "learning_rate": 8.116995042971909e-05, "loss": 1.0257, "step": 89490 }, { "epoch": 0.571789990161379, "grad_norm": 0.8163152933120728, "learning_rate": 8.116602692540069e-05, "loss": 0.7977, "step": 89500 }, { "epoch": 0.5718538773111176, "grad_norm": 0.49878737330436707, "learning_rate": 8.116210310721342e-05, "loss": 0.8128, "step": 89510 }, { "epoch": 0.5719177644608563, "grad_norm": 0.6907072067260742, "learning_rate": 8.115817897519682e-05, "loss": 0.7639, "step": 89520 }, { "epoch": 0.571981651610595, "grad_norm": 0.6646702885627747, "learning_rate": 8.115425452939039e-05, "loss": 0.8343, "step": 89530 }, { "epoch": 0.5720455387603337, "grad_norm": 0.7955873608589172, "learning_rate": 8.115032976983368e-05, "loss": 0.8254, "step": 89540 }, { "epoch": 0.5721094259100724, "grad_norm": 1.2668566703796387, "learning_rate": 8.114640469656619e-05, "loss": 0.879, "step": 89550 }, { "epoch": 0.5721733130598111, "grad_norm": 0.8631924390792847, "learning_rate": 8.114247930962746e-05, "loss": 0.9851, "step": 89560 }, { "epoch": 0.5722372002095498, "grad_norm": 1.4024226665496826, "learning_rate": 8.113855360905702e-05, "loss": 0.9293, "step": 89570 }, { "epoch": 0.5723010873592885, "grad_norm": 0.826225996017456, "learning_rate": 8.113462759489441e-05, "loss": 0.8176, "step": 89580 }, { "epoch": 0.5723649745090272, "grad_norm": 0.7401711344718933, "learning_rate": 8.113070126717916e-05, "loss": 0.7405, "step": 89590 }, { "epoch": 0.5724288616587659, "grad_norm": 0.8626922369003296, "learning_rate": 8.112677462595084e-05, "loss": 0.9589, "step": 89600 }, { "epoch": 0.5724927488085046, "grad_norm": 0.7971317172050476, "learning_rate": 8.112284767124894e-05, "loss": 0.9191, "step": 89610 }, { "epoch": 0.5725566359582434, "grad_norm": 0.7105472683906555, "learning_rate": 8.111892040311305e-05, "loss": 1.0642, "step": 89620 }, { "epoch": 0.5726205231079821, "grad_norm": 0.8470255732536316, "learning_rate": 8.111499282158271e-05, "loss": 0.9904, "step": 89630 }, { "epoch": 0.5726844102577208, "grad_norm": 1.0502973794937134, "learning_rate": 8.111106492669747e-05, "loss": 0.7748, "step": 89640 }, { "epoch": 0.5727482974074595, "grad_norm": 0.9017430543899536, "learning_rate": 8.11071367184969e-05, "loss": 1.0352, "step": 89650 }, { "epoch": 0.5728121845571982, "grad_norm": 1.4443550109863281, "learning_rate": 8.110320819702055e-05, "loss": 0.7986, "step": 89660 }, { "epoch": 0.5728760717069369, "grad_norm": 0.8041828274726868, "learning_rate": 8.109927936230798e-05, "loss": 0.8368, "step": 89670 }, { "epoch": 0.5729399588566756, "grad_norm": 0.8347397446632385, "learning_rate": 8.109535021439876e-05, "loss": 1.0824, "step": 89680 }, { "epoch": 0.5730038460064143, "grad_norm": 1.0206586122512817, "learning_rate": 8.109142075333247e-05, "loss": 0.9412, "step": 89690 }, { "epoch": 0.573067733156153, "grad_norm": 1.7678121328353882, "learning_rate": 8.108749097914867e-05, "loss": 0.698, "step": 89700 }, { "epoch": 0.5731316203058917, "grad_norm": 0.7423132658004761, "learning_rate": 8.108356089188694e-05, "loss": 1.1062, "step": 89710 }, { "epoch": 0.5731955074556304, "grad_norm": 0.5420845746994019, "learning_rate": 8.107963049158686e-05, "loss": 0.9447, "step": 89720 }, { "epoch": 0.5732593946053691, "grad_norm": 1.7676161527633667, "learning_rate": 8.107569977828803e-05, "loss": 0.9414, "step": 89730 }, { "epoch": 0.5733232817551078, "grad_norm": 0.9517451524734497, "learning_rate": 8.107176875203e-05, "loss": 0.8824, "step": 89740 }, { "epoch": 0.5733871689048465, "grad_norm": 3.0669705867767334, "learning_rate": 8.106783741285237e-05, "loss": 0.9844, "step": 89750 }, { "epoch": 0.5734510560545851, "grad_norm": 1.1198365688323975, "learning_rate": 8.106390576079477e-05, "loss": 0.9505, "step": 89760 }, { "epoch": 0.5735149432043238, "grad_norm": 0.8078299164772034, "learning_rate": 8.105997379589675e-05, "loss": 0.9776, "step": 89770 }, { "epoch": 0.5735788303540625, "grad_norm": 2.656707286834717, "learning_rate": 8.105604151819793e-05, "loss": 0.6794, "step": 89780 }, { "epoch": 0.5736427175038012, "grad_norm": 1.2671973705291748, "learning_rate": 8.105210892773789e-05, "loss": 0.7823, "step": 89790 }, { "epoch": 0.57370660465354, "grad_norm": 0.7666161060333252, "learning_rate": 8.104817602455626e-05, "loss": 0.7727, "step": 89800 }, { "epoch": 0.5737704918032787, "grad_norm": 1.4913362264633179, "learning_rate": 8.104424280869263e-05, "loss": 0.8733, "step": 89810 }, { "epoch": 0.5738343789530174, "grad_norm": 1.1301679611206055, "learning_rate": 8.104030928018662e-05, "loss": 0.7014, "step": 89820 }, { "epoch": 0.5738982661027561, "grad_norm": 1.294581413269043, "learning_rate": 8.103637543907784e-05, "loss": 1.1176, "step": 89830 }, { "epoch": 0.5739621532524948, "grad_norm": 0.9880439639091492, "learning_rate": 8.103244128540591e-05, "loss": 1.1367, "step": 89840 }, { "epoch": 0.5740260404022335, "grad_norm": 1.8410000801086426, "learning_rate": 8.102850681921046e-05, "loss": 0.9493, "step": 89850 }, { "epoch": 0.5740899275519722, "grad_norm": 0.7882397770881653, "learning_rate": 8.102457204053109e-05, "loss": 0.9901, "step": 89860 }, { "epoch": 0.5741538147017109, "grad_norm": 0.833531379699707, "learning_rate": 8.102063694940745e-05, "loss": 0.8304, "step": 89870 }, { "epoch": 0.5742177018514496, "grad_norm": 0.7302635908126831, "learning_rate": 8.101670154587915e-05, "loss": 0.9241, "step": 89880 }, { "epoch": 0.5742815890011883, "grad_norm": 0.7871063947677612, "learning_rate": 8.101276582998583e-05, "loss": 0.7141, "step": 89890 }, { "epoch": 0.574345476150927, "grad_norm": 1.1040356159210205, "learning_rate": 8.100882980176712e-05, "loss": 0.8373, "step": 89900 }, { "epoch": 0.5744093633006657, "grad_norm": 0.5926182866096497, "learning_rate": 8.100489346126268e-05, "loss": 0.6924, "step": 89910 }, { "epoch": 0.5744732504504044, "grad_norm": 1.047753930091858, "learning_rate": 8.100095680851214e-05, "loss": 0.8614, "step": 89920 }, { "epoch": 0.5745371376001431, "grad_norm": 0.9589722752571106, "learning_rate": 8.099701984355514e-05, "loss": 0.796, "step": 89930 }, { "epoch": 0.5746010247498818, "grad_norm": 1.2964690923690796, "learning_rate": 8.099308256643134e-05, "loss": 0.7672, "step": 89940 }, { "epoch": 0.5746649118996205, "grad_norm": 1.9228540658950806, "learning_rate": 8.09891449771804e-05, "loss": 0.8787, "step": 89950 }, { "epoch": 0.5747287990493593, "grad_norm": 0.5643669962882996, "learning_rate": 8.098520707584195e-05, "loss": 0.855, "step": 89960 }, { "epoch": 0.574792686199098, "grad_norm": 0.9459285736083984, "learning_rate": 8.098126886245564e-05, "loss": 0.8887, "step": 89970 }, { "epoch": 0.5748565733488367, "grad_norm": 0.8743549585342407, "learning_rate": 8.097733033706117e-05, "loss": 1.0167, "step": 89980 }, { "epoch": 0.5749204604985754, "grad_norm": 1.0637538433074951, "learning_rate": 8.097339149969818e-05, "loss": 0.76, "step": 89990 }, { "epoch": 0.574984347648314, "grad_norm": 0.6457778215408325, "learning_rate": 8.096945235040634e-05, "loss": 0.8873, "step": 90000 }, { "epoch": 0.5750482347980527, "grad_norm": 0.7007945775985718, "learning_rate": 8.096551288922532e-05, "loss": 0.6989, "step": 90010 }, { "epoch": 0.5751121219477914, "grad_norm": 1.048057198524475, "learning_rate": 8.096157311619479e-05, "loss": 0.7702, "step": 90020 }, { "epoch": 0.5751760090975301, "grad_norm": 0.8541986346244812, "learning_rate": 8.095763303135444e-05, "loss": 0.996, "step": 90030 }, { "epoch": 0.5752398962472688, "grad_norm": 1.4196783304214478, "learning_rate": 8.095369263474396e-05, "loss": 0.5537, "step": 90040 }, { "epoch": 0.5753037833970075, "grad_norm": 0.8951913118362427, "learning_rate": 8.094975192640299e-05, "loss": 0.6825, "step": 90050 }, { "epoch": 0.5753676705467462, "grad_norm": 0.6169331669807434, "learning_rate": 8.094581090637127e-05, "loss": 0.8536, "step": 90060 }, { "epoch": 0.5754315576964849, "grad_norm": 0.7136598825454712, "learning_rate": 8.094186957468843e-05, "loss": 0.9564, "step": 90070 }, { "epoch": 0.5754954448462236, "grad_norm": 1.0115174055099487, "learning_rate": 8.093792793139421e-05, "loss": 0.7456, "step": 90080 }, { "epoch": 0.5755593319959623, "grad_norm": 0.7271766662597656, "learning_rate": 8.09339859765283e-05, "loss": 0.8566, "step": 90090 }, { "epoch": 0.575623219145701, "grad_norm": 0.870293140411377, "learning_rate": 8.093004371013038e-05, "loss": 1.0552, "step": 90100 }, { "epoch": 0.5756871062954397, "grad_norm": 0.8186811208724976, "learning_rate": 8.092610113224017e-05, "loss": 0.7465, "step": 90110 }, { "epoch": 0.5757509934451784, "grad_norm": 0.918304979801178, "learning_rate": 8.092215824289735e-05, "loss": 0.8869, "step": 90120 }, { "epoch": 0.5758148805949171, "grad_norm": 0.9499895572662354, "learning_rate": 8.091821504214166e-05, "loss": 0.8331, "step": 90130 }, { "epoch": 0.5758787677446559, "grad_norm": 1.6564801931381226, "learning_rate": 8.091427153001278e-05, "loss": 0.9647, "step": 90140 }, { "epoch": 0.5759426548943946, "grad_norm": 1.2745451927185059, "learning_rate": 8.091032770655048e-05, "loss": 0.7745, "step": 90150 }, { "epoch": 0.5760065420441333, "grad_norm": 1.276982069015503, "learning_rate": 8.09063835717944e-05, "loss": 1.0932, "step": 90160 }, { "epoch": 0.576070429193872, "grad_norm": 1.1688791513442993, "learning_rate": 8.09024391257843e-05, "loss": 0.8497, "step": 90170 }, { "epoch": 0.5761343163436107, "grad_norm": 0.6883856058120728, "learning_rate": 8.089849436855992e-05, "loss": 0.8468, "step": 90180 }, { "epoch": 0.5761982034933494, "grad_norm": 1.2166844606399536, "learning_rate": 8.089454930016095e-05, "loss": 0.9501, "step": 90190 }, { "epoch": 0.5762620906430881, "grad_norm": 0.9265638589859009, "learning_rate": 8.089060392062718e-05, "loss": 0.828, "step": 90200 }, { "epoch": 0.5763259777928268, "grad_norm": 1.2245463132858276, "learning_rate": 8.088665822999827e-05, "loss": 1.0158, "step": 90210 }, { "epoch": 0.5763898649425655, "grad_norm": 1.1771318912506104, "learning_rate": 8.088271222831401e-05, "loss": 0.8838, "step": 90220 }, { "epoch": 0.5764537520923042, "grad_norm": 0.6416171193122864, "learning_rate": 8.08787659156141e-05, "loss": 0.8364, "step": 90230 }, { "epoch": 0.5765176392420428, "grad_norm": 0.7262217998504639, "learning_rate": 8.087481929193831e-05, "loss": 0.8772, "step": 90240 }, { "epoch": 0.5765815263917815, "grad_norm": 1.0803191661834717, "learning_rate": 8.08708723573264e-05, "loss": 0.9563, "step": 90250 }, { "epoch": 0.5766454135415202, "grad_norm": 0.8832546472549438, "learning_rate": 8.086692511181806e-05, "loss": 0.9067, "step": 90260 }, { "epoch": 0.5767093006912589, "grad_norm": 0.8966745138168335, "learning_rate": 8.086297755545312e-05, "loss": 1.0014, "step": 90270 }, { "epoch": 0.5767731878409976, "grad_norm": 1.051358938217163, "learning_rate": 8.085902968827128e-05, "loss": 0.8469, "step": 90280 }, { "epoch": 0.5768370749907363, "grad_norm": 0.5282111763954163, "learning_rate": 8.085508151031232e-05, "loss": 0.7655, "step": 90290 }, { "epoch": 0.576900962140475, "grad_norm": 0.9313019514083862, "learning_rate": 8.085113302161598e-05, "loss": 0.8174, "step": 90300 }, { "epoch": 0.5769648492902137, "grad_norm": 1.0769340991973877, "learning_rate": 8.084718422222205e-05, "loss": 0.6471, "step": 90310 }, { "epoch": 0.5770287364399524, "grad_norm": 0.8049689531326294, "learning_rate": 8.084323511217029e-05, "loss": 0.9863, "step": 90320 }, { "epoch": 0.5770926235896912, "grad_norm": 1.1444233655929565, "learning_rate": 8.083928569150045e-05, "loss": 0.7216, "step": 90330 }, { "epoch": 0.5771565107394299, "grad_norm": 0.6875047087669373, "learning_rate": 8.083533596025234e-05, "loss": 0.8915, "step": 90340 }, { "epoch": 0.5772203978891686, "grad_norm": 0.9097625613212585, "learning_rate": 8.083138591846574e-05, "loss": 0.7636, "step": 90350 }, { "epoch": 0.5772842850389073, "grad_norm": 1.2456170320510864, "learning_rate": 8.082743556618038e-05, "loss": 0.8581, "step": 90360 }, { "epoch": 0.577348172188646, "grad_norm": 0.8649427890777588, "learning_rate": 8.082348490343608e-05, "loss": 1.0361, "step": 90370 }, { "epoch": 0.5774120593383847, "grad_norm": 2.0383450984954834, "learning_rate": 8.081953393027263e-05, "loss": 0.7167, "step": 90380 }, { "epoch": 0.5774759464881234, "grad_norm": 1.4382243156433105, "learning_rate": 8.081558264672982e-05, "loss": 0.881, "step": 90390 }, { "epoch": 0.5775398336378621, "grad_norm": 0.6351116895675659, "learning_rate": 8.081163105284741e-05, "loss": 0.7479, "step": 90400 }, { "epoch": 0.5776037207876008, "grad_norm": 0.8547778129577637, "learning_rate": 8.080767914866523e-05, "loss": 0.9155, "step": 90410 }, { "epoch": 0.5776676079373395, "grad_norm": 0.6154083609580994, "learning_rate": 8.080372693422307e-05, "loss": 0.7414, "step": 90420 }, { "epoch": 0.5777314950870782, "grad_norm": 0.8668635487556458, "learning_rate": 8.079977440956073e-05, "loss": 0.6981, "step": 90430 }, { "epoch": 0.5777953822368169, "grad_norm": 0.6722155809402466, "learning_rate": 8.079582157471801e-05, "loss": 0.9792, "step": 90440 }, { "epoch": 0.5778592693865556, "grad_norm": 0.5810309648513794, "learning_rate": 8.079186842973473e-05, "loss": 0.5862, "step": 90450 }, { "epoch": 0.5779231565362943, "grad_norm": 1.0260207653045654, "learning_rate": 8.07879149746507e-05, "loss": 0.803, "step": 90460 }, { "epoch": 0.577987043686033, "grad_norm": 0.8295899033546448, "learning_rate": 8.078396120950572e-05, "loss": 0.955, "step": 90470 }, { "epoch": 0.5780509308357717, "grad_norm": 0.7317106127738953, "learning_rate": 8.078000713433962e-05, "loss": 1.0108, "step": 90480 }, { "epoch": 0.5781148179855103, "grad_norm": 0.925954282283783, "learning_rate": 8.077605274919224e-05, "loss": 0.675, "step": 90490 }, { "epoch": 0.578178705135249, "grad_norm": 1.5816576480865479, "learning_rate": 8.077209805410336e-05, "loss": 0.802, "step": 90500 }, { "epoch": 0.5782425922849878, "grad_norm": 1.3754863739013672, "learning_rate": 8.076814304911285e-05, "loss": 0.7063, "step": 90510 }, { "epoch": 0.5783064794347265, "grad_norm": 1.1097428798675537, "learning_rate": 8.076418773426051e-05, "loss": 0.8896, "step": 90520 }, { "epoch": 0.5783703665844652, "grad_norm": 0.9202744364738464, "learning_rate": 8.076023210958618e-05, "loss": 1.1369, "step": 90530 }, { "epoch": 0.5784342537342039, "grad_norm": 0.8386440873146057, "learning_rate": 8.07562761751297e-05, "loss": 1.1063, "step": 90540 }, { "epoch": 0.5784981408839426, "grad_norm": 1.387734293937683, "learning_rate": 8.075231993093093e-05, "loss": 0.8543, "step": 90550 }, { "epoch": 0.5785620280336813, "grad_norm": 1.1072419881820679, "learning_rate": 8.074836337702969e-05, "loss": 1.1178, "step": 90560 }, { "epoch": 0.57862591518342, "grad_norm": 0.7916972041130066, "learning_rate": 8.074440651346582e-05, "loss": 1.2825, "step": 90570 }, { "epoch": 0.5786898023331587, "grad_norm": 1.407332181930542, "learning_rate": 8.074044934027918e-05, "loss": 0.9507, "step": 90580 }, { "epoch": 0.5787536894828974, "grad_norm": 0.8653108477592468, "learning_rate": 8.073649185750962e-05, "loss": 0.7288, "step": 90590 }, { "epoch": 0.5788175766326361, "grad_norm": 1.2476141452789307, "learning_rate": 8.073253406519699e-05, "loss": 0.8561, "step": 90600 }, { "epoch": 0.5788814637823748, "grad_norm": 0.8226998448371887, "learning_rate": 8.072857596338116e-05, "loss": 0.8944, "step": 90610 }, { "epoch": 0.5789453509321135, "grad_norm": 0.7982886433601379, "learning_rate": 8.0724617552102e-05, "loss": 0.8097, "step": 90620 }, { "epoch": 0.5790092380818522, "grad_norm": 0.7011058926582336, "learning_rate": 8.072065883139935e-05, "loss": 1.0291, "step": 90630 }, { "epoch": 0.5790731252315909, "grad_norm": 1.2704604864120483, "learning_rate": 8.071669980131307e-05, "loss": 0.893, "step": 90640 }, { "epoch": 0.5791370123813296, "grad_norm": 0.7645861506462097, "learning_rate": 8.071274046188306e-05, "loss": 0.6751, "step": 90650 }, { "epoch": 0.5792008995310683, "grad_norm": 3.0247256755828857, "learning_rate": 8.07087808131492e-05, "loss": 0.7969, "step": 90660 }, { "epoch": 0.579264786680807, "grad_norm": 1.2994235754013062, "learning_rate": 8.070482085515134e-05, "loss": 1.3255, "step": 90670 }, { "epoch": 0.5793286738305458, "grad_norm": 1.6243011951446533, "learning_rate": 8.070086058792937e-05, "loss": 0.8921, "step": 90680 }, { "epoch": 0.5793925609802845, "grad_norm": 0.907557487487793, "learning_rate": 8.069690001152317e-05, "loss": 0.8408, "step": 90690 }, { "epoch": 0.5794564481300232, "grad_norm": 0.7467745542526245, "learning_rate": 8.069293912597263e-05, "loss": 0.881, "step": 90700 }, { "epoch": 0.5795203352797619, "grad_norm": 0.7291324734687805, "learning_rate": 8.068897793131764e-05, "loss": 1.0837, "step": 90710 }, { "epoch": 0.5795842224295006, "grad_norm": 0.7500112056732178, "learning_rate": 8.068501642759811e-05, "loss": 0.6602, "step": 90720 }, { "epoch": 0.5796481095792392, "grad_norm": 0.3867965042591095, "learning_rate": 8.068105461485391e-05, "loss": 0.7279, "step": 90730 }, { "epoch": 0.5797119967289779, "grad_norm": 0.6861584186553955, "learning_rate": 8.067709249312494e-05, "loss": 0.924, "step": 90740 }, { "epoch": 0.5797758838787166, "grad_norm": 0.9725950956344604, "learning_rate": 8.06731300624511e-05, "loss": 0.686, "step": 90750 }, { "epoch": 0.5798397710284553, "grad_norm": 0.7137267589569092, "learning_rate": 8.066916732287232e-05, "loss": 0.7585, "step": 90760 }, { "epoch": 0.579903658178194, "grad_norm": 0.8141860961914062, "learning_rate": 8.06652042744285e-05, "loss": 1.2448, "step": 90770 }, { "epoch": 0.5799675453279327, "grad_norm": 0.9452531337738037, "learning_rate": 8.066124091715952e-05, "loss": 0.8939, "step": 90780 }, { "epoch": 0.5800314324776714, "grad_norm": 0.8053810596466064, "learning_rate": 8.065727725110533e-05, "loss": 0.8234, "step": 90790 }, { "epoch": 0.5800953196274101, "grad_norm": 0.8168609142303467, "learning_rate": 8.065331327630585e-05, "loss": 1.1362, "step": 90800 }, { "epoch": 0.5801592067771488, "grad_norm": 0.7657856941223145, "learning_rate": 8.064934899280096e-05, "loss": 0.9269, "step": 90810 }, { "epoch": 0.5802230939268875, "grad_norm": 1.230660319328308, "learning_rate": 8.064538440063063e-05, "loss": 0.7815, "step": 90820 }, { "epoch": 0.5802869810766262, "grad_norm": 0.7919756770133972, "learning_rate": 8.064141949983476e-05, "loss": 0.7874, "step": 90830 }, { "epoch": 0.580350868226365, "grad_norm": 0.6535720229148865, "learning_rate": 8.063745429045329e-05, "loss": 0.7333, "step": 90840 }, { "epoch": 0.5804147553761037, "grad_norm": 0.8120725750923157, "learning_rate": 8.063348877252614e-05, "loss": 0.7505, "step": 90850 }, { "epoch": 0.5804786425258424, "grad_norm": 0.7102304697036743, "learning_rate": 8.062952294609327e-05, "loss": 0.6825, "step": 90860 }, { "epoch": 0.5805425296755811, "grad_norm": 0.9454075694084167, "learning_rate": 8.062555681119459e-05, "loss": 0.6988, "step": 90870 }, { "epoch": 0.5806064168253198, "grad_norm": 1.1664451360702515, "learning_rate": 8.062159036787007e-05, "loss": 0.8987, "step": 90880 }, { "epoch": 0.5806703039750585, "grad_norm": 1.2764151096343994, "learning_rate": 8.061762361615964e-05, "loss": 0.9188, "step": 90890 }, { "epoch": 0.5807341911247972, "grad_norm": 1.244565725326538, "learning_rate": 8.061365655610325e-05, "loss": 1.1752, "step": 90900 }, { "epoch": 0.5807980782745359, "grad_norm": 0.8151182532310486, "learning_rate": 8.060968918774085e-05, "loss": 0.8404, "step": 90910 }, { "epoch": 0.5808619654242746, "grad_norm": 0.8376042246818542, "learning_rate": 8.06057215111124e-05, "loss": 0.8567, "step": 90920 }, { "epoch": 0.5809258525740133, "grad_norm": 1.4422600269317627, "learning_rate": 8.060175352625787e-05, "loss": 0.9805, "step": 90930 }, { "epoch": 0.580989739723752, "grad_norm": 0.6964714527130127, "learning_rate": 8.05977852332172e-05, "loss": 0.6621, "step": 90940 }, { "epoch": 0.5810536268734907, "grad_norm": 0.9028936624526978, "learning_rate": 8.059381663203036e-05, "loss": 0.9439, "step": 90950 }, { "epoch": 0.5811175140232294, "grad_norm": 1.128549575805664, "learning_rate": 8.058984772273733e-05, "loss": 0.7059, "step": 90960 }, { "epoch": 0.581181401172968, "grad_norm": 1.0536413192749023, "learning_rate": 8.058587850537804e-05, "loss": 0.8569, "step": 90970 }, { "epoch": 0.5812452883227067, "grad_norm": 0.8410016894340515, "learning_rate": 8.058190897999252e-05, "loss": 0.9881, "step": 90980 }, { "epoch": 0.5813091754724454, "grad_norm": 0.5887959003448486, "learning_rate": 8.057793914662071e-05, "loss": 1.2143, "step": 90990 }, { "epoch": 0.5813730626221841, "grad_norm": 0.9902825951576233, "learning_rate": 8.057396900530261e-05, "loss": 1.0165, "step": 91000 }, { "epoch": 0.5814369497719228, "grad_norm": 0.7618111371994019, "learning_rate": 8.056999855607819e-05, "loss": 0.9192, "step": 91010 }, { "epoch": 0.5815008369216615, "grad_norm": 1.465938925743103, "learning_rate": 8.056602779898742e-05, "loss": 1.0972, "step": 91020 }, { "epoch": 0.5815647240714003, "grad_norm": 0.6929851770401001, "learning_rate": 8.056205673407031e-05, "loss": 0.8941, "step": 91030 }, { "epoch": 0.581628611221139, "grad_norm": 0.7662091851234436, "learning_rate": 8.055808536136687e-05, "loss": 1.1148, "step": 91040 }, { "epoch": 0.5816924983708777, "grad_norm": 1.1561191082000732, "learning_rate": 8.055411368091706e-05, "loss": 0.9246, "step": 91050 }, { "epoch": 0.5817563855206164, "grad_norm": 1.0664466619491577, "learning_rate": 8.05501416927609e-05, "loss": 1.0493, "step": 91060 }, { "epoch": 0.5818202726703551, "grad_norm": 1.0375691652297974, "learning_rate": 8.054616939693837e-05, "loss": 0.8237, "step": 91070 }, { "epoch": 0.5818841598200938, "grad_norm": 1.251013159751892, "learning_rate": 8.054219679348949e-05, "loss": 0.9984, "step": 91080 }, { "epoch": 0.5819480469698325, "grad_norm": 0.7131451964378357, "learning_rate": 8.053822388245426e-05, "loss": 0.8158, "step": 91090 }, { "epoch": 0.5820119341195712, "grad_norm": 0.6554450988769531, "learning_rate": 8.053425066387271e-05, "loss": 0.988, "step": 91100 }, { "epoch": 0.5820758212693099, "grad_norm": 0.5957306623458862, "learning_rate": 8.053027713778484e-05, "loss": 0.8844, "step": 91110 }, { "epoch": 0.5821397084190486, "grad_norm": 2.745039463043213, "learning_rate": 8.052630330423066e-05, "loss": 0.8555, "step": 91120 }, { "epoch": 0.5822035955687873, "grad_norm": 1.19644033908844, "learning_rate": 8.05223291632502e-05, "loss": 1.1978, "step": 91130 }, { "epoch": 0.582267482718526, "grad_norm": 0.7143746614456177, "learning_rate": 8.051835471488347e-05, "loss": 1.0662, "step": 91140 }, { "epoch": 0.5823313698682647, "grad_norm": 0.6921029686927795, "learning_rate": 8.051437995917051e-05, "loss": 0.8375, "step": 91150 }, { "epoch": 0.5823952570180034, "grad_norm": 1.4767210483551025, "learning_rate": 8.051040489615136e-05, "loss": 0.9358, "step": 91160 }, { "epoch": 0.5824591441677421, "grad_norm": 1.0952929258346558, "learning_rate": 8.050642952586602e-05, "loss": 0.8761, "step": 91170 }, { "epoch": 0.5825230313174808, "grad_norm": 0.7604880332946777, "learning_rate": 8.050245384835455e-05, "loss": 0.9666, "step": 91180 }, { "epoch": 0.5825869184672195, "grad_norm": 0.6385078430175781, "learning_rate": 8.049847786365698e-05, "loss": 1.0319, "step": 91190 }, { "epoch": 0.5826508056169583, "grad_norm": 0.8328523635864258, "learning_rate": 8.049450157181336e-05, "loss": 0.6776, "step": 91200 }, { "epoch": 0.5827146927666969, "grad_norm": 0.9883635640144348, "learning_rate": 8.049052497286372e-05, "loss": 0.8049, "step": 91210 }, { "epoch": 0.5827785799164356, "grad_norm": 0.9479039907455444, "learning_rate": 8.048654806684812e-05, "loss": 0.8743, "step": 91220 }, { "epoch": 0.5828424670661743, "grad_norm": 1.1088539361953735, "learning_rate": 8.048257085380659e-05, "loss": 0.9272, "step": 91230 }, { "epoch": 0.582906354215913, "grad_norm": 0.6845352053642273, "learning_rate": 8.047859333377923e-05, "loss": 0.9951, "step": 91240 }, { "epoch": 0.5829702413656517, "grad_norm": 1.5272711515426636, "learning_rate": 8.047461550680606e-05, "loss": 0.8264, "step": 91250 }, { "epoch": 0.5830341285153904, "grad_norm": 0.7585494518280029, "learning_rate": 8.047063737292712e-05, "loss": 0.7845, "step": 91260 }, { "epoch": 0.5830980156651291, "grad_norm": 0.8442081809043884, "learning_rate": 8.046665893218253e-05, "loss": 0.8805, "step": 91270 }, { "epoch": 0.5831619028148678, "grad_norm": 0.7809866666793823, "learning_rate": 8.046268018461232e-05, "loss": 0.8469, "step": 91280 }, { "epoch": 0.5832257899646065, "grad_norm": 0.5992255806922913, "learning_rate": 8.045870113025655e-05, "loss": 0.9114, "step": 91290 }, { "epoch": 0.5832896771143452, "grad_norm": 0.9385843873023987, "learning_rate": 8.045472176915533e-05, "loss": 0.9167, "step": 91300 }, { "epoch": 0.5833535642640839, "grad_norm": 0.6832097172737122, "learning_rate": 8.04507421013487e-05, "loss": 1.0817, "step": 91310 }, { "epoch": 0.5834174514138226, "grad_norm": 0.5917803049087524, "learning_rate": 8.044676212687677e-05, "loss": 0.8989, "step": 91320 }, { "epoch": 0.5834813385635613, "grad_norm": 0.6751442551612854, "learning_rate": 8.04427818457796e-05, "loss": 0.8093, "step": 91330 }, { "epoch": 0.5835452257133, "grad_norm": 0.5237501859664917, "learning_rate": 8.043880125809727e-05, "loss": 0.8325, "step": 91340 }, { "epoch": 0.5836091128630387, "grad_norm": 1.9701563119888306, "learning_rate": 8.043482036386989e-05, "loss": 0.8434, "step": 91350 }, { "epoch": 0.5836730000127774, "grad_norm": 1.0748164653778076, "learning_rate": 8.043083916313752e-05, "loss": 0.8942, "step": 91360 }, { "epoch": 0.5837368871625161, "grad_norm": 0.7747710347175598, "learning_rate": 8.042685765594029e-05, "loss": 0.7678, "step": 91370 }, { "epoch": 0.5838007743122549, "grad_norm": 1.0838667154312134, "learning_rate": 8.042287584231828e-05, "loss": 0.9147, "step": 91380 }, { "epoch": 0.5838646614619936, "grad_norm": 1.229852557182312, "learning_rate": 8.041889372231159e-05, "loss": 1.0037, "step": 91390 }, { "epoch": 0.5839285486117323, "grad_norm": 1.2635694742202759, "learning_rate": 8.041491129596032e-05, "loss": 0.8378, "step": 91400 }, { "epoch": 0.583992435761471, "grad_norm": 1.1819652318954468, "learning_rate": 8.041092856330457e-05, "loss": 0.7489, "step": 91410 }, { "epoch": 0.5840563229112097, "grad_norm": 1.0836447477340698, "learning_rate": 8.040694552438448e-05, "loss": 0.9781, "step": 91420 }, { "epoch": 0.5841202100609484, "grad_norm": 2.449270248413086, "learning_rate": 8.040296217924014e-05, "loss": 0.9975, "step": 91430 }, { "epoch": 0.5841840972106871, "grad_norm": 0.9335359334945679, "learning_rate": 8.039897852791167e-05, "loss": 0.8676, "step": 91440 }, { "epoch": 0.5842479843604258, "grad_norm": 1.0198067426681519, "learning_rate": 8.039499457043918e-05, "loss": 0.9543, "step": 91450 }, { "epoch": 0.5843118715101644, "grad_norm": 0.7770729660987854, "learning_rate": 8.03910103068628e-05, "loss": 0.8293, "step": 91460 }, { "epoch": 0.5843757586599031, "grad_norm": 0.9211755990982056, "learning_rate": 8.038702573722266e-05, "loss": 0.9459, "step": 91470 }, { "epoch": 0.5844396458096418, "grad_norm": 0.6153119802474976, "learning_rate": 8.038304086155887e-05, "loss": 0.9555, "step": 91480 }, { "epoch": 0.5845035329593805, "grad_norm": 0.5979563593864441, "learning_rate": 8.037905567991158e-05, "loss": 0.906, "step": 91490 }, { "epoch": 0.5845674201091192, "grad_norm": 0.7104209065437317, "learning_rate": 8.037507019232091e-05, "loss": 0.7133, "step": 91500 }, { "epoch": 0.5846313072588579, "grad_norm": 0.8748192191123962, "learning_rate": 8.037108439882702e-05, "loss": 0.7568, "step": 91510 }, { "epoch": 0.5846951944085966, "grad_norm": 0.9937753081321716, "learning_rate": 8.036709829947003e-05, "loss": 0.7123, "step": 91520 }, { "epoch": 0.5847590815583353, "grad_norm": 1.2634817361831665, "learning_rate": 8.036311189429009e-05, "loss": 0.9345, "step": 91530 }, { "epoch": 0.584822968708074, "grad_norm": 0.8244264721870422, "learning_rate": 8.035912518332733e-05, "loss": 1.0218, "step": 91540 }, { "epoch": 0.5848868558578127, "grad_norm": 1.2409876585006714, "learning_rate": 8.035513816662194e-05, "loss": 0.9595, "step": 91550 }, { "epoch": 0.5849507430075515, "grad_norm": 0.9279502034187317, "learning_rate": 8.035115084421404e-05, "loss": 0.7282, "step": 91560 }, { "epoch": 0.5850146301572902, "grad_norm": 1.0995663404464722, "learning_rate": 8.034716321614377e-05, "loss": 1.0674, "step": 91570 }, { "epoch": 0.5850785173070289, "grad_norm": 1.0315444469451904, "learning_rate": 8.034317528245134e-05, "loss": 0.8631, "step": 91580 }, { "epoch": 0.5851424044567676, "grad_norm": 0.8768134117126465, "learning_rate": 8.033918704317686e-05, "loss": 0.7553, "step": 91590 }, { "epoch": 0.5852062916065063, "grad_norm": 1.049591064453125, "learning_rate": 8.033519849836055e-05, "loss": 0.979, "step": 91600 }, { "epoch": 0.585270178756245, "grad_norm": 0.7832081317901611, "learning_rate": 8.033120964804252e-05, "loss": 0.8019, "step": 91610 }, { "epoch": 0.5853340659059837, "grad_norm": 1.217409372329712, "learning_rate": 8.0327220492263e-05, "loss": 0.96, "step": 91620 }, { "epoch": 0.5853979530557224, "grad_norm": 1.7218462228775024, "learning_rate": 8.03232310310621e-05, "loss": 1.3111, "step": 91630 }, { "epoch": 0.5854618402054611, "grad_norm": 0.9196959733963013, "learning_rate": 8.031924126448005e-05, "loss": 0.9832, "step": 91640 }, { "epoch": 0.5855257273551998, "grad_norm": 0.9768834114074707, "learning_rate": 8.031525119255701e-05, "loss": 0.9437, "step": 91650 }, { "epoch": 0.5855896145049385, "grad_norm": 0.707797646522522, "learning_rate": 8.031126081533315e-05, "loss": 0.7751, "step": 91660 }, { "epoch": 0.5856535016546772, "grad_norm": 3.0832972526550293, "learning_rate": 8.030727013284868e-05, "loss": 0.8823, "step": 91670 }, { "epoch": 0.5857173888044159, "grad_norm": 0.8001325130462646, "learning_rate": 8.030327914514377e-05, "loss": 0.9358, "step": 91680 }, { "epoch": 0.5857812759541546, "grad_norm": 2.1798999309539795, "learning_rate": 8.029928785225864e-05, "loss": 0.8331, "step": 91690 }, { "epoch": 0.5858451631038932, "grad_norm": 0.8021385669708252, "learning_rate": 8.029529625423347e-05, "loss": 0.9211, "step": 91700 }, { "epoch": 0.5859090502536319, "grad_norm": 0.7000755071640015, "learning_rate": 8.029130435110844e-05, "loss": 0.8239, "step": 91710 }, { "epoch": 0.5859729374033706, "grad_norm": 0.9345866441726685, "learning_rate": 8.028731214292377e-05, "loss": 0.8656, "step": 91720 }, { "epoch": 0.5860368245531093, "grad_norm": 0.9513382911682129, "learning_rate": 8.02833196297197e-05, "loss": 0.8464, "step": 91730 }, { "epoch": 0.586100711702848, "grad_norm": 1.2676148414611816, "learning_rate": 8.027932681153636e-05, "loss": 0.9357, "step": 91740 }, { "epoch": 0.5861645988525868, "grad_norm": 0.7879144549369812, "learning_rate": 8.027533368841402e-05, "loss": 1.1884, "step": 91750 }, { "epoch": 0.5862284860023255, "grad_norm": 0.6441530585289001, "learning_rate": 8.027134026039288e-05, "loss": 1.0553, "step": 91760 }, { "epoch": 0.5862923731520642, "grad_norm": 2.0362207889556885, "learning_rate": 8.026734652751316e-05, "loss": 0.751, "step": 91770 }, { "epoch": 0.5863562603018029, "grad_norm": 0.9429267644882202, "learning_rate": 8.026335248981506e-05, "loss": 0.7756, "step": 91780 }, { "epoch": 0.5864201474515416, "grad_norm": 1.2869027853012085, "learning_rate": 8.025935814733883e-05, "loss": 0.7654, "step": 91790 }, { "epoch": 0.5864840346012803, "grad_norm": 1.0939258337020874, "learning_rate": 8.025536350012468e-05, "loss": 0.9738, "step": 91800 }, { "epoch": 0.586547921751019, "grad_norm": 0.5842766761779785, "learning_rate": 8.025136854821285e-05, "loss": 0.7524, "step": 91810 }, { "epoch": 0.5866118089007577, "grad_norm": 0.8317599892616272, "learning_rate": 8.024737329164356e-05, "loss": 1.1187, "step": 91820 }, { "epoch": 0.5866756960504964, "grad_norm": 0.8482229113578796, "learning_rate": 8.024337773045704e-05, "loss": 0.6012, "step": 91830 }, { "epoch": 0.5867395832002351, "grad_norm": 0.7396560311317444, "learning_rate": 8.023938186469357e-05, "loss": 0.9791, "step": 91840 }, { "epoch": 0.5868034703499738, "grad_norm": 0.7632973790168762, "learning_rate": 8.023538569439335e-05, "loss": 0.9775, "step": 91850 }, { "epoch": 0.5868673574997125, "grad_norm": 1.459350347518921, "learning_rate": 8.023138921959665e-05, "loss": 0.7903, "step": 91860 }, { "epoch": 0.5869312446494512, "grad_norm": 0.5813467502593994, "learning_rate": 8.022739244034369e-05, "loss": 0.8206, "step": 91870 }, { "epoch": 0.5869951317991899, "grad_norm": 0.9439472556114197, "learning_rate": 8.022339535667476e-05, "loss": 0.6998, "step": 91880 }, { "epoch": 0.5870590189489286, "grad_norm": 1.054968237876892, "learning_rate": 8.021939796863007e-05, "loss": 0.8882, "step": 91890 }, { "epoch": 0.5871229060986674, "grad_norm": 0.7589655518531799, "learning_rate": 8.021540027624991e-05, "loss": 1.0338, "step": 91900 }, { "epoch": 0.5871867932484061, "grad_norm": 5.119521617889404, "learning_rate": 8.021140227957451e-05, "loss": 1.1161, "step": 91910 }, { "epoch": 0.5872506803981448, "grad_norm": 0.9572505354881287, "learning_rate": 8.020740397864418e-05, "loss": 0.9184, "step": 91920 }, { "epoch": 0.5873145675478835, "grad_norm": 1.0495151281356812, "learning_rate": 8.020340537349915e-05, "loss": 0.8544, "step": 91930 }, { "epoch": 0.5873784546976221, "grad_norm": 0.8135344386100769, "learning_rate": 8.019940646417969e-05, "loss": 0.8227, "step": 91940 }, { "epoch": 0.5874423418473608, "grad_norm": 1.2222908735275269, "learning_rate": 8.019540725072609e-05, "loss": 1.034, "step": 91950 }, { "epoch": 0.5875062289970995, "grad_norm": 0.953247606754303, "learning_rate": 8.019140773317862e-05, "loss": 0.9178, "step": 91960 }, { "epoch": 0.5875701161468382, "grad_norm": 0.7658291459083557, "learning_rate": 8.018740791157755e-05, "loss": 0.7629, "step": 91970 }, { "epoch": 0.5876340032965769, "grad_norm": 1.0047904253005981, "learning_rate": 8.018340778596316e-05, "loss": 0.9189, "step": 91980 }, { "epoch": 0.5876978904463156, "grad_norm": 0.9360259771347046, "learning_rate": 8.017940735637574e-05, "loss": 1.0436, "step": 91990 }, { "epoch": 0.5877617775960543, "grad_norm": 0.74342942237854, "learning_rate": 8.017540662285558e-05, "loss": 0.6901, "step": 92000 }, { "epoch": 0.587825664745793, "grad_norm": 0.7133846879005432, "learning_rate": 8.017140558544299e-05, "loss": 0.9163, "step": 92010 }, { "epoch": 0.5878895518955317, "grad_norm": 1.2013561725616455, "learning_rate": 8.016740424417822e-05, "loss": 0.8493, "step": 92020 }, { "epoch": 0.5879534390452704, "grad_norm": 1.1158215999603271, "learning_rate": 8.01634025991016e-05, "loss": 0.9638, "step": 92030 }, { "epoch": 0.5880173261950091, "grad_norm": 0.8271144032478333, "learning_rate": 8.015940065025343e-05, "loss": 0.7683, "step": 92040 }, { "epoch": 0.5880812133447478, "grad_norm": 1.0541661977767944, "learning_rate": 8.015539839767399e-05, "loss": 0.8228, "step": 92050 }, { "epoch": 0.5881451004944865, "grad_norm": 1.1830748319625854, "learning_rate": 8.01513958414036e-05, "loss": 0.8117, "step": 92060 }, { "epoch": 0.5882089876442252, "grad_norm": 0.5275201201438904, "learning_rate": 8.014739298148258e-05, "loss": 0.7335, "step": 92070 }, { "epoch": 0.588272874793964, "grad_norm": 0.9330576062202454, "learning_rate": 8.014338981795122e-05, "loss": 0.8961, "step": 92080 }, { "epoch": 0.5883367619437027, "grad_norm": 1.2388197183609009, "learning_rate": 8.013938635084983e-05, "loss": 0.697, "step": 92090 }, { "epoch": 0.5884006490934414, "grad_norm": 1.1590933799743652, "learning_rate": 8.013538258021877e-05, "loss": 1.1775, "step": 92100 }, { "epoch": 0.5884645362431801, "grad_norm": 1.2639012336730957, "learning_rate": 8.013137850609833e-05, "loss": 0.7401, "step": 92110 }, { "epoch": 0.5885284233929188, "grad_norm": 0.6682813763618469, "learning_rate": 8.012737412852886e-05, "loss": 0.8262, "step": 92120 }, { "epoch": 0.5885923105426575, "grad_norm": 0.7417098879814148, "learning_rate": 8.012336944755064e-05, "loss": 1.0828, "step": 92130 }, { "epoch": 0.5886561976923962, "grad_norm": 0.7538353800773621, "learning_rate": 8.011936446320405e-05, "loss": 0.647, "step": 92140 }, { "epoch": 0.5887200848421349, "grad_norm": 0.7363274097442627, "learning_rate": 8.01153591755294e-05, "loss": 1.1043, "step": 92150 }, { "epoch": 0.5887839719918736, "grad_norm": 0.6675977110862732, "learning_rate": 8.011135358456701e-05, "loss": 0.9313, "step": 92160 }, { "epoch": 0.5888478591416123, "grad_norm": 0.7670975923538208, "learning_rate": 8.010734769035726e-05, "loss": 0.8746, "step": 92170 }, { "epoch": 0.588911746291351, "grad_norm": 1.1923779249191284, "learning_rate": 8.010334149294045e-05, "loss": 1.0088, "step": 92180 }, { "epoch": 0.5889756334410896, "grad_norm": 0.8132577538490295, "learning_rate": 8.009933499235698e-05, "loss": 1.0256, "step": 92190 }, { "epoch": 0.5890395205908283, "grad_norm": 0.7252603769302368, "learning_rate": 8.009532818864714e-05, "loss": 0.7461, "step": 92200 }, { "epoch": 0.589103407740567, "grad_norm": 1.4953992366790771, "learning_rate": 8.009132108185132e-05, "loss": 1.3034, "step": 92210 }, { "epoch": 0.5891672948903057, "grad_norm": 0.7073407769203186, "learning_rate": 8.008731367200988e-05, "loss": 0.9938, "step": 92220 }, { "epoch": 0.5892311820400444, "grad_norm": 1.0197674036026, "learning_rate": 8.008330595916314e-05, "loss": 0.9457, "step": 92230 }, { "epoch": 0.5892950691897831, "grad_norm": 0.8998727798461914, "learning_rate": 8.00792979433515e-05, "loss": 0.7637, "step": 92240 }, { "epoch": 0.5893589563395218, "grad_norm": 0.5019026398658752, "learning_rate": 8.007528962461527e-05, "loss": 0.9488, "step": 92250 }, { "epoch": 0.5894228434892606, "grad_norm": 1.0908100605010986, "learning_rate": 8.007128100299491e-05, "loss": 1.1868, "step": 92260 }, { "epoch": 0.5894867306389993, "grad_norm": 1.2412331104278564, "learning_rate": 8.006727207853069e-05, "loss": 0.7634, "step": 92270 }, { "epoch": 0.589550617788738, "grad_norm": 0.8074179291725159, "learning_rate": 8.006326285126305e-05, "loss": 0.8855, "step": 92280 }, { "epoch": 0.5896145049384767, "grad_norm": 1.471113920211792, "learning_rate": 8.005925332123235e-05, "loss": 0.7663, "step": 92290 }, { "epoch": 0.5896783920882154, "grad_norm": 0.7869247794151306, "learning_rate": 8.005524348847894e-05, "loss": 0.9511, "step": 92300 }, { "epoch": 0.5897422792379541, "grad_norm": 1.1059610843658447, "learning_rate": 8.005123335304322e-05, "loss": 0.9348, "step": 92310 }, { "epoch": 0.5898061663876928, "grad_norm": 1.1069176197052002, "learning_rate": 8.004722291496562e-05, "loss": 0.8676, "step": 92320 }, { "epoch": 0.5898700535374315, "grad_norm": 0.5974422693252563, "learning_rate": 8.004321217428647e-05, "loss": 0.7969, "step": 92330 }, { "epoch": 0.5899339406871702, "grad_norm": 1.1670259237289429, "learning_rate": 8.003920113104618e-05, "loss": 0.8566, "step": 92340 }, { "epoch": 0.5899978278369089, "grad_norm": 0.9760884642601013, "learning_rate": 8.003518978528515e-05, "loss": 0.8049, "step": 92350 }, { "epoch": 0.5900617149866476, "grad_norm": 0.7791697978973389, "learning_rate": 8.003117813704378e-05, "loss": 0.6515, "step": 92360 }, { "epoch": 0.5901256021363863, "grad_norm": 0.8998212218284607, "learning_rate": 8.002716618636245e-05, "loss": 0.9429, "step": 92370 }, { "epoch": 0.590189489286125, "grad_norm": 0.9141538739204407, "learning_rate": 8.00231539332816e-05, "loss": 0.8178, "step": 92380 }, { "epoch": 0.5902533764358637, "grad_norm": 0.765386164188385, "learning_rate": 8.001914137784161e-05, "loss": 0.973, "step": 92390 }, { "epoch": 0.5903172635856024, "grad_norm": 0.7694385647773743, "learning_rate": 8.00151285200829e-05, "loss": 0.7036, "step": 92400 }, { "epoch": 0.5903811507353411, "grad_norm": 1.3476502895355225, "learning_rate": 8.001111536004586e-05, "loss": 0.7961, "step": 92410 }, { "epoch": 0.5904450378850798, "grad_norm": 1.1224573850631714, "learning_rate": 8.000710189777094e-05, "loss": 0.8736, "step": 92420 }, { "epoch": 0.5905089250348184, "grad_norm": 0.8447276949882507, "learning_rate": 8.000308813329855e-05, "loss": 1.0167, "step": 92430 }, { "epoch": 0.5905728121845571, "grad_norm": 2.5802526473999023, "learning_rate": 7.999907406666909e-05, "loss": 0.8453, "step": 92440 }, { "epoch": 0.5906366993342959, "grad_norm": 0.7821781635284424, "learning_rate": 7.999505969792302e-05, "loss": 0.8587, "step": 92450 }, { "epoch": 0.5907005864840346, "grad_norm": 0.9936961531639099, "learning_rate": 7.999104502710074e-05, "loss": 1.1057, "step": 92460 }, { "epoch": 0.5907644736337733, "grad_norm": 0.6793760061264038, "learning_rate": 7.998703005424268e-05, "loss": 1.0557, "step": 92470 }, { "epoch": 0.590828360783512, "grad_norm": 1.253307819366455, "learning_rate": 7.99830147793893e-05, "loss": 0.7205, "step": 92480 }, { "epoch": 0.5908922479332507, "grad_norm": 0.8064923882484436, "learning_rate": 7.997899920258101e-05, "loss": 0.7813, "step": 92490 }, { "epoch": 0.5909561350829894, "grad_norm": 1.2392529249191284, "learning_rate": 7.997498332385827e-05, "loss": 0.9077, "step": 92500 }, { "epoch": 0.5910200222327281, "grad_norm": 1.3533644676208496, "learning_rate": 7.997096714326151e-05, "loss": 0.8915, "step": 92510 }, { "epoch": 0.5910839093824668, "grad_norm": 0.8305091261863708, "learning_rate": 7.99669506608312e-05, "loss": 1.0454, "step": 92520 }, { "epoch": 0.5911477965322055, "grad_norm": 0.884864866733551, "learning_rate": 7.996293387660776e-05, "loss": 0.9556, "step": 92530 }, { "epoch": 0.5912116836819442, "grad_norm": 0.6807804703712463, "learning_rate": 7.995891679063165e-05, "loss": 1.0857, "step": 92540 }, { "epoch": 0.5912755708316829, "grad_norm": 1.4398140907287598, "learning_rate": 7.995489940294333e-05, "loss": 0.8875, "step": 92550 }, { "epoch": 0.5913394579814216, "grad_norm": 0.9925829768180847, "learning_rate": 7.995088171358325e-05, "loss": 1.038, "step": 92560 }, { "epoch": 0.5914033451311603, "grad_norm": 0.7312915325164795, "learning_rate": 7.99468637225919e-05, "loss": 1.0752, "step": 92570 }, { "epoch": 0.591467232280899, "grad_norm": 1.0309982299804688, "learning_rate": 7.994284543000972e-05, "loss": 0.8225, "step": 92580 }, { "epoch": 0.5915311194306377, "grad_norm": 0.844560980796814, "learning_rate": 7.993882683587717e-05, "loss": 0.8288, "step": 92590 }, { "epoch": 0.5915950065803764, "grad_norm": 1.377153754234314, "learning_rate": 7.993480794023473e-05, "loss": 0.8974, "step": 92600 }, { "epoch": 0.5916588937301152, "grad_norm": 1.0687837600708008, "learning_rate": 7.99307887431229e-05, "loss": 0.6578, "step": 92610 }, { "epoch": 0.5917227808798539, "grad_norm": 1.0184049606323242, "learning_rate": 7.992676924458212e-05, "loss": 0.7034, "step": 92620 }, { "epoch": 0.5917866680295926, "grad_norm": 1.7974580526351929, "learning_rate": 7.992274944465287e-05, "loss": 0.8216, "step": 92630 }, { "epoch": 0.5918505551793313, "grad_norm": 0.9643707871437073, "learning_rate": 7.991872934337568e-05, "loss": 0.9556, "step": 92640 }, { "epoch": 0.59191444232907, "grad_norm": 2.1205992698669434, "learning_rate": 7.991470894079098e-05, "loss": 1.1122, "step": 92650 }, { "epoch": 0.5919783294788087, "grad_norm": 0.7959349751472473, "learning_rate": 7.991068823693928e-05, "loss": 0.892, "step": 92660 }, { "epoch": 0.5920422166285473, "grad_norm": 0.8903279900550842, "learning_rate": 7.990666723186107e-05, "loss": 0.909, "step": 92670 }, { "epoch": 0.592106103778286, "grad_norm": 0.8091008067131042, "learning_rate": 7.990264592559686e-05, "loss": 1.0945, "step": 92680 }, { "epoch": 0.5921699909280247, "grad_norm": 0.8795812726020813, "learning_rate": 7.989862431818713e-05, "loss": 1.0445, "step": 92690 }, { "epoch": 0.5922338780777634, "grad_norm": 0.7716434001922607, "learning_rate": 7.989460240967239e-05, "loss": 1.0517, "step": 92700 }, { "epoch": 0.5922977652275021, "grad_norm": 0.9718101024627686, "learning_rate": 7.989058020009315e-05, "loss": 0.9155, "step": 92710 }, { "epoch": 0.5923616523772408, "grad_norm": 1.8687045574188232, "learning_rate": 7.98865576894899e-05, "loss": 0.8678, "step": 92720 }, { "epoch": 0.5924255395269795, "grad_norm": 0.5522985458374023, "learning_rate": 7.988253487790315e-05, "loss": 0.9144, "step": 92730 }, { "epoch": 0.5924894266767182, "grad_norm": 0.9412902593612671, "learning_rate": 7.987851176537342e-05, "loss": 0.7785, "step": 92740 }, { "epoch": 0.5925533138264569, "grad_norm": 0.5858872532844543, "learning_rate": 7.987448835194124e-05, "loss": 0.7684, "step": 92750 }, { "epoch": 0.5926172009761956, "grad_norm": 0.7545718550682068, "learning_rate": 7.987046463764712e-05, "loss": 0.8157, "step": 92760 }, { "epoch": 0.5926810881259343, "grad_norm": 1.0280770063400269, "learning_rate": 7.986644062253157e-05, "loss": 0.8308, "step": 92770 }, { "epoch": 0.592744975275673, "grad_norm": 0.6888710260391235, "learning_rate": 7.986241630663512e-05, "loss": 0.827, "step": 92780 }, { "epoch": 0.5928088624254118, "grad_norm": 0.7648938298225403, "learning_rate": 7.985839168999831e-05, "loss": 0.7851, "step": 92790 }, { "epoch": 0.5928727495751505, "grad_norm": 1.144452452659607, "learning_rate": 7.985436677266166e-05, "loss": 0.8219, "step": 92800 }, { "epoch": 0.5929366367248892, "grad_norm": 0.9473939538002014, "learning_rate": 7.985034155466572e-05, "loss": 0.9113, "step": 92810 }, { "epoch": 0.5930005238746279, "grad_norm": 1.33318030834198, "learning_rate": 7.984631603605102e-05, "loss": 0.7248, "step": 92820 }, { "epoch": 0.5930644110243666, "grad_norm": 1.1216987371444702, "learning_rate": 7.984229021685807e-05, "loss": 0.7527, "step": 92830 }, { "epoch": 0.5931282981741053, "grad_norm": 1.0254566669464111, "learning_rate": 7.983826409712747e-05, "loss": 1.0293, "step": 92840 }, { "epoch": 0.593192185323844, "grad_norm": 0.7557952404022217, "learning_rate": 7.983423767689972e-05, "loss": 0.8593, "step": 92850 }, { "epoch": 0.5932560724735827, "grad_norm": 0.8302872180938721, "learning_rate": 7.983021095621539e-05, "loss": 0.8756, "step": 92860 }, { "epoch": 0.5933199596233214, "grad_norm": 0.7966361045837402, "learning_rate": 7.982618393511503e-05, "loss": 0.8578, "step": 92870 }, { "epoch": 0.5933838467730601, "grad_norm": 1.1069227457046509, "learning_rate": 7.982215661363918e-05, "loss": 0.7703, "step": 92880 }, { "epoch": 0.5934477339227988, "grad_norm": 0.5603930354118347, "learning_rate": 7.981812899182844e-05, "loss": 0.8665, "step": 92890 }, { "epoch": 0.5935116210725375, "grad_norm": 1.1370865106582642, "learning_rate": 7.981410106972333e-05, "loss": 0.8621, "step": 92900 }, { "epoch": 0.5935755082222761, "grad_norm": 1.1741241216659546, "learning_rate": 7.981007284736442e-05, "loss": 1.0893, "step": 92910 }, { "epoch": 0.5936393953720148, "grad_norm": 1.065045714378357, "learning_rate": 7.98060443247923e-05, "loss": 1.0186, "step": 92920 }, { "epoch": 0.5937032825217535, "grad_norm": 0.7145927548408508, "learning_rate": 7.980201550204753e-05, "loss": 0.9665, "step": 92930 }, { "epoch": 0.5937671696714922, "grad_norm": 0.7131385803222656, "learning_rate": 7.979798637917068e-05, "loss": 0.9271, "step": 92940 }, { "epoch": 0.5938310568212309, "grad_norm": 2.897143840789795, "learning_rate": 7.979395695620234e-05, "loss": 0.9113, "step": 92950 }, { "epoch": 0.5938949439709696, "grad_norm": 0.9042292237281799, "learning_rate": 7.978992723318305e-05, "loss": 0.869, "step": 92960 }, { "epoch": 0.5939588311207084, "grad_norm": 0.5825813412666321, "learning_rate": 7.978589721015343e-05, "loss": 1.1739, "step": 92970 }, { "epoch": 0.5940227182704471, "grad_norm": 1.031434416770935, "learning_rate": 7.978186688715406e-05, "loss": 0.817, "step": 92980 }, { "epoch": 0.5940866054201858, "grad_norm": 0.81146639585495, "learning_rate": 7.977783626422553e-05, "loss": 1.0523, "step": 92990 }, { "epoch": 0.5941504925699245, "grad_norm": 1.5747389793395996, "learning_rate": 7.977380534140843e-05, "loss": 0.7111, "step": 93000 }, { "epoch": 0.5942143797196632, "grad_norm": 0.8901327848434448, "learning_rate": 7.976977411874334e-05, "loss": 0.8639, "step": 93010 }, { "epoch": 0.5942782668694019, "grad_norm": 0.6581230163574219, "learning_rate": 7.976574259627087e-05, "loss": 0.7677, "step": 93020 }, { "epoch": 0.5943421540191406, "grad_norm": 0.660140335559845, "learning_rate": 7.976171077403163e-05, "loss": 1.058, "step": 93030 }, { "epoch": 0.5944060411688793, "grad_norm": 1.2601295709609985, "learning_rate": 7.97576786520662e-05, "loss": 1.0485, "step": 93040 }, { "epoch": 0.594469928318618, "grad_norm": 0.7179937362670898, "learning_rate": 7.975364623041523e-05, "loss": 0.7853, "step": 93050 }, { "epoch": 0.5945338154683567, "grad_norm": 1.2381994724273682, "learning_rate": 7.974961350911926e-05, "loss": 0.8814, "step": 93060 }, { "epoch": 0.5945977026180954, "grad_norm": 0.8336678743362427, "learning_rate": 7.974558048821898e-05, "loss": 0.8024, "step": 93070 }, { "epoch": 0.5946615897678341, "grad_norm": 0.6095098257064819, "learning_rate": 7.974154716775497e-05, "loss": 1.0271, "step": 93080 }, { "epoch": 0.5947254769175728, "grad_norm": 0.7402802109718323, "learning_rate": 7.973791692324393e-05, "loss": 1.0233, "step": 93090 }, { "epoch": 0.5947893640673115, "grad_norm": 1.3775721788406372, "learning_rate": 7.973388303372073e-05, "loss": 0.8029, "step": 93100 }, { "epoch": 0.5948532512170502, "grad_norm": 1.3130244016647339, "learning_rate": 7.972984884475162e-05, "loss": 0.7344, "step": 93110 }, { "epoch": 0.594917138366789, "grad_norm": 1.00970458984375, "learning_rate": 7.97258143563772e-05, "loss": 0.871, "step": 93120 }, { "epoch": 0.5949810255165277, "grad_norm": 0.5588665008544922, "learning_rate": 7.972177956863811e-05, "loss": 0.8927, "step": 93130 }, { "epoch": 0.5950449126662664, "grad_norm": 0.5917448997497559, "learning_rate": 7.971774448157499e-05, "loss": 0.922, "step": 93140 }, { "epoch": 0.5951087998160051, "grad_norm": 0.8795225620269775, "learning_rate": 7.971370909522847e-05, "loss": 1.1024, "step": 93150 }, { "epoch": 0.5951726869657437, "grad_norm": 0.7276126146316528, "learning_rate": 7.97096734096392e-05, "loss": 0.9061, "step": 93160 }, { "epoch": 0.5952365741154824, "grad_norm": 1.1168190240859985, "learning_rate": 7.970563742484782e-05, "loss": 0.8808, "step": 93170 }, { "epoch": 0.5953004612652211, "grad_norm": 0.7256569266319275, "learning_rate": 7.970160114089496e-05, "loss": 0.8881, "step": 93180 }, { "epoch": 0.5953643484149598, "grad_norm": 1.2579829692840576, "learning_rate": 7.969756455782129e-05, "loss": 0.986, "step": 93190 }, { "epoch": 0.5954282355646985, "grad_norm": 1.1053801774978638, "learning_rate": 7.969352767566744e-05, "loss": 0.9049, "step": 93200 }, { "epoch": 0.5954921227144372, "grad_norm": 1.0768349170684814, "learning_rate": 7.96894904944741e-05, "loss": 0.9947, "step": 93210 }, { "epoch": 0.5955560098641759, "grad_norm": 0.8521894812583923, "learning_rate": 7.968545301428188e-05, "loss": 0.7064, "step": 93220 }, { "epoch": 0.5956198970139146, "grad_norm": 0.9622153639793396, "learning_rate": 7.968141523513149e-05, "loss": 0.8189, "step": 93230 }, { "epoch": 0.5956837841636533, "grad_norm": 1.014563798904419, "learning_rate": 7.967737715706354e-05, "loss": 1.0966, "step": 93240 }, { "epoch": 0.595747671313392, "grad_norm": 1.3811198472976685, "learning_rate": 7.967333878011875e-05, "loss": 1.043, "step": 93250 }, { "epoch": 0.5958115584631307, "grad_norm": 0.7607221007347107, "learning_rate": 7.966930010433777e-05, "loss": 0.9357, "step": 93260 }, { "epoch": 0.5958754456128694, "grad_norm": 0.9977759122848511, "learning_rate": 7.966526112976126e-05, "loss": 1.0732, "step": 93270 }, { "epoch": 0.5959393327626081, "grad_norm": 1.0473393201828003, "learning_rate": 7.966122185642992e-05, "loss": 0.9741, "step": 93280 }, { "epoch": 0.5960032199123468, "grad_norm": 1.254073977470398, "learning_rate": 7.965718228438442e-05, "loss": 0.7127, "step": 93290 }, { "epoch": 0.5960671070620855, "grad_norm": 0.9612134099006653, "learning_rate": 7.965314241366542e-05, "loss": 0.8608, "step": 93300 }, { "epoch": 0.5961309942118242, "grad_norm": 1.8050369024276733, "learning_rate": 7.964910224431361e-05, "loss": 0.6996, "step": 93310 }, { "epoch": 0.596194881361563, "grad_norm": 1.1563001871109009, "learning_rate": 7.96450617763697e-05, "loss": 0.7266, "step": 93320 }, { "epoch": 0.5962587685113017, "grad_norm": 1.396944284439087, "learning_rate": 7.964102100987439e-05, "loss": 1.2127, "step": 93330 }, { "epoch": 0.5963226556610404, "grad_norm": 1.3693512678146362, "learning_rate": 7.963697994486834e-05, "loss": 0.8019, "step": 93340 }, { "epoch": 0.5963865428107791, "grad_norm": 0.8561686277389526, "learning_rate": 7.963293858139227e-05, "loss": 1.2762, "step": 93350 }, { "epoch": 0.5964504299605178, "grad_norm": 1.4842015504837036, "learning_rate": 7.962889691948687e-05, "loss": 1.0344, "step": 93360 }, { "epoch": 0.5965143171102565, "grad_norm": 0.6768175363540649, "learning_rate": 7.962485495919285e-05, "loss": 0.7479, "step": 93370 }, { "epoch": 0.5965782042599952, "grad_norm": 0.8603546023368835, "learning_rate": 7.96208127005509e-05, "loss": 0.7013, "step": 93380 }, { "epoch": 0.5966420914097339, "grad_norm": 0.8126310706138611, "learning_rate": 7.961677014360174e-05, "loss": 0.9827, "step": 93390 }, { "epoch": 0.5967059785594725, "grad_norm": 0.7931829690933228, "learning_rate": 7.961272728838609e-05, "loss": 0.8245, "step": 93400 }, { "epoch": 0.5967698657092112, "grad_norm": 0.7296050786972046, "learning_rate": 7.960868413494465e-05, "loss": 0.7299, "step": 93410 }, { "epoch": 0.5968337528589499, "grad_norm": 1.148210883140564, "learning_rate": 7.960464068331814e-05, "loss": 0.9199, "step": 93420 }, { "epoch": 0.5968976400086886, "grad_norm": 1.0481257438659668, "learning_rate": 7.960059693354731e-05, "loss": 0.8088, "step": 93430 }, { "epoch": 0.5969615271584273, "grad_norm": 0.6699005365371704, "learning_rate": 7.959655288567285e-05, "loss": 1.055, "step": 93440 }, { "epoch": 0.597025414308166, "grad_norm": 1.0113779306411743, "learning_rate": 7.959250853973549e-05, "loss": 1.0175, "step": 93450 }, { "epoch": 0.5970893014579047, "grad_norm": 1.1636087894439697, "learning_rate": 7.958846389577597e-05, "loss": 0.8509, "step": 93460 }, { "epoch": 0.5971531886076434, "grad_norm": 0.8707061409950256, "learning_rate": 7.958441895383503e-05, "loss": 1.0673, "step": 93470 }, { "epoch": 0.5972170757573821, "grad_norm": 0.4944153130054474, "learning_rate": 7.95803737139534e-05, "loss": 0.8289, "step": 93480 }, { "epoch": 0.5972809629071208, "grad_norm": 0.9866294860839844, "learning_rate": 7.95763281761718e-05, "loss": 0.9177, "step": 93490 }, { "epoch": 0.5973448500568596, "grad_norm": 0.7539530992507935, "learning_rate": 7.957228234053099e-05, "loss": 0.6764, "step": 93500 }, { "epoch": 0.5974087372065983, "grad_norm": 0.793980062007904, "learning_rate": 7.956823620707172e-05, "loss": 0.9874, "step": 93510 }, { "epoch": 0.597472624356337, "grad_norm": 0.760221004486084, "learning_rate": 7.956418977583474e-05, "loss": 0.8189, "step": 93520 }, { "epoch": 0.5975365115060757, "grad_norm": 0.4851871728897095, "learning_rate": 7.956014304686076e-05, "loss": 0.9784, "step": 93530 }, { "epoch": 0.5976003986558144, "grad_norm": 0.6846779584884644, "learning_rate": 7.95560960201906e-05, "loss": 0.796, "step": 93540 }, { "epoch": 0.5976642858055531, "grad_norm": 0.6320601105690002, "learning_rate": 7.955204869586497e-05, "loss": 0.9733, "step": 93550 }, { "epoch": 0.5977281729552918, "grad_norm": 1.2107212543487549, "learning_rate": 7.954800107392463e-05, "loss": 0.928, "step": 93560 }, { "epoch": 0.5977920601050305, "grad_norm": 1.3490923643112183, "learning_rate": 7.954395315441039e-05, "loss": 1.0885, "step": 93570 }, { "epoch": 0.5978559472547692, "grad_norm": 0.724120020866394, "learning_rate": 7.953990493736296e-05, "loss": 0.7475, "step": 93580 }, { "epoch": 0.5979198344045079, "grad_norm": 1.5954548120498657, "learning_rate": 7.953585642282314e-05, "loss": 1.1069, "step": 93590 }, { "epoch": 0.5979837215542466, "grad_norm": 1.4548444747924805, "learning_rate": 7.953180761083169e-05, "loss": 1.0161, "step": 93600 }, { "epoch": 0.5980476087039853, "grad_norm": 0.6197888851165771, "learning_rate": 7.952775850142939e-05, "loss": 0.7571, "step": 93610 }, { "epoch": 0.598111495853724, "grad_norm": 0.7104484438896179, "learning_rate": 7.952370909465702e-05, "loss": 0.8744, "step": 93620 }, { "epoch": 0.5981753830034627, "grad_norm": 0.6192456483840942, "learning_rate": 7.951965939055535e-05, "loss": 1.1207, "step": 93630 }, { "epoch": 0.5982392701532013, "grad_norm": 1.01494300365448, "learning_rate": 7.951560938916517e-05, "loss": 0.7774, "step": 93640 }, { "epoch": 0.59830315730294, "grad_norm": 1.5885846614837646, "learning_rate": 7.951155909052727e-05, "loss": 0.7792, "step": 93650 }, { "epoch": 0.5983670444526787, "grad_norm": 0.9878436326980591, "learning_rate": 7.950750849468245e-05, "loss": 0.8927, "step": 93660 }, { "epoch": 0.5984309316024174, "grad_norm": 0.7260859608650208, "learning_rate": 7.950345760167148e-05, "loss": 0.9279, "step": 93670 }, { "epoch": 0.5984948187521562, "grad_norm": 0.9209891557693481, "learning_rate": 7.949940641153517e-05, "loss": 0.7823, "step": 93680 }, { "epoch": 0.5985587059018949, "grad_norm": 0.6021539568901062, "learning_rate": 7.949535492431433e-05, "loss": 0.8444, "step": 93690 }, { "epoch": 0.5986225930516336, "grad_norm": 1.3056902885437012, "learning_rate": 7.949130314004974e-05, "loss": 0.812, "step": 93700 }, { "epoch": 0.5986864802013723, "grad_norm": 0.8002413511276245, "learning_rate": 7.948725105878221e-05, "loss": 0.9184, "step": 93710 }, { "epoch": 0.598750367351111, "grad_norm": 1.4868652820587158, "learning_rate": 7.948319868055254e-05, "loss": 0.8872, "step": 93720 }, { "epoch": 0.5988142545008497, "grad_norm": 0.8618303537368774, "learning_rate": 7.947914600540158e-05, "loss": 0.7252, "step": 93730 }, { "epoch": 0.5988781416505884, "grad_norm": 0.5396475791931152, "learning_rate": 7.947509303337009e-05, "loss": 0.7807, "step": 93740 }, { "epoch": 0.5989420288003271, "grad_norm": 0.7746068239212036, "learning_rate": 7.947103976449892e-05, "loss": 0.8018, "step": 93750 }, { "epoch": 0.5990059159500658, "grad_norm": 1.0696126222610474, "learning_rate": 7.94669861988289e-05, "loss": 0.7254, "step": 93760 }, { "epoch": 0.5990698030998045, "grad_norm": 0.8595736622810364, "learning_rate": 7.946293233640082e-05, "loss": 0.765, "step": 93770 }, { "epoch": 0.5991336902495432, "grad_norm": 1.1637159585952759, "learning_rate": 7.945887817725552e-05, "loss": 0.8776, "step": 93780 }, { "epoch": 0.5991975773992819, "grad_norm": 0.9262639284133911, "learning_rate": 7.945482372143385e-05, "loss": 1.2434, "step": 93790 }, { "epoch": 0.5992614645490206, "grad_norm": 0.6639724969863892, "learning_rate": 7.945076896897661e-05, "loss": 0.9795, "step": 93800 }, { "epoch": 0.5993253516987593, "grad_norm": 0.7169008255004883, "learning_rate": 7.944671391992465e-05, "loss": 1.1887, "step": 93810 }, { "epoch": 0.599389238848498, "grad_norm": 0.9885384440422058, "learning_rate": 7.944265857431881e-05, "loss": 0.9445, "step": 93820 }, { "epoch": 0.5994531259982367, "grad_norm": 0.639473021030426, "learning_rate": 7.943860293219993e-05, "loss": 0.7167, "step": 93830 }, { "epoch": 0.5995170131479755, "grad_norm": 0.9396152496337891, "learning_rate": 7.943454699360884e-05, "loss": 0.7037, "step": 93840 }, { "epoch": 0.5995809002977142, "grad_norm": 1.092712163925171, "learning_rate": 7.94304907585864e-05, "loss": 1.2453, "step": 93850 }, { "epoch": 0.5996447874474529, "grad_norm": 1.040013313293457, "learning_rate": 7.942643422717346e-05, "loss": 0.7103, "step": 93860 }, { "epoch": 0.5997086745971916, "grad_norm": 1.5030107498168945, "learning_rate": 7.942237739941086e-05, "loss": 1.5542, "step": 93870 }, { "epoch": 0.5997725617469303, "grad_norm": 1.024461030960083, "learning_rate": 7.941832027533948e-05, "loss": 0.947, "step": 93880 }, { "epoch": 0.5998364488966689, "grad_norm": 1.201267957687378, "learning_rate": 7.941426285500016e-05, "loss": 0.7646, "step": 93890 }, { "epoch": 0.5999003360464076, "grad_norm": 1.2446759939193726, "learning_rate": 7.941020513843376e-05, "loss": 0.8437, "step": 93900 }, { "epoch": 0.5999642231961463, "grad_norm": 0.7842534780502319, "learning_rate": 7.940614712568115e-05, "loss": 0.6985, "step": 93910 }, { "epoch": 0.600028110345885, "grad_norm": 1.0466797351837158, "learning_rate": 7.940208881678322e-05, "loss": 0.9707, "step": 93920 }, { "epoch": 0.6000919974956237, "grad_norm": 0.7793298363685608, "learning_rate": 7.939803021178078e-05, "loss": 0.9014, "step": 93930 }, { "epoch": 0.6001558846453624, "grad_norm": 0.6594678163528442, "learning_rate": 7.939397131071478e-05, "loss": 0.8811, "step": 93940 }, { "epoch": 0.6002197717951011, "grad_norm": 0.7732535004615784, "learning_rate": 7.938991211362602e-05, "loss": 0.9198, "step": 93950 }, { "epoch": 0.6002836589448398, "grad_norm": 0.6953932642936707, "learning_rate": 7.938585262055546e-05, "loss": 0.8225, "step": 93960 }, { "epoch": 0.6003475460945785, "grad_norm": 0.4991307556629181, "learning_rate": 7.938179283154392e-05, "loss": 0.7595, "step": 93970 }, { "epoch": 0.6004114332443172, "grad_norm": 1.582554817199707, "learning_rate": 7.937773274663231e-05, "loss": 0.9514, "step": 93980 }, { "epoch": 0.6004753203940559, "grad_norm": 0.8112611770629883, "learning_rate": 7.937367236586153e-05, "loss": 0.9182, "step": 93990 }, { "epoch": 0.6005392075437946, "grad_norm": 0.8769091367721558, "learning_rate": 7.936961168927244e-05, "loss": 1.1743, "step": 94000 }, { "epoch": 0.6006030946935333, "grad_norm": 0.733625054359436, "learning_rate": 7.936555071690597e-05, "loss": 0.9444, "step": 94010 }, { "epoch": 0.600666981843272, "grad_norm": 0.9377986192703247, "learning_rate": 7.936148944880297e-05, "loss": 0.7817, "step": 94020 }, { "epoch": 0.6007308689930108, "grad_norm": 0.7029353380203247, "learning_rate": 7.935742788500438e-05, "loss": 0.8874, "step": 94030 }, { "epoch": 0.6007947561427495, "grad_norm": 1.3966723680496216, "learning_rate": 7.93533660255511e-05, "loss": 1.0347, "step": 94040 }, { "epoch": 0.6008586432924882, "grad_norm": 0.5431897044181824, "learning_rate": 7.934930387048405e-05, "loss": 0.8037, "step": 94050 }, { "epoch": 0.6009225304422269, "grad_norm": 0.6450621485710144, "learning_rate": 7.93452414198441e-05, "loss": 0.8367, "step": 94060 }, { "epoch": 0.6009864175919656, "grad_norm": 0.6869795322418213, "learning_rate": 7.93411786736722e-05, "loss": 1.1556, "step": 94070 }, { "epoch": 0.6010503047417043, "grad_norm": 1.2084323167800903, "learning_rate": 7.93371156320092e-05, "loss": 0.7216, "step": 94080 }, { "epoch": 0.601114191891443, "grad_norm": 0.8116541504859924, "learning_rate": 7.93330522948961e-05, "loss": 0.9562, "step": 94090 }, { "epoch": 0.6011780790411817, "grad_norm": 1.043238878250122, "learning_rate": 7.932898866237378e-05, "loss": 0.7446, "step": 94100 }, { "epoch": 0.6012419661909204, "grad_norm": 1.0671076774597168, "learning_rate": 7.932492473448318e-05, "loss": 0.952, "step": 94110 }, { "epoch": 0.6013058533406591, "grad_norm": 1.1957184076309204, "learning_rate": 7.932086051126521e-05, "loss": 0.8455, "step": 94120 }, { "epoch": 0.6013697404903977, "grad_norm": 0.6923540830612183, "learning_rate": 7.931679599276081e-05, "loss": 0.8397, "step": 94130 }, { "epoch": 0.6014336276401364, "grad_norm": 0.699149489402771, "learning_rate": 7.931273117901091e-05, "loss": 0.8874, "step": 94140 }, { "epoch": 0.6014975147898751, "grad_norm": 2.1960628032684326, "learning_rate": 7.930866607005643e-05, "loss": 0.811, "step": 94150 }, { "epoch": 0.6015614019396138, "grad_norm": 0.6790569424629211, "learning_rate": 7.930460066593836e-05, "loss": 1.0083, "step": 94160 }, { "epoch": 0.6016252890893525, "grad_norm": 0.6594065427780151, "learning_rate": 7.930053496669758e-05, "loss": 0.9659, "step": 94170 }, { "epoch": 0.6016891762390912, "grad_norm": 0.7281797528266907, "learning_rate": 7.929646897237509e-05, "loss": 0.8501, "step": 94180 }, { "epoch": 0.60175306338883, "grad_norm": 1.0084353685379028, "learning_rate": 7.929240268301179e-05, "loss": 1.0269, "step": 94190 }, { "epoch": 0.6018169505385687, "grad_norm": 0.6016590595245361, "learning_rate": 7.928833609864867e-05, "loss": 1.0357, "step": 94200 }, { "epoch": 0.6018808376883074, "grad_norm": 0.7156760692596436, "learning_rate": 7.928426921932665e-05, "loss": 0.7571, "step": 94210 }, { "epoch": 0.6019447248380461, "grad_norm": 0.6921888589859009, "learning_rate": 7.928020204508673e-05, "loss": 0.7635, "step": 94220 }, { "epoch": 0.6020086119877848, "grad_norm": 0.9318345189094543, "learning_rate": 7.927613457596983e-05, "loss": 0.8359, "step": 94230 }, { "epoch": 0.6020724991375235, "grad_norm": 1.3739084005355835, "learning_rate": 7.927206681201693e-05, "loss": 1.111, "step": 94240 }, { "epoch": 0.6021363862872622, "grad_norm": 1.0711864233016968, "learning_rate": 7.926799875326898e-05, "loss": 0.941, "step": 94250 }, { "epoch": 0.6022002734370009, "grad_norm": 0.7577251195907593, "learning_rate": 7.926393039976698e-05, "loss": 0.767, "step": 94260 }, { "epoch": 0.6022641605867396, "grad_norm": 1.2173386812210083, "learning_rate": 7.925986175155188e-05, "loss": 0.9573, "step": 94270 }, { "epoch": 0.6023280477364783, "grad_norm": 0.8785862922668457, "learning_rate": 7.925579280866465e-05, "loss": 0.9249, "step": 94280 }, { "epoch": 0.602391934886217, "grad_norm": 0.6809684038162231, "learning_rate": 7.925172357114628e-05, "loss": 0.7592, "step": 94290 }, { "epoch": 0.6024558220359557, "grad_norm": 0.7136757373809814, "learning_rate": 7.924765403903775e-05, "loss": 0.8912, "step": 94300 }, { "epoch": 0.6025197091856944, "grad_norm": 1.4909855127334595, "learning_rate": 7.924358421238005e-05, "loss": 0.7308, "step": 94310 }, { "epoch": 0.6025835963354331, "grad_norm": 0.7292258739471436, "learning_rate": 7.923951409121416e-05, "loss": 0.8375, "step": 94320 }, { "epoch": 0.6026474834851718, "grad_norm": 0.7449434995651245, "learning_rate": 7.923544367558104e-05, "loss": 0.7606, "step": 94330 }, { "epoch": 0.6027113706349105, "grad_norm": 1.1462843418121338, "learning_rate": 7.923137296552174e-05, "loss": 0.9557, "step": 94340 }, { "epoch": 0.6027752577846492, "grad_norm": 1.5585012435913086, "learning_rate": 7.92273019610772e-05, "loss": 0.9248, "step": 94350 }, { "epoch": 0.602839144934388, "grad_norm": 0.7382988929748535, "learning_rate": 7.922323066228845e-05, "loss": 0.7405, "step": 94360 }, { "epoch": 0.6029030320841265, "grad_norm": 0.7861965894699097, "learning_rate": 7.92191590691965e-05, "loss": 0.8206, "step": 94370 }, { "epoch": 0.6029669192338653, "grad_norm": 0.8503504395484924, "learning_rate": 7.921508718184233e-05, "loss": 0.7665, "step": 94380 }, { "epoch": 0.603030806383604, "grad_norm": 1.1574392318725586, "learning_rate": 7.921101500026695e-05, "loss": 0.8627, "step": 94390 }, { "epoch": 0.6030946935333427, "grad_norm": 1.1855578422546387, "learning_rate": 7.92069425245114e-05, "loss": 0.8954, "step": 94400 }, { "epoch": 0.6031585806830814, "grad_norm": 0.5956348776817322, "learning_rate": 7.920286975461665e-05, "loss": 1.0047, "step": 94410 }, { "epoch": 0.6032224678328201, "grad_norm": 1.5525617599487305, "learning_rate": 7.919879669062376e-05, "loss": 0.9846, "step": 94420 }, { "epoch": 0.6032863549825588, "grad_norm": 0.8425145745277405, "learning_rate": 7.919472333257369e-05, "loss": 0.7559, "step": 94430 }, { "epoch": 0.6033502421322975, "grad_norm": 3.6817235946655273, "learning_rate": 7.919064968050753e-05, "loss": 0.8537, "step": 94440 }, { "epoch": 0.6034141292820362, "grad_norm": 1.1978166103363037, "learning_rate": 7.918657573446626e-05, "loss": 0.7715, "step": 94450 }, { "epoch": 0.6034780164317749, "grad_norm": 1.2537955045700073, "learning_rate": 7.918250149449093e-05, "loss": 0.9165, "step": 94460 }, { "epoch": 0.6035419035815136, "grad_norm": 0.7937589883804321, "learning_rate": 7.917842696062257e-05, "loss": 1.1595, "step": 94470 }, { "epoch": 0.6036057907312523, "grad_norm": 0.7704179883003235, "learning_rate": 7.917435213290218e-05, "loss": 0.7351, "step": 94480 }, { "epoch": 0.603669677880991, "grad_norm": 0.5766093730926514, "learning_rate": 7.917027701137085e-05, "loss": 0.763, "step": 94490 }, { "epoch": 0.6037335650307297, "grad_norm": 1.0193499326705933, "learning_rate": 7.916620159606958e-05, "loss": 0.85, "step": 94500 }, { "epoch": 0.6037974521804684, "grad_norm": 0.7235758304595947, "learning_rate": 7.916212588703944e-05, "loss": 0.9735, "step": 94510 }, { "epoch": 0.6038613393302071, "grad_norm": 1.022512435913086, "learning_rate": 7.915804988432146e-05, "loss": 0.9828, "step": 94520 }, { "epoch": 0.6039252264799458, "grad_norm": 1.0742284059524536, "learning_rate": 7.915397358795669e-05, "loss": 0.8061, "step": 94530 }, { "epoch": 0.6039891136296845, "grad_norm": 0.8321564793586731, "learning_rate": 7.914989699798618e-05, "loss": 0.7689, "step": 94540 }, { "epoch": 0.6040530007794233, "grad_norm": 1.0344573259353638, "learning_rate": 7.9145820114451e-05, "loss": 0.6514, "step": 94550 }, { "epoch": 0.604116887929162, "grad_norm": 0.8881844878196716, "learning_rate": 7.914174293739221e-05, "loss": 0.8515, "step": 94560 }, { "epoch": 0.6041807750789007, "grad_norm": 0.4412252604961395, "learning_rate": 7.913766546685083e-05, "loss": 0.824, "step": 94570 }, { "epoch": 0.6042446622286394, "grad_norm": 0.8809495568275452, "learning_rate": 7.913358770286796e-05, "loss": 0.7449, "step": 94580 }, { "epoch": 0.6043085493783781, "grad_norm": 0.5300993919372559, "learning_rate": 7.912950964548466e-05, "loss": 0.7912, "step": 94590 }, { "epoch": 0.6043724365281168, "grad_norm": 1.1538459062576294, "learning_rate": 7.9125431294742e-05, "loss": 0.9233, "step": 94600 }, { "epoch": 0.6044363236778555, "grad_norm": 1.1883444786071777, "learning_rate": 7.912135265068104e-05, "loss": 0.9529, "step": 94610 }, { "epoch": 0.6045002108275941, "grad_norm": 0.4078890085220337, "learning_rate": 7.911727371334285e-05, "loss": 0.8081, "step": 94620 }, { "epoch": 0.6045640979773328, "grad_norm": 0.7821246981620789, "learning_rate": 7.911319448276855e-05, "loss": 0.71, "step": 94630 }, { "epoch": 0.6046279851270715, "grad_norm": 1.401476502418518, "learning_rate": 7.910911495899919e-05, "loss": 0.7582, "step": 94640 }, { "epoch": 0.6046918722768102, "grad_norm": 0.75636887550354, "learning_rate": 7.910503514207585e-05, "loss": 0.8437, "step": 94650 }, { "epoch": 0.6047557594265489, "grad_norm": 0.6574771404266357, "learning_rate": 7.910095503203964e-05, "loss": 0.701, "step": 94660 }, { "epoch": 0.6048196465762876, "grad_norm": 0.7583115100860596, "learning_rate": 7.909687462893163e-05, "loss": 0.6434, "step": 94670 }, { "epoch": 0.6048835337260263, "grad_norm": 0.9831967353820801, "learning_rate": 7.909279393279292e-05, "loss": 0.8715, "step": 94680 }, { "epoch": 0.604947420875765, "grad_norm": 0.7744137048721313, "learning_rate": 7.908871294366461e-05, "loss": 0.9322, "step": 94690 }, { "epoch": 0.6050113080255037, "grad_norm": 0.7049340605735779, "learning_rate": 7.90846316615878e-05, "loss": 0.7142, "step": 94700 }, { "epoch": 0.6050751951752424, "grad_norm": 1.377447247505188, "learning_rate": 7.908055008660358e-05, "loss": 0.7331, "step": 94710 }, { "epoch": 0.6051390823249811, "grad_norm": 0.7816984057426453, "learning_rate": 7.907646821875305e-05, "loss": 0.8407, "step": 94720 }, { "epoch": 0.6052029694747199, "grad_norm": 0.7210595011711121, "learning_rate": 7.907238605807734e-05, "loss": 0.8329, "step": 94730 }, { "epoch": 0.6052668566244586, "grad_norm": 0.8540478944778442, "learning_rate": 7.906830360461757e-05, "loss": 1.0039, "step": 94740 }, { "epoch": 0.6053307437741973, "grad_norm": 1.395521879196167, "learning_rate": 7.906422085841481e-05, "loss": 0.8576, "step": 94750 }, { "epoch": 0.605394630923936, "grad_norm": 1.551674246788025, "learning_rate": 7.906013781951022e-05, "loss": 0.8292, "step": 94760 }, { "epoch": 0.6054585180736747, "grad_norm": 0.5742878317832947, "learning_rate": 7.905605448794489e-05, "loss": 0.7943, "step": 94770 }, { "epoch": 0.6055224052234134, "grad_norm": 1.2160093784332275, "learning_rate": 7.905197086375995e-05, "loss": 0.7231, "step": 94780 }, { "epoch": 0.6055862923731521, "grad_norm": 0.824417769908905, "learning_rate": 7.904788694699654e-05, "loss": 0.7877, "step": 94790 }, { "epoch": 0.6056501795228908, "grad_norm": 0.9510350227355957, "learning_rate": 7.904380273769578e-05, "loss": 0.9848, "step": 94800 }, { "epoch": 0.6057140666726295, "grad_norm": 1.160369634628296, "learning_rate": 7.90397182358988e-05, "loss": 0.7315, "step": 94810 }, { "epoch": 0.6057779538223682, "grad_norm": 0.949501633644104, "learning_rate": 7.903563344164673e-05, "loss": 0.9625, "step": 94820 }, { "epoch": 0.6058418409721069, "grad_norm": 1.4712127447128296, "learning_rate": 7.90315483549807e-05, "loss": 0.8038, "step": 94830 }, { "epoch": 0.6059057281218456, "grad_norm": 1.125845193862915, "learning_rate": 7.902746297594187e-05, "loss": 0.9969, "step": 94840 }, { "epoch": 0.6059696152715843, "grad_norm": 1.1382551193237305, "learning_rate": 7.90233773045714e-05, "loss": 1.1508, "step": 94850 }, { "epoch": 0.6060335024213229, "grad_norm": 0.8378157615661621, "learning_rate": 7.901929134091038e-05, "loss": 0.8542, "step": 94860 }, { "epoch": 0.6060973895710616, "grad_norm": 1.001591682434082, "learning_rate": 7.9015205085e-05, "loss": 0.81, "step": 94870 }, { "epoch": 0.6061612767208003, "grad_norm": 0.8801831603050232, "learning_rate": 7.901111853688141e-05, "loss": 1.3241, "step": 94880 }, { "epoch": 0.606225163870539, "grad_norm": 0.739589273929596, "learning_rate": 7.900703169659574e-05, "loss": 0.7344, "step": 94890 }, { "epoch": 0.6062890510202777, "grad_norm": 0.6854998469352722, "learning_rate": 7.900294456418418e-05, "loss": 0.7173, "step": 94900 }, { "epoch": 0.6063529381700165, "grad_norm": 0.9052863121032715, "learning_rate": 7.899885713968789e-05, "loss": 0.8141, "step": 94910 }, { "epoch": 0.6064168253197552, "grad_norm": 0.6532935500144958, "learning_rate": 7.8994769423148e-05, "loss": 0.8135, "step": 94920 }, { "epoch": 0.6064807124694939, "grad_norm": 1.091203212738037, "learning_rate": 7.89906814146057e-05, "loss": 0.9978, "step": 94930 }, { "epoch": 0.6065445996192326, "grad_norm": 0.9277145266532898, "learning_rate": 7.898659311410218e-05, "loss": 0.8818, "step": 94940 }, { "epoch": 0.6066084867689713, "grad_norm": 1.1531500816345215, "learning_rate": 7.898250452167856e-05, "loss": 0.7621, "step": 94950 }, { "epoch": 0.60667237391871, "grad_norm": 0.7892251014709473, "learning_rate": 7.897841563737605e-05, "loss": 0.9858, "step": 94960 }, { "epoch": 0.6067362610684487, "grad_norm": 0.8957139849662781, "learning_rate": 7.897432646123583e-05, "loss": 0.8984, "step": 94970 }, { "epoch": 0.6068001482181874, "grad_norm": 1.0226024389266968, "learning_rate": 7.89702369932991e-05, "loss": 0.902, "step": 94980 }, { "epoch": 0.6068640353679261, "grad_norm": 1.1574631929397583, "learning_rate": 7.8966147233607e-05, "loss": 0.9923, "step": 94990 }, { "epoch": 0.6069279225176648, "grad_norm": 0.5747475624084473, "learning_rate": 7.896205718220073e-05, "loss": 0.8992, "step": 95000 }, { "epoch": 0.6069918096674035, "grad_norm": 0.7874606847763062, "learning_rate": 7.895796683912148e-05, "loss": 0.9888, "step": 95010 }, { "epoch": 0.6070556968171422, "grad_norm": 2.17077898979187, "learning_rate": 7.895387620441049e-05, "loss": 0.8266, "step": 95020 }, { "epoch": 0.6071195839668809, "grad_norm": 1.0135728120803833, "learning_rate": 7.894978527810889e-05, "loss": 0.7683, "step": 95030 }, { "epoch": 0.6071834711166196, "grad_norm": 1.4031306505203247, "learning_rate": 7.894569406025791e-05, "loss": 0.8458, "step": 95040 }, { "epoch": 0.6072473582663583, "grad_norm": 2.591813325881958, "learning_rate": 7.894160255089876e-05, "loss": 0.8058, "step": 95050 }, { "epoch": 0.607311245416097, "grad_norm": 0.827282726764679, "learning_rate": 7.893751075007263e-05, "loss": 1.1121, "step": 95060 }, { "epoch": 0.6073751325658358, "grad_norm": 0.9045417904853821, "learning_rate": 7.893341865782073e-05, "loss": 0.9837, "step": 95070 }, { "epoch": 0.6074390197155745, "grad_norm": 1.0362218618392944, "learning_rate": 7.892932627418428e-05, "loss": 0.9518, "step": 95080 }, { "epoch": 0.6075029068653132, "grad_norm": 0.9262713193893433, "learning_rate": 7.892523359920447e-05, "loss": 0.7016, "step": 95090 }, { "epoch": 0.6075667940150518, "grad_norm": 0.7739233374595642, "learning_rate": 7.892114063292256e-05, "loss": 0.726, "step": 95100 }, { "epoch": 0.6076306811647905, "grad_norm": 0.9387316703796387, "learning_rate": 7.891704737537972e-05, "loss": 0.9781, "step": 95110 }, { "epoch": 0.6076945683145292, "grad_norm": 0.8391212821006775, "learning_rate": 7.89129538266172e-05, "loss": 0.7901, "step": 95120 }, { "epoch": 0.6077584554642679, "grad_norm": 1.2274523973464966, "learning_rate": 7.890885998667623e-05, "loss": 0.7709, "step": 95130 }, { "epoch": 0.6078223426140066, "grad_norm": 2.197125196456909, "learning_rate": 7.890476585559802e-05, "loss": 0.8797, "step": 95140 }, { "epoch": 0.6078862297637453, "grad_norm": 0.8918281197547913, "learning_rate": 7.890067143342381e-05, "loss": 0.9798, "step": 95150 }, { "epoch": 0.607950116913484, "grad_norm": 0.9237262010574341, "learning_rate": 7.889657672019483e-05, "loss": 0.9019, "step": 95160 }, { "epoch": 0.6080140040632227, "grad_norm": 2.1953394412994385, "learning_rate": 7.889248171595235e-05, "loss": 1.0639, "step": 95170 }, { "epoch": 0.6080778912129614, "grad_norm": 1.0482672452926636, "learning_rate": 7.888838642073757e-05, "loss": 1.0944, "step": 95180 }, { "epoch": 0.6081417783627001, "grad_norm": 0.8295831084251404, "learning_rate": 7.888429083459175e-05, "loss": 0.7337, "step": 95190 }, { "epoch": 0.6082056655124388, "grad_norm": 0.771742045879364, "learning_rate": 7.888019495755612e-05, "loss": 0.8807, "step": 95200 }, { "epoch": 0.6082695526621775, "grad_norm": 0.9289408922195435, "learning_rate": 7.887609878967195e-05, "loss": 0.8625, "step": 95210 }, { "epoch": 0.6083334398119162, "grad_norm": 0.9988054633140564, "learning_rate": 7.887200233098049e-05, "loss": 0.992, "step": 95220 }, { "epoch": 0.6083973269616549, "grad_norm": 1.2625335454940796, "learning_rate": 7.8867905581523e-05, "loss": 0.798, "step": 95230 }, { "epoch": 0.6084612141113936, "grad_norm": 0.8305104374885559, "learning_rate": 7.886421825844037e-05, "loss": 0.9033, "step": 95240 }, { "epoch": 0.6085251012611324, "grad_norm": 0.540634274482727, "learning_rate": 7.886012095664107e-05, "loss": 1.109, "step": 95250 }, { "epoch": 0.6085889884108711, "grad_norm": 0.7756277918815613, "learning_rate": 7.885602336419534e-05, "loss": 1.0592, "step": 95260 }, { "epoch": 0.6086528755606098, "grad_norm": 0.8292693495750427, "learning_rate": 7.885192548114453e-05, "loss": 0.9055, "step": 95270 }, { "epoch": 0.6087167627103485, "grad_norm": 1.1361613273620605, "learning_rate": 7.884782730752984e-05, "loss": 0.9744, "step": 95280 }, { "epoch": 0.6087806498600872, "grad_norm": 0.6474214792251587, "learning_rate": 7.88437288433926e-05, "loss": 0.8828, "step": 95290 }, { "epoch": 0.6088445370098259, "grad_norm": 0.8289141058921814, "learning_rate": 7.883963008877404e-05, "loss": 0.8699, "step": 95300 }, { "epoch": 0.6089084241595646, "grad_norm": 1.024628758430481, "learning_rate": 7.883553104371547e-05, "loss": 0.8444, "step": 95310 }, { "epoch": 0.6089723113093033, "grad_norm": 0.7327681183815002, "learning_rate": 7.883143170825815e-05, "loss": 0.883, "step": 95320 }, { "epoch": 0.609036198459042, "grad_norm": 1.0134698152542114, "learning_rate": 7.882733208244337e-05, "loss": 0.8742, "step": 95330 }, { "epoch": 0.6091000856087806, "grad_norm": 1.6711806058883667, "learning_rate": 7.882323216631241e-05, "loss": 1.0554, "step": 95340 }, { "epoch": 0.6091639727585193, "grad_norm": 0.8501431345939636, "learning_rate": 7.881913195990658e-05, "loss": 0.9567, "step": 95350 }, { "epoch": 0.609227859908258, "grad_norm": 1.3267399072647095, "learning_rate": 7.881503146326714e-05, "loss": 0.763, "step": 95360 }, { "epoch": 0.6092917470579967, "grad_norm": 1.0088346004486084, "learning_rate": 7.881093067643541e-05, "loss": 1.0992, "step": 95370 }, { "epoch": 0.6093556342077354, "grad_norm": 0.7467685341835022, "learning_rate": 7.88068295994527e-05, "loss": 0.9101, "step": 95380 }, { "epoch": 0.6094195213574741, "grad_norm": 1.0090692043304443, "learning_rate": 7.880272823236027e-05, "loss": 0.9231, "step": 95390 }, { "epoch": 0.6094834085072128, "grad_norm": 0.705679178237915, "learning_rate": 7.879862657519948e-05, "loss": 0.7734, "step": 95400 }, { "epoch": 0.6095472956569515, "grad_norm": 1.0415382385253906, "learning_rate": 7.879452462801158e-05, "loss": 0.767, "step": 95410 }, { "epoch": 0.6096111828066902, "grad_norm": 1.0252981185913086, "learning_rate": 7.879042239083792e-05, "loss": 0.775, "step": 95420 }, { "epoch": 0.609675069956429, "grad_norm": 1.6463427543640137, "learning_rate": 7.878631986371978e-05, "loss": 0.856, "step": 95430 }, { "epoch": 0.6097389571061677, "grad_norm": 0.9976633191108704, "learning_rate": 7.878221704669852e-05, "loss": 0.8753, "step": 95440 }, { "epoch": 0.6098028442559064, "grad_norm": 0.9319173693656921, "learning_rate": 7.877811393981542e-05, "loss": 0.7858, "step": 95450 }, { "epoch": 0.6098667314056451, "grad_norm": 0.7984362840652466, "learning_rate": 7.877401054311182e-05, "loss": 0.7756, "step": 95460 }, { "epoch": 0.6099306185553838, "grad_norm": 1.3641668558120728, "learning_rate": 7.876990685662903e-05, "loss": 0.8173, "step": 95470 }, { "epoch": 0.6099945057051225, "grad_norm": 1.6693613529205322, "learning_rate": 7.87658028804084e-05, "loss": 0.7144, "step": 95480 }, { "epoch": 0.6100583928548612, "grad_norm": 1.5207191705703735, "learning_rate": 7.876169861449125e-05, "loss": 0.8905, "step": 95490 }, { "epoch": 0.6101222800045999, "grad_norm": 0.7093350887298584, "learning_rate": 7.875759405891891e-05, "loss": 1.0028, "step": 95500 }, { "epoch": 0.6101861671543386, "grad_norm": 0.6331580281257629, "learning_rate": 7.875348921373271e-05, "loss": 0.699, "step": 95510 }, { "epoch": 0.6102500543040773, "grad_norm": 0.6561687588691711, "learning_rate": 7.8749384078974e-05, "loss": 0.826, "step": 95520 }, { "epoch": 0.610313941453816, "grad_norm": 1.0018503665924072, "learning_rate": 7.874527865468414e-05, "loss": 0.8969, "step": 95530 }, { "epoch": 0.6103778286035547, "grad_norm": 0.9253289103507996, "learning_rate": 7.874117294090445e-05, "loss": 0.8585, "step": 95540 }, { "epoch": 0.6104417157532934, "grad_norm": 0.924781858921051, "learning_rate": 7.873706693767626e-05, "loss": 1.0908, "step": 95550 }, { "epoch": 0.6105056029030321, "grad_norm": 0.8251575231552124, "learning_rate": 7.873296064504096e-05, "loss": 0.7825, "step": 95560 }, { "epoch": 0.6105694900527708, "grad_norm": 1.3431663513183594, "learning_rate": 7.87288540630399e-05, "loss": 0.9041, "step": 95570 }, { "epoch": 0.6106333772025095, "grad_norm": 0.9507653117179871, "learning_rate": 7.872474719171441e-05, "loss": 0.8849, "step": 95580 }, { "epoch": 0.6106972643522481, "grad_norm": 0.655210018157959, "learning_rate": 7.872064003110589e-05, "loss": 0.7671, "step": 95590 }, { "epoch": 0.6107611515019868, "grad_norm": 0.8236376047134399, "learning_rate": 7.871653258125564e-05, "loss": 0.7553, "step": 95600 }, { "epoch": 0.6108250386517255, "grad_norm": 1.1168328523635864, "learning_rate": 7.871242484220509e-05, "loss": 0.9552, "step": 95610 }, { "epoch": 0.6108889258014643, "grad_norm": 0.9979621767997742, "learning_rate": 7.870831681399558e-05, "loss": 0.8804, "step": 95620 }, { "epoch": 0.610952812951203, "grad_norm": 0.9995028972625732, "learning_rate": 7.870420849666847e-05, "loss": 0.9085, "step": 95630 }, { "epoch": 0.6110167001009417, "grad_norm": 0.6226816773414612, "learning_rate": 7.870009989026516e-05, "loss": 0.8221, "step": 95640 }, { "epoch": 0.6110805872506804, "grad_norm": 0.6372109055519104, "learning_rate": 7.8695990994827e-05, "loss": 0.7805, "step": 95650 }, { "epoch": 0.6111444744004191, "grad_norm": 0.5445452928543091, "learning_rate": 7.86918818103954e-05, "loss": 0.8984, "step": 95660 }, { "epoch": 0.6112083615501578, "grad_norm": 1.3520268201828003, "learning_rate": 7.868777233701174e-05, "loss": 0.6649, "step": 95670 }, { "epoch": 0.6112722486998965, "grad_norm": 3.7762889862060547, "learning_rate": 7.868366257471737e-05, "loss": 1.2053, "step": 95680 }, { "epoch": 0.6113361358496352, "grad_norm": 0.8047236204147339, "learning_rate": 7.867955252355371e-05, "loss": 0.8838, "step": 95690 }, { "epoch": 0.6114000229993739, "grad_norm": 0.9412931799888611, "learning_rate": 7.867544218356215e-05, "loss": 1.0598, "step": 95700 }, { "epoch": 0.6114639101491126, "grad_norm": 0.8420425057411194, "learning_rate": 7.867133155478408e-05, "loss": 1.0705, "step": 95710 }, { "epoch": 0.6115277972988513, "grad_norm": 3.140885353088379, "learning_rate": 7.866722063726089e-05, "loss": 1.1464, "step": 95720 }, { "epoch": 0.61159168444859, "grad_norm": 1.0284521579742432, "learning_rate": 7.866310943103399e-05, "loss": 1.22, "step": 95730 }, { "epoch": 0.6116555715983287, "grad_norm": 0.5975868105888367, "learning_rate": 7.865899793614478e-05, "loss": 0.67, "step": 95740 }, { "epoch": 0.6117194587480674, "grad_norm": 0.9867643117904663, "learning_rate": 7.865488615263467e-05, "loss": 0.8758, "step": 95750 }, { "epoch": 0.6117833458978061, "grad_norm": 1.000255823135376, "learning_rate": 7.865077408054507e-05, "loss": 0.8155, "step": 95760 }, { "epoch": 0.6118472330475448, "grad_norm": 0.6636435389518738, "learning_rate": 7.864666171991736e-05, "loss": 0.8283, "step": 95770 }, { "epoch": 0.6119111201972836, "grad_norm": 0.43315425515174866, "learning_rate": 7.864254907079302e-05, "loss": 0.7155, "step": 95780 }, { "epoch": 0.6119750073470223, "grad_norm": 0.7825480103492737, "learning_rate": 7.863843613321342e-05, "loss": 0.7891, "step": 95790 }, { "epoch": 0.612038894496761, "grad_norm": 1.0271575450897217, "learning_rate": 7.863432290722e-05, "loss": 1.0863, "step": 95800 }, { "epoch": 0.6121027816464997, "grad_norm": 0.8053982853889465, "learning_rate": 7.863020939285415e-05, "loss": 1.0562, "step": 95810 }, { "epoch": 0.6121666687962384, "grad_norm": 0.6690106987953186, "learning_rate": 7.862609559015735e-05, "loss": 0.9627, "step": 95820 }, { "epoch": 0.612230555945977, "grad_norm": 0.6722576022148132, "learning_rate": 7.862198149917099e-05, "loss": 0.9642, "step": 95830 }, { "epoch": 0.6122944430957157, "grad_norm": 1.5476411581039429, "learning_rate": 7.86178671199365e-05, "loss": 0.9018, "step": 95840 }, { "epoch": 0.6123583302454544, "grad_norm": 0.9946299195289612, "learning_rate": 7.861375245249536e-05, "loss": 0.8733, "step": 95850 }, { "epoch": 0.6124222173951931, "grad_norm": 0.9345956444740295, "learning_rate": 7.860963749688897e-05, "loss": 0.6437, "step": 95860 }, { "epoch": 0.6124861045449318, "grad_norm": 0.6753872036933899, "learning_rate": 7.860552225315877e-05, "loss": 0.8364, "step": 95870 }, { "epoch": 0.6125499916946705, "grad_norm": 0.6145283579826355, "learning_rate": 7.860140672134622e-05, "loss": 0.7866, "step": 95880 }, { "epoch": 0.6126138788444092, "grad_norm": 1.010877251625061, "learning_rate": 7.859729090149275e-05, "loss": 0.9073, "step": 95890 }, { "epoch": 0.6126777659941479, "grad_norm": 0.9995620250701904, "learning_rate": 7.859317479363983e-05, "loss": 0.8959, "step": 95900 }, { "epoch": 0.6127416531438866, "grad_norm": 0.7354846596717834, "learning_rate": 7.85890583978289e-05, "loss": 1.1287, "step": 95910 }, { "epoch": 0.6128055402936253, "grad_norm": 1.1408978700637817, "learning_rate": 7.858494171410144e-05, "loss": 0.9378, "step": 95920 }, { "epoch": 0.612869427443364, "grad_norm": 0.7992193698883057, "learning_rate": 7.858082474249886e-05, "loss": 0.7438, "step": 95930 }, { "epoch": 0.6129333145931027, "grad_norm": 0.8835758566856384, "learning_rate": 7.857670748306267e-05, "loss": 0.8875, "step": 95940 }, { "epoch": 0.6129972017428414, "grad_norm": 1.195634365081787, "learning_rate": 7.857258993583429e-05, "loss": 0.9826, "step": 95950 }, { "epoch": 0.6130610888925802, "grad_norm": 0.5840584635734558, "learning_rate": 7.856847210085523e-05, "loss": 0.7557, "step": 95960 }, { "epoch": 0.6131249760423189, "grad_norm": 0.8779674768447876, "learning_rate": 7.856435397816693e-05, "loss": 1.0267, "step": 95970 }, { "epoch": 0.6131888631920576, "grad_norm": 0.7890565991401672, "learning_rate": 7.856023556781087e-05, "loss": 0.9257, "step": 95980 }, { "epoch": 0.6132527503417963, "grad_norm": 0.8011789321899414, "learning_rate": 7.855611686982854e-05, "loss": 0.7494, "step": 95990 }, { "epoch": 0.613316637491535, "grad_norm": 0.802932620048523, "learning_rate": 7.85519978842614e-05, "loss": 0.6817, "step": 96000 }, { "epoch": 0.6133805246412737, "grad_norm": 0.604083240032196, "learning_rate": 7.854787861115093e-05, "loss": 0.7141, "step": 96010 }, { "epoch": 0.6134444117910124, "grad_norm": 0.9903905391693115, "learning_rate": 7.854375905053866e-05, "loss": 0.8143, "step": 96020 }, { "epoch": 0.6135082989407511, "grad_norm": 1.5344460010528564, "learning_rate": 7.853963920246601e-05, "loss": 0.6426, "step": 96030 }, { "epoch": 0.6135721860904898, "grad_norm": 0.6864035129547119, "learning_rate": 7.853551906697452e-05, "loss": 0.7646, "step": 96040 }, { "epoch": 0.6136360732402285, "grad_norm": 0.9132956862449646, "learning_rate": 7.853139864410565e-05, "loss": 1.0619, "step": 96050 }, { "epoch": 0.6136999603899672, "grad_norm": 1.0728709697723389, "learning_rate": 7.852727793390094e-05, "loss": 0.962, "step": 96060 }, { "epoch": 0.6137638475397058, "grad_norm": 0.7748542428016663, "learning_rate": 7.852315693640184e-05, "loss": 0.7858, "step": 96070 }, { "epoch": 0.6138277346894445, "grad_norm": 0.7569340467453003, "learning_rate": 7.851903565164987e-05, "loss": 1.1526, "step": 96080 }, { "epoch": 0.6138916218391832, "grad_norm": 1.0770772695541382, "learning_rate": 7.851491407968655e-05, "loss": 0.8305, "step": 96090 }, { "epoch": 0.6139555089889219, "grad_norm": 0.9833508729934692, "learning_rate": 7.851079222055338e-05, "loss": 0.8214, "step": 96100 }, { "epoch": 0.6140193961386606, "grad_norm": 0.6459149718284607, "learning_rate": 7.850667007429187e-05, "loss": 0.9574, "step": 96110 }, { "epoch": 0.6140832832883993, "grad_norm": 0.6652225255966187, "learning_rate": 7.850254764094351e-05, "loss": 0.7369, "step": 96120 }, { "epoch": 0.614147170438138, "grad_norm": 1.0020723342895508, "learning_rate": 7.849842492054986e-05, "loss": 0.8702, "step": 96130 }, { "epoch": 0.6142110575878768, "grad_norm": 0.847522497177124, "learning_rate": 7.84943019131524e-05, "loss": 0.7167, "step": 96140 }, { "epoch": 0.6142749447376155, "grad_norm": 0.7430154085159302, "learning_rate": 7.849017861879266e-05, "loss": 0.7177, "step": 96150 }, { "epoch": 0.6143388318873542, "grad_norm": 0.7399063110351562, "learning_rate": 7.84860550375122e-05, "loss": 1.1364, "step": 96160 }, { "epoch": 0.6144027190370929, "grad_norm": 1.0138983726501465, "learning_rate": 7.84819311693525e-05, "loss": 0.9364, "step": 96170 }, { "epoch": 0.6144666061868316, "grad_norm": 0.8306713104248047, "learning_rate": 7.847780701435514e-05, "loss": 0.9233, "step": 96180 }, { "epoch": 0.6145304933365703, "grad_norm": 0.8946832418441772, "learning_rate": 7.84736825725616e-05, "loss": 1.2262, "step": 96190 }, { "epoch": 0.614594380486309, "grad_norm": 0.6287519931793213, "learning_rate": 7.846955784401345e-05, "loss": 0.7374, "step": 96200 }, { "epoch": 0.6146582676360477, "grad_norm": 0.8874316215515137, "learning_rate": 7.846543282875222e-05, "loss": 0.8743, "step": 96210 }, { "epoch": 0.6147221547857864, "grad_norm": 0.7302953004837036, "learning_rate": 7.846130752681946e-05, "loss": 0.7668, "step": 96220 }, { "epoch": 0.6147860419355251, "grad_norm": 0.9165903925895691, "learning_rate": 7.845718193825671e-05, "loss": 0.8457, "step": 96230 }, { "epoch": 0.6148499290852638, "grad_norm": 0.7827330827713013, "learning_rate": 7.845305606310552e-05, "loss": 0.9301, "step": 96240 }, { "epoch": 0.6149138162350025, "grad_norm": 1.272709846496582, "learning_rate": 7.844892990140744e-05, "loss": 0.6134, "step": 96250 }, { "epoch": 0.6149777033847412, "grad_norm": 0.8357341885566711, "learning_rate": 7.844480345320402e-05, "loss": 1.3462, "step": 96260 }, { "epoch": 0.6150415905344799, "grad_norm": 1.107619047164917, "learning_rate": 7.844067671853683e-05, "loss": 1.1149, "step": 96270 }, { "epoch": 0.6151054776842186, "grad_norm": 0.9972173571586609, "learning_rate": 7.843654969744741e-05, "loss": 1.0077, "step": 96280 }, { "epoch": 0.6151693648339573, "grad_norm": 0.5411679148674011, "learning_rate": 7.843242238997735e-05, "loss": 0.9392, "step": 96290 }, { "epoch": 0.615233251983696, "grad_norm": 0.6733404397964478, "learning_rate": 7.842829479616818e-05, "loss": 1.0999, "step": 96300 }, { "epoch": 0.6152971391334348, "grad_norm": 1.565991997718811, "learning_rate": 7.842416691606149e-05, "loss": 1.13, "step": 96310 }, { "epoch": 0.6153610262831734, "grad_norm": 0.7156278491020203, "learning_rate": 7.842003874969886e-05, "loss": 1.3407, "step": 96320 }, { "epoch": 0.6154249134329121, "grad_norm": 1.1994881629943848, "learning_rate": 7.841591029712185e-05, "loss": 0.9944, "step": 96330 }, { "epoch": 0.6154888005826508, "grad_norm": 0.923414945602417, "learning_rate": 7.841178155837204e-05, "loss": 0.8468, "step": 96340 }, { "epoch": 0.6155526877323895, "grad_norm": 1.0727185010910034, "learning_rate": 7.8407652533491e-05, "loss": 0.8268, "step": 96350 }, { "epoch": 0.6156165748821282, "grad_norm": 1.3032314777374268, "learning_rate": 7.840352322252032e-05, "loss": 0.8223, "step": 96360 }, { "epoch": 0.6156804620318669, "grad_norm": 1.055098056793213, "learning_rate": 7.839939362550161e-05, "loss": 1.0348, "step": 96370 }, { "epoch": 0.6157443491816056, "grad_norm": 1.0928640365600586, "learning_rate": 7.839526374247642e-05, "loss": 0.7047, "step": 96380 }, { "epoch": 0.6158082363313443, "grad_norm": 1.1519923210144043, "learning_rate": 7.839113357348637e-05, "loss": 1.3878, "step": 96390 }, { "epoch": 0.615872123481083, "grad_norm": 1.0324209928512573, "learning_rate": 7.838700311857303e-05, "loss": 1.0287, "step": 96400 }, { "epoch": 0.6159360106308217, "grad_norm": 1.1393169164657593, "learning_rate": 7.838287237777802e-05, "loss": 0.7189, "step": 96410 }, { "epoch": 0.6159998977805604, "grad_norm": 0.7285189628601074, "learning_rate": 7.837874135114294e-05, "loss": 0.8353, "step": 96420 }, { "epoch": 0.6160637849302991, "grad_norm": 0.8310270309448242, "learning_rate": 7.837461003870936e-05, "loss": 0.9424, "step": 96430 }, { "epoch": 0.6161276720800378, "grad_norm": 0.8968883752822876, "learning_rate": 7.837047844051893e-05, "loss": 1.1358, "step": 96440 }, { "epoch": 0.6161915592297765, "grad_norm": 0.7500566244125366, "learning_rate": 7.836634655661323e-05, "loss": 0.9524, "step": 96450 }, { "epoch": 0.6162554463795152, "grad_norm": 0.8886957764625549, "learning_rate": 7.836221438703388e-05, "loss": 0.6771, "step": 96460 }, { "epoch": 0.6163193335292539, "grad_norm": 0.7337315082550049, "learning_rate": 7.835808193182248e-05, "loss": 0.7978, "step": 96470 }, { "epoch": 0.6163832206789926, "grad_norm": 0.7529950141906738, "learning_rate": 7.835394919102068e-05, "loss": 0.8103, "step": 96480 }, { "epoch": 0.6164471078287314, "grad_norm": 1.0321061611175537, "learning_rate": 7.834981616467007e-05, "loss": 1.1549, "step": 96490 }, { "epoch": 0.6165109949784701, "grad_norm": 1.114965558052063, "learning_rate": 7.83456828528123e-05, "loss": 0.8008, "step": 96500 }, { "epoch": 0.6165748821282088, "grad_norm": 0.8190954923629761, "learning_rate": 7.834154925548898e-05, "loss": 1.0127, "step": 96510 }, { "epoch": 0.6166387692779475, "grad_norm": 1.0019639730453491, "learning_rate": 7.833741537274173e-05, "loss": 0.735, "step": 96520 }, { "epoch": 0.6167026564276862, "grad_norm": 0.6965848207473755, "learning_rate": 7.833328120461219e-05, "loss": 1.2864, "step": 96530 }, { "epoch": 0.6167665435774249, "grad_norm": 1.2692817449569702, "learning_rate": 7.8329146751142e-05, "loss": 0.8433, "step": 96540 }, { "epoch": 0.6168304307271636, "grad_norm": 1.0348045825958252, "learning_rate": 7.832501201237279e-05, "loss": 0.7535, "step": 96550 }, { "epoch": 0.6168943178769022, "grad_norm": 0.7855243682861328, "learning_rate": 7.832087698834621e-05, "loss": 0.8068, "step": 96560 }, { "epoch": 0.6169582050266409, "grad_norm": 0.7087273001670837, "learning_rate": 7.83167416791039e-05, "loss": 1.015, "step": 96570 }, { "epoch": 0.6170220921763796, "grad_norm": 0.6482358574867249, "learning_rate": 7.83126060846875e-05, "loss": 0.8353, "step": 96580 }, { "epoch": 0.6170859793261183, "grad_norm": 1.1553382873535156, "learning_rate": 7.830847020513867e-05, "loss": 0.8833, "step": 96590 }, { "epoch": 0.617149866475857, "grad_norm": 1.0533820390701294, "learning_rate": 7.830433404049904e-05, "loss": 0.9203, "step": 96600 }, { "epoch": 0.6172137536255957, "grad_norm": 0.9476677775382996, "learning_rate": 7.830019759081028e-05, "loss": 0.9134, "step": 96610 }, { "epoch": 0.6172776407753344, "grad_norm": 1.1443191766738892, "learning_rate": 7.829606085611408e-05, "loss": 1.0249, "step": 96620 }, { "epoch": 0.6173415279250731, "grad_norm": 1.9042986631393433, "learning_rate": 7.829192383645203e-05, "loss": 0.8699, "step": 96630 }, { "epoch": 0.6174054150748118, "grad_norm": 0.6363811492919922, "learning_rate": 7.828778653186586e-05, "loss": 0.7259, "step": 96640 }, { "epoch": 0.6174693022245505, "grad_norm": 0.6034536957740784, "learning_rate": 7.82836489423972e-05, "loss": 1.0086, "step": 96650 }, { "epoch": 0.6175331893742892, "grad_norm": 0.7996253371238708, "learning_rate": 7.827951106808771e-05, "loss": 0.8618, "step": 96660 }, { "epoch": 0.617597076524028, "grad_norm": 0.9885534644126892, "learning_rate": 7.827537290897908e-05, "loss": 0.8571, "step": 96670 }, { "epoch": 0.6176609636737667, "grad_norm": 0.7702460885047913, "learning_rate": 7.827123446511298e-05, "loss": 0.8013, "step": 96680 }, { "epoch": 0.6177248508235054, "grad_norm": 1.5067464113235474, "learning_rate": 7.82670957365311e-05, "loss": 0.8273, "step": 96690 }, { "epoch": 0.6177887379732441, "grad_norm": 0.8331496119499207, "learning_rate": 7.826295672327512e-05, "loss": 1.0143, "step": 96700 }, { "epoch": 0.6178526251229828, "grad_norm": 1.1344146728515625, "learning_rate": 7.82588174253867e-05, "loss": 1.0309, "step": 96710 }, { "epoch": 0.6179165122727215, "grad_norm": 0.6412261128425598, "learning_rate": 7.825467784290755e-05, "loss": 0.8596, "step": 96720 }, { "epoch": 0.6179803994224602, "grad_norm": 0.5586232542991638, "learning_rate": 7.825053797587936e-05, "loss": 1.0329, "step": 96730 }, { "epoch": 0.6180442865721989, "grad_norm": 0.8391451835632324, "learning_rate": 7.824639782434379e-05, "loss": 1.0402, "step": 96740 }, { "epoch": 0.6181081737219376, "grad_norm": 0.9592933058738708, "learning_rate": 7.824225738834256e-05, "loss": 0.9333, "step": 96750 }, { "epoch": 0.6181720608716763, "grad_norm": 0.5291448831558228, "learning_rate": 7.823811666791738e-05, "loss": 0.6241, "step": 96760 }, { "epoch": 0.618235948021415, "grad_norm": 1.1414803266525269, "learning_rate": 7.823397566310992e-05, "loss": 0.8571, "step": 96770 }, { "epoch": 0.6182998351711537, "grad_norm": 0.7651611566543579, "learning_rate": 7.822983437396192e-05, "loss": 0.7009, "step": 96780 }, { "epoch": 0.6183637223208924, "grad_norm": 1.016514539718628, "learning_rate": 7.822569280051505e-05, "loss": 0.9783, "step": 96790 }, { "epoch": 0.618427609470631, "grad_norm": 0.9900182485580444, "learning_rate": 7.822155094281104e-05, "loss": 0.919, "step": 96800 }, { "epoch": 0.6184914966203697, "grad_norm": 0.969688892364502, "learning_rate": 7.821740880089159e-05, "loss": 1.054, "step": 96810 }, { "epoch": 0.6185553837701084, "grad_norm": 0.9642791748046875, "learning_rate": 7.821326637479842e-05, "loss": 0.7227, "step": 96820 }, { "epoch": 0.6186192709198471, "grad_norm": 0.9115810394287109, "learning_rate": 7.820912366457327e-05, "loss": 0.9451, "step": 96830 }, { "epoch": 0.6186831580695858, "grad_norm": 1.4009279012680054, "learning_rate": 7.820498067025782e-05, "loss": 1.1126, "step": 96840 }, { "epoch": 0.6187470452193246, "grad_norm": 1.6597306728363037, "learning_rate": 7.820083739189381e-05, "loss": 0.8138, "step": 96850 }, { "epoch": 0.6188109323690633, "grad_norm": 2.7487285137176514, "learning_rate": 7.819669382952299e-05, "loss": 0.929, "step": 96860 }, { "epoch": 0.618874819518802, "grad_norm": 0.7340418100357056, "learning_rate": 7.819254998318706e-05, "loss": 0.9411, "step": 96870 }, { "epoch": 0.6189387066685407, "grad_norm": 0.8978639245033264, "learning_rate": 7.818840585292775e-05, "loss": 0.755, "step": 96880 }, { "epoch": 0.6190025938182794, "grad_norm": 0.8307545781135559, "learning_rate": 7.818426143878683e-05, "loss": 0.8567, "step": 96890 }, { "epoch": 0.6190664809680181, "grad_norm": 0.6987618803977966, "learning_rate": 7.818011674080601e-05, "loss": 0.7964, "step": 96900 }, { "epoch": 0.6191303681177568, "grad_norm": 1.2218877077102661, "learning_rate": 7.817597175902702e-05, "loss": 1.2578, "step": 96910 }, { "epoch": 0.6191942552674955, "grad_norm": 1.1471195220947266, "learning_rate": 7.817182649349164e-05, "loss": 0.8193, "step": 96920 }, { "epoch": 0.6192581424172342, "grad_norm": 0.7587412595748901, "learning_rate": 7.816768094424157e-05, "loss": 0.8189, "step": 96930 }, { "epoch": 0.6193220295669729, "grad_norm": 0.5350973010063171, "learning_rate": 7.81635351113186e-05, "loss": 0.6833, "step": 96940 }, { "epoch": 0.6193859167167116, "grad_norm": 0.5886098146438599, "learning_rate": 7.815938899476447e-05, "loss": 0.7651, "step": 96950 }, { "epoch": 0.6194498038664503, "grad_norm": 0.8069875240325928, "learning_rate": 7.815524259462093e-05, "loss": 0.8622, "step": 96960 }, { "epoch": 0.619513691016189, "grad_norm": 0.9382511973381042, "learning_rate": 7.815109591092973e-05, "loss": 0.7166, "step": 96970 }, { "epoch": 0.6195775781659277, "grad_norm": 1.3142880201339722, "learning_rate": 7.814694894373263e-05, "loss": 1.2337, "step": 96980 }, { "epoch": 0.6196414653156664, "grad_norm": 0.8636249303817749, "learning_rate": 7.814280169307142e-05, "loss": 0.7238, "step": 96990 }, { "epoch": 0.6197053524654051, "grad_norm": 0.7896556258201599, "learning_rate": 7.813865415898785e-05, "loss": 0.7152, "step": 97000 }, { "epoch": 0.6197692396151439, "grad_norm": 0.7915673851966858, "learning_rate": 7.813450634152369e-05, "loss": 0.9909, "step": 97010 }, { "epoch": 0.6198331267648826, "grad_norm": 0.6319288611412048, "learning_rate": 7.81303582407207e-05, "loss": 0.8935, "step": 97020 }, { "epoch": 0.6198970139146213, "grad_norm": 0.72498619556427, "learning_rate": 7.812620985662066e-05, "loss": 0.8319, "step": 97030 }, { "epoch": 0.6199609010643599, "grad_norm": 0.5626809000968933, "learning_rate": 7.812206118926539e-05, "loss": 0.6742, "step": 97040 }, { "epoch": 0.6200247882140986, "grad_norm": 1.0397377014160156, "learning_rate": 7.81179122386966e-05, "loss": 0.863, "step": 97050 }, { "epoch": 0.6200886753638373, "grad_norm": 0.5984945297241211, "learning_rate": 7.811376300495612e-05, "loss": 1.0026, "step": 97060 }, { "epoch": 0.620152562513576, "grad_norm": 1.6048803329467773, "learning_rate": 7.810961348808572e-05, "loss": 0.9494, "step": 97070 }, { "epoch": 0.6202164496633147, "grad_norm": 0.6135510206222534, "learning_rate": 7.810546368812721e-05, "loss": 0.7765, "step": 97080 }, { "epoch": 0.6202803368130534, "grad_norm": 1.2818505764007568, "learning_rate": 7.810131360512236e-05, "loss": 0.7591, "step": 97090 }, { "epoch": 0.6203442239627921, "grad_norm": 0.8664326071739197, "learning_rate": 7.809716323911296e-05, "loss": 0.8345, "step": 97100 }, { "epoch": 0.6204081111125308, "grad_norm": 0.9286889433860779, "learning_rate": 7.809301259014083e-05, "loss": 1.1506, "step": 97110 }, { "epoch": 0.6204719982622695, "grad_norm": 0.7341832518577576, "learning_rate": 7.808886165824775e-05, "loss": 0.8342, "step": 97120 }, { "epoch": 0.6205358854120082, "grad_norm": 2.2999391555786133, "learning_rate": 7.808471044347555e-05, "loss": 0.8378, "step": 97130 }, { "epoch": 0.6205997725617469, "grad_norm": 0.6908975839614868, "learning_rate": 7.808055894586602e-05, "loss": 1.0413, "step": 97140 }, { "epoch": 0.6206636597114856, "grad_norm": 0.7688397765159607, "learning_rate": 7.807640716546094e-05, "loss": 0.7239, "step": 97150 }, { "epoch": 0.6207275468612243, "grad_norm": 0.830764889717102, "learning_rate": 7.807225510230216e-05, "loss": 0.8647, "step": 97160 }, { "epoch": 0.620791434010963, "grad_norm": 1.2622300386428833, "learning_rate": 7.80681027564315e-05, "loss": 0.8693, "step": 97170 }, { "epoch": 0.6208553211607017, "grad_norm": 0.9150146842002869, "learning_rate": 7.806395012789074e-05, "loss": 0.854, "step": 97180 }, { "epoch": 0.6209192083104405, "grad_norm": 0.8529565334320068, "learning_rate": 7.805979721672175e-05, "loss": 0.8418, "step": 97190 }, { "epoch": 0.6209830954601792, "grad_norm": 1.1613361835479736, "learning_rate": 7.80556440229663e-05, "loss": 0.93, "step": 97200 }, { "epoch": 0.6210469826099179, "grad_norm": 1.5015759468078613, "learning_rate": 7.805149054666626e-05, "loss": 1.0721, "step": 97210 }, { "epoch": 0.6211108697596566, "grad_norm": 0.8608677387237549, "learning_rate": 7.804733678786345e-05, "loss": 0.9352, "step": 97220 }, { "epoch": 0.6211747569093953, "grad_norm": 0.7824024558067322, "learning_rate": 7.804318274659967e-05, "loss": 0.7138, "step": 97230 }, { "epoch": 0.621238644059134, "grad_norm": 0.6938091516494751, "learning_rate": 7.803902842291679e-05, "loss": 0.8414, "step": 97240 }, { "epoch": 0.6213025312088727, "grad_norm": 1.1884207725524902, "learning_rate": 7.803487381685665e-05, "loss": 0.846, "step": 97250 }, { "epoch": 0.6213664183586114, "grad_norm": 0.9938066005706787, "learning_rate": 7.803071892846106e-05, "loss": 1.0066, "step": 97260 }, { "epoch": 0.6214303055083501, "grad_norm": 1.1937052011489868, "learning_rate": 7.802656375777188e-05, "loss": 1.042, "step": 97270 }, { "epoch": 0.6214941926580888, "grad_norm": 0.7454966306686401, "learning_rate": 7.802240830483096e-05, "loss": 0.9139, "step": 97280 }, { "epoch": 0.6215580798078274, "grad_norm": 0.8491148948669434, "learning_rate": 7.801825256968015e-05, "loss": 0.8569, "step": 97290 }, { "epoch": 0.6216219669575661, "grad_norm": 1.283415675163269, "learning_rate": 7.80140965523613e-05, "loss": 1.134, "step": 97300 }, { "epoch": 0.6216858541073048, "grad_norm": 0.9425275921821594, "learning_rate": 7.800994025291626e-05, "loss": 0.8734, "step": 97310 }, { "epoch": 0.6217497412570435, "grad_norm": 0.8842566609382629, "learning_rate": 7.800578367138688e-05, "loss": 0.9209, "step": 97320 }, { "epoch": 0.6218136284067822, "grad_norm": 0.9904354214668274, "learning_rate": 7.800162680781504e-05, "loss": 0.7265, "step": 97330 }, { "epoch": 0.6218775155565209, "grad_norm": 0.956762969493866, "learning_rate": 7.79974696622426e-05, "loss": 0.911, "step": 97340 }, { "epoch": 0.6219414027062596, "grad_norm": 0.7186155319213867, "learning_rate": 7.79933122347114e-05, "loss": 0.9249, "step": 97350 }, { "epoch": 0.6220052898559983, "grad_norm": 0.733720064163208, "learning_rate": 7.798915452526334e-05, "loss": 0.9297, "step": 97360 }, { "epoch": 0.622069177005737, "grad_norm": 0.8453028202056885, "learning_rate": 7.798499653394028e-05, "loss": 0.9505, "step": 97370 }, { "epoch": 0.6221330641554758, "grad_norm": 1.2403620481491089, "learning_rate": 7.798083826078408e-05, "loss": 1.3309, "step": 97380 }, { "epoch": 0.6221969513052145, "grad_norm": 1.1222939491271973, "learning_rate": 7.797667970583666e-05, "loss": 1.1289, "step": 97390 }, { "epoch": 0.6222608384549532, "grad_norm": 0.633385956287384, "learning_rate": 7.797252086913984e-05, "loss": 0.9007, "step": 97400 }, { "epoch": 0.6223247256046919, "grad_norm": 0.7996073365211487, "learning_rate": 7.796877767525162e-05, "loss": 0.9044, "step": 97410 }, { "epoch": 0.6223886127544306, "grad_norm": 0.7875693440437317, "learning_rate": 7.796461830334642e-05, "loss": 1.0713, "step": 97420 }, { "epoch": 0.6224524999041693, "grad_norm": 1.1441236734390259, "learning_rate": 7.79604586498133e-05, "loss": 0.9106, "step": 97430 }, { "epoch": 0.622516387053908, "grad_norm": 1.1980715990066528, "learning_rate": 7.795629871469419e-05, "loss": 0.9184, "step": 97440 }, { "epoch": 0.6225802742036467, "grad_norm": 0.8532522320747375, "learning_rate": 7.795213849803094e-05, "loss": 0.827, "step": 97450 }, { "epoch": 0.6226441613533854, "grad_norm": 0.9568140506744385, "learning_rate": 7.794797799986549e-05, "loss": 0.8368, "step": 97460 }, { "epoch": 0.6227080485031241, "grad_norm": 0.8139510750770569, "learning_rate": 7.794381722023973e-05, "loss": 1.1102, "step": 97470 }, { "epoch": 0.6227719356528628, "grad_norm": 1.3737013339996338, "learning_rate": 7.793965615919555e-05, "loss": 1.0294, "step": 97480 }, { "epoch": 0.6228358228026015, "grad_norm": 1.1957775354385376, "learning_rate": 7.793549481677485e-05, "loss": 0.75, "step": 97490 }, { "epoch": 0.6228997099523402, "grad_norm": 0.7739052772521973, "learning_rate": 7.793133319301956e-05, "loss": 0.8989, "step": 97500 }, { "epoch": 0.6229635971020789, "grad_norm": 1.287320852279663, "learning_rate": 7.792717128797157e-05, "loss": 1.1412, "step": 97510 }, { "epoch": 0.6230274842518176, "grad_norm": 1.1825543642044067, "learning_rate": 7.792300910167284e-05, "loss": 0.7868, "step": 97520 }, { "epoch": 0.6230913714015562, "grad_norm": 0.9416884183883667, "learning_rate": 7.791884663416522e-05, "loss": 1.0537, "step": 97530 }, { "epoch": 0.6231552585512949, "grad_norm": 0.7893606424331665, "learning_rate": 7.791468388549066e-05, "loss": 0.9721, "step": 97540 }, { "epoch": 0.6232191457010336, "grad_norm": 0.6625798344612122, "learning_rate": 7.79105208556911e-05, "loss": 1.0084, "step": 97550 }, { "epoch": 0.6232830328507724, "grad_norm": 0.9183120131492615, "learning_rate": 7.790635754480844e-05, "loss": 1.2298, "step": 97560 }, { "epoch": 0.6233469200005111, "grad_norm": 0.7689588069915771, "learning_rate": 7.790219395288461e-05, "loss": 0.8109, "step": 97570 }, { "epoch": 0.6234108071502498, "grad_norm": 0.8191707134246826, "learning_rate": 7.789803007996156e-05, "loss": 0.9747, "step": 97580 }, { "epoch": 0.6234746942999885, "grad_norm": 1.4356540441513062, "learning_rate": 7.789386592608121e-05, "loss": 1.053, "step": 97590 }, { "epoch": 0.6235385814497272, "grad_norm": 0.5826048851013184, "learning_rate": 7.78897014912855e-05, "loss": 0.9845, "step": 97600 }, { "epoch": 0.6236024685994659, "grad_norm": 1.0215983390808105, "learning_rate": 7.788553677561635e-05, "loss": 1.0139, "step": 97610 }, { "epoch": 0.6236663557492046, "grad_norm": 1.153480887413025, "learning_rate": 7.788137177911573e-05, "loss": 0.8468, "step": 97620 }, { "epoch": 0.6237302428989433, "grad_norm": 1.5136088132858276, "learning_rate": 7.78772065018256e-05, "loss": 0.7882, "step": 97630 }, { "epoch": 0.623794130048682, "grad_norm": 1.1974624395370483, "learning_rate": 7.787304094378785e-05, "loss": 0.9654, "step": 97640 }, { "epoch": 0.6238580171984207, "grad_norm": 0.695049524307251, "learning_rate": 7.786887510504447e-05, "loss": 0.8364, "step": 97650 }, { "epoch": 0.6239219043481594, "grad_norm": 0.7446387410163879, "learning_rate": 7.786470898563741e-05, "loss": 0.5882, "step": 97660 }, { "epoch": 0.6239857914978981, "grad_norm": 1.169751763343811, "learning_rate": 7.786054258560863e-05, "loss": 0.795, "step": 97670 }, { "epoch": 0.6240496786476368, "grad_norm": 1.1560198068618774, "learning_rate": 7.785637590500007e-05, "loss": 0.7352, "step": 97680 }, { "epoch": 0.6241135657973755, "grad_norm": 0.8361658453941345, "learning_rate": 7.785220894385373e-05, "loss": 0.7835, "step": 97690 }, { "epoch": 0.6241774529471142, "grad_norm": 1.0349642038345337, "learning_rate": 7.784804170221154e-05, "loss": 0.7047, "step": 97700 }, { "epoch": 0.624241340096853, "grad_norm": 0.7345200181007385, "learning_rate": 7.784387418011547e-05, "loss": 0.9272, "step": 97710 }, { "epoch": 0.6243052272465917, "grad_norm": 1.210518717765808, "learning_rate": 7.783970637760751e-05, "loss": 0.9561, "step": 97720 }, { "epoch": 0.6243691143963304, "grad_norm": 1.1094375848770142, "learning_rate": 7.783553829472962e-05, "loss": 1.1463, "step": 97730 }, { "epoch": 0.6244330015460691, "grad_norm": 0.9743418097496033, "learning_rate": 7.783136993152376e-05, "loss": 0.8341, "step": 97740 }, { "epoch": 0.6244968886958078, "grad_norm": 0.6543291211128235, "learning_rate": 7.782720128803195e-05, "loss": 0.9606, "step": 97750 }, { "epoch": 0.6245607758455465, "grad_norm": 1.24593186378479, "learning_rate": 7.782303236429614e-05, "loss": 1.2391, "step": 97760 }, { "epoch": 0.6246246629952851, "grad_norm": 1.0866676568984985, "learning_rate": 7.781886316035834e-05, "loss": 0.96, "step": 97770 }, { "epoch": 0.6246885501450238, "grad_norm": 0.8642030358314514, "learning_rate": 7.78146936762605e-05, "loss": 1.1729, "step": 97780 }, { "epoch": 0.6247524372947625, "grad_norm": 0.8341190218925476, "learning_rate": 7.781052391204464e-05, "loss": 0.8916, "step": 97790 }, { "epoch": 0.6248163244445012, "grad_norm": 0.8593606948852539, "learning_rate": 7.780635386775273e-05, "loss": 0.7954, "step": 97800 }, { "epoch": 0.6248802115942399, "grad_norm": 0.7424865365028381, "learning_rate": 7.780218354342679e-05, "loss": 0.8716, "step": 97810 }, { "epoch": 0.6249440987439786, "grad_norm": 1.128391146659851, "learning_rate": 7.779801293910883e-05, "loss": 0.8336, "step": 97820 }, { "epoch": 0.6250079858937173, "grad_norm": 0.8907873630523682, "learning_rate": 7.779384205484079e-05, "loss": 1.0559, "step": 97830 }, { "epoch": 0.625071873043456, "grad_norm": 0.7362083792686462, "learning_rate": 7.778967089066474e-05, "loss": 1.0033, "step": 97840 }, { "epoch": 0.6251357601931947, "grad_norm": 0.8434352278709412, "learning_rate": 7.778549944662266e-05, "loss": 0.9773, "step": 97850 }, { "epoch": 0.6251996473429334, "grad_norm": 2.6534831523895264, "learning_rate": 7.778132772275657e-05, "loss": 0.921, "step": 97860 }, { "epoch": 0.6252635344926721, "grad_norm": 1.1809990406036377, "learning_rate": 7.777715571910846e-05, "loss": 0.9785, "step": 97870 }, { "epoch": 0.6253274216424108, "grad_norm": 0.7903746962547302, "learning_rate": 7.777298343572038e-05, "loss": 0.8915, "step": 97880 }, { "epoch": 0.6253913087921495, "grad_norm": 0.6154451370239258, "learning_rate": 7.776881087263433e-05, "loss": 1.0139, "step": 97890 }, { "epoch": 0.6254551959418883, "grad_norm": 0.7355427145957947, "learning_rate": 7.776463802989232e-05, "loss": 0.8841, "step": 97900 }, { "epoch": 0.625519083091627, "grad_norm": 1.5603142976760864, "learning_rate": 7.776046490753638e-05, "loss": 0.9459, "step": 97910 }, { "epoch": 0.6255829702413657, "grad_norm": 1.0645157098770142, "learning_rate": 7.775629150560854e-05, "loss": 0.8107, "step": 97920 }, { "epoch": 0.6256468573911044, "grad_norm": 0.7436626553535461, "learning_rate": 7.775211782415084e-05, "loss": 0.6884, "step": 97930 }, { "epoch": 0.6257107445408431, "grad_norm": 1.6905604600906372, "learning_rate": 7.774794386320531e-05, "loss": 0.9719, "step": 97940 }, { "epoch": 0.6257746316905818, "grad_norm": 1.0564686059951782, "learning_rate": 7.774376962281398e-05, "loss": 0.9414, "step": 97950 }, { "epoch": 0.6258385188403205, "grad_norm": 0.7647698521614075, "learning_rate": 7.773959510301887e-05, "loss": 1.0905, "step": 97960 }, { "epoch": 0.6259024059900592, "grad_norm": 0.8428241610527039, "learning_rate": 7.773542030386205e-05, "loss": 1.0266, "step": 97970 }, { "epoch": 0.6259662931397979, "grad_norm": 0.5705221891403198, "learning_rate": 7.773124522538556e-05, "loss": 0.8996, "step": 97980 }, { "epoch": 0.6260301802895366, "grad_norm": 0.9240884780883789, "learning_rate": 7.772706986763142e-05, "loss": 0.6718, "step": 97990 }, { "epoch": 0.6260940674392753, "grad_norm": 1.4182459115982056, "learning_rate": 7.772289423064174e-05, "loss": 0.9454, "step": 98000 }, { "epoch": 0.626157954589014, "grad_norm": 0.46557140350341797, "learning_rate": 7.77187183144585e-05, "loss": 0.8053, "step": 98010 }, { "epoch": 0.6262218417387526, "grad_norm": 1.070710301399231, "learning_rate": 7.771454211912378e-05, "loss": 0.8369, "step": 98020 }, { "epoch": 0.6262857288884913, "grad_norm": 1.3407284021377563, "learning_rate": 7.771036564467967e-05, "loss": 0.6425, "step": 98030 }, { "epoch": 0.62634961603823, "grad_norm": 1.1556596755981445, "learning_rate": 7.770618889116819e-05, "loss": 0.7995, "step": 98040 }, { "epoch": 0.6264135031879687, "grad_norm": 0.8401532769203186, "learning_rate": 7.770201185863142e-05, "loss": 1.0753, "step": 98050 }, { "epoch": 0.6264773903377074, "grad_norm": 0.927470862865448, "learning_rate": 7.769783454711143e-05, "loss": 0.8812, "step": 98060 }, { "epoch": 0.6265412774874461, "grad_norm": 0.7423887252807617, "learning_rate": 7.769365695665027e-05, "loss": 0.9529, "step": 98070 }, { "epoch": 0.6266051646371849, "grad_norm": 0.5495186448097229, "learning_rate": 7.768947908729003e-05, "loss": 0.7396, "step": 98080 }, { "epoch": 0.6266690517869236, "grad_norm": 0.8177791833877563, "learning_rate": 7.768530093907279e-05, "loss": 0.9336, "step": 98090 }, { "epoch": 0.6267329389366623, "grad_norm": 2.721142530441284, "learning_rate": 7.768112251204061e-05, "loss": 1.0718, "step": 98100 }, { "epoch": 0.626796826086401, "grad_norm": 0.5694549679756165, "learning_rate": 7.767694380623558e-05, "loss": 0.6205, "step": 98110 }, { "epoch": 0.6268607132361397, "grad_norm": 0.9336040616035461, "learning_rate": 7.767276482169979e-05, "loss": 0.8428, "step": 98120 }, { "epoch": 0.6269246003858784, "grad_norm": 1.029270052909851, "learning_rate": 7.766858555847531e-05, "loss": 0.8425, "step": 98130 }, { "epoch": 0.6269884875356171, "grad_norm": 1.2212886810302734, "learning_rate": 7.766440601660424e-05, "loss": 1.0028, "step": 98140 }, { "epoch": 0.6270523746853558, "grad_norm": 2.6575090885162354, "learning_rate": 7.766022619612867e-05, "loss": 0.8225, "step": 98150 }, { "epoch": 0.6271162618350945, "grad_norm": 0.7824742197990417, "learning_rate": 7.765604609709069e-05, "loss": 0.903, "step": 98160 }, { "epoch": 0.6271801489848332, "grad_norm": 0.9830259084701538, "learning_rate": 7.76518657195324e-05, "loss": 1.0083, "step": 98170 }, { "epoch": 0.6272440361345719, "grad_norm": 0.7284572720527649, "learning_rate": 7.764768506349589e-05, "loss": 0.7337, "step": 98180 }, { "epoch": 0.6273079232843106, "grad_norm": 1.008009910583496, "learning_rate": 7.764350412902328e-05, "loss": 1.0301, "step": 98190 }, { "epoch": 0.6273718104340493, "grad_norm": 0.7041063904762268, "learning_rate": 7.763932291615667e-05, "loss": 1.0467, "step": 98200 }, { "epoch": 0.627435697583788, "grad_norm": 1.120405673980713, "learning_rate": 7.763514142493818e-05, "loss": 1.0133, "step": 98210 }, { "epoch": 0.6274995847335267, "grad_norm": 0.855456531047821, "learning_rate": 7.76309596554099e-05, "loss": 1.1737, "step": 98220 }, { "epoch": 0.6275634718832654, "grad_norm": 0.8081047534942627, "learning_rate": 7.762677760761394e-05, "loss": 0.9021, "step": 98230 }, { "epoch": 0.6276273590330042, "grad_norm": 0.7557641267776489, "learning_rate": 7.762259528159243e-05, "loss": 0.9765, "step": 98240 }, { "epoch": 0.6276912461827429, "grad_norm": 3.023898124694824, "learning_rate": 7.76184126773875e-05, "loss": 0.8915, "step": 98250 }, { "epoch": 0.6277551333324815, "grad_norm": 1.2447547912597656, "learning_rate": 7.761422979504128e-05, "loss": 0.9107, "step": 98260 }, { "epoch": 0.6278190204822202, "grad_norm": 1.0318201780319214, "learning_rate": 7.761004663459584e-05, "loss": 0.8787, "step": 98270 }, { "epoch": 0.6278829076319589, "grad_norm": 0.69561767578125, "learning_rate": 7.760586319609335e-05, "loss": 0.8026, "step": 98280 }, { "epoch": 0.6279467947816976, "grad_norm": 0.5632861256599426, "learning_rate": 7.760167947957595e-05, "loss": 0.7686, "step": 98290 }, { "epoch": 0.6280106819314363, "grad_norm": 0.8044828772544861, "learning_rate": 7.759749548508575e-05, "loss": 0.8049, "step": 98300 }, { "epoch": 0.628074569081175, "grad_norm": 1.2165446281433105, "learning_rate": 7.759331121266489e-05, "loss": 0.7037, "step": 98310 }, { "epoch": 0.6281384562309137, "grad_norm": 0.5233101844787598, "learning_rate": 7.758912666235552e-05, "loss": 0.8435, "step": 98320 }, { "epoch": 0.6282023433806524, "grad_norm": 0.7019632458686829, "learning_rate": 7.758494183419978e-05, "loss": 0.7907, "step": 98330 }, { "epoch": 0.6282662305303911, "grad_norm": 0.7987385988235474, "learning_rate": 7.758075672823982e-05, "loss": 1.0906, "step": 98340 }, { "epoch": 0.6283301176801298, "grad_norm": 0.8817057013511658, "learning_rate": 7.757657134451776e-05, "loss": 0.9428, "step": 98350 }, { "epoch": 0.6283940048298685, "grad_norm": 0.7818195819854736, "learning_rate": 7.757238568307576e-05, "loss": 0.6948, "step": 98360 }, { "epoch": 0.6284578919796072, "grad_norm": 1.5914932489395142, "learning_rate": 7.756819974395602e-05, "loss": 0.9719, "step": 98370 }, { "epoch": 0.6285217791293459, "grad_norm": 0.9636878967285156, "learning_rate": 7.756401352720063e-05, "loss": 0.9189, "step": 98380 }, { "epoch": 0.6285856662790846, "grad_norm": 1.070579171180725, "learning_rate": 7.755982703285178e-05, "loss": 0.9929, "step": 98390 }, { "epoch": 0.6286495534288233, "grad_norm": 0.9646096229553223, "learning_rate": 7.755564026095164e-05, "loss": 0.7181, "step": 98400 }, { "epoch": 0.628713440578562, "grad_norm": 1.1919089555740356, "learning_rate": 7.755145321154235e-05, "loss": 0.956, "step": 98410 }, { "epoch": 0.6287773277283008, "grad_norm": 1.0525037050247192, "learning_rate": 7.754726588466611e-05, "loss": 0.8289, "step": 98420 }, { "epoch": 0.6288412148780395, "grad_norm": 0.8866745233535767, "learning_rate": 7.754307828036507e-05, "loss": 1.1507, "step": 98430 }, { "epoch": 0.6289051020277782, "grad_norm": 1.4663811922073364, "learning_rate": 7.753889039868138e-05, "loss": 0.8841, "step": 98440 }, { "epoch": 0.6289689891775169, "grad_norm": 0.8468247056007385, "learning_rate": 7.753470223965726e-05, "loss": 1.0977, "step": 98450 }, { "epoch": 0.6290328763272556, "grad_norm": 0.6286731958389282, "learning_rate": 7.753051380333485e-05, "loss": 0.6818, "step": 98460 }, { "epoch": 0.6290967634769943, "grad_norm": 1.2429255247116089, "learning_rate": 7.752632508975636e-05, "loss": 0.8151, "step": 98470 }, { "epoch": 0.629160650626733, "grad_norm": 1.2103321552276611, "learning_rate": 7.752213609896396e-05, "loss": 0.7415, "step": 98480 }, { "epoch": 0.6292245377764717, "grad_norm": 1.9655529260635376, "learning_rate": 7.751794683099986e-05, "loss": 1.1028, "step": 98490 }, { "epoch": 0.6292884249262103, "grad_norm": 0.7226641774177551, "learning_rate": 7.75137572859062e-05, "loss": 0.9055, "step": 98500 }, { "epoch": 0.629352312075949, "grad_norm": 1.1247568130493164, "learning_rate": 7.750956746372521e-05, "loss": 1.0251, "step": 98510 }, { "epoch": 0.6294161992256877, "grad_norm": 1.818439245223999, "learning_rate": 7.750537736449908e-05, "loss": 0.6323, "step": 98520 }, { "epoch": 0.6294800863754264, "grad_norm": 0.6944345235824585, "learning_rate": 7.750118698827e-05, "loss": 0.9022, "step": 98530 }, { "epoch": 0.6295439735251651, "grad_norm": 1.0383299589157104, "learning_rate": 7.749699633508019e-05, "loss": 0.9505, "step": 98540 }, { "epoch": 0.6296078606749038, "grad_norm": 0.5083116888999939, "learning_rate": 7.749280540497181e-05, "loss": 0.7154, "step": 98550 }, { "epoch": 0.6296717478246425, "grad_norm": 1.316440224647522, "learning_rate": 7.748861419798712e-05, "loss": 0.7183, "step": 98560 }, { "epoch": 0.6297356349743812, "grad_norm": 0.9615148901939392, "learning_rate": 7.74844227141683e-05, "loss": 1.0438, "step": 98570 }, { "epoch": 0.6297995221241199, "grad_norm": 1.223386526107788, "learning_rate": 7.748023095355756e-05, "loss": 0.8429, "step": 98580 }, { "epoch": 0.6298634092738586, "grad_norm": 0.7648318409919739, "learning_rate": 7.747603891619712e-05, "loss": 0.8862, "step": 98590 }, { "epoch": 0.6299272964235973, "grad_norm": 0.8979175686836243, "learning_rate": 7.747184660212918e-05, "loss": 0.9744, "step": 98600 }, { "epoch": 0.629991183573336, "grad_norm": 0.8479979038238525, "learning_rate": 7.7467654011396e-05, "loss": 0.9066, "step": 98610 }, { "epoch": 0.6300550707230748, "grad_norm": 1.0452567338943481, "learning_rate": 7.746346114403978e-05, "loss": 0.717, "step": 98620 }, { "epoch": 0.6301189578728135, "grad_norm": 0.5586809515953064, "learning_rate": 7.745926800010275e-05, "loss": 0.7231, "step": 98630 }, { "epoch": 0.6301828450225522, "grad_norm": 0.9758456945419312, "learning_rate": 7.745507457962712e-05, "loss": 0.7899, "step": 98640 }, { "epoch": 0.6302467321722909, "grad_norm": 0.8799155354499817, "learning_rate": 7.745088088265516e-05, "loss": 0.8026, "step": 98650 }, { "epoch": 0.6303106193220296, "grad_norm": 0.7209200263023376, "learning_rate": 7.744668690922907e-05, "loss": 0.9363, "step": 98660 }, { "epoch": 0.6303745064717683, "grad_norm": 1.0429208278656006, "learning_rate": 7.74424926593911e-05, "loss": 1.1868, "step": 98670 }, { "epoch": 0.630438393621507, "grad_norm": 0.829575777053833, "learning_rate": 7.743829813318349e-05, "loss": 0.795, "step": 98680 }, { "epoch": 0.6305022807712457, "grad_norm": 0.7974848747253418, "learning_rate": 7.743410333064847e-05, "loss": 1.0371, "step": 98690 }, { "epoch": 0.6305661679209844, "grad_norm": 1.1023069620132446, "learning_rate": 7.74299082518283e-05, "loss": 0.8404, "step": 98700 }, { "epoch": 0.6306300550707231, "grad_norm": 1.1051509380340576, "learning_rate": 7.742571289676522e-05, "loss": 0.9185, "step": 98710 }, { "epoch": 0.6306939422204618, "grad_norm": 1.0306414365768433, "learning_rate": 7.742151726550149e-05, "loss": 0.784, "step": 98720 }, { "epoch": 0.6307578293702005, "grad_norm": 0.6763244271278381, "learning_rate": 7.741732135807937e-05, "loss": 0.983, "step": 98730 }, { "epoch": 0.6308217165199391, "grad_norm": 0.5767059326171875, "learning_rate": 7.741312517454109e-05, "loss": 0.9532, "step": 98740 }, { "epoch": 0.6308856036696778, "grad_norm": 0.8445504307746887, "learning_rate": 7.740892871492894e-05, "loss": 1.0529, "step": 98750 }, { "epoch": 0.6309494908194165, "grad_norm": 0.5848102569580078, "learning_rate": 7.740473197928513e-05, "loss": 0.6972, "step": 98760 }, { "epoch": 0.6310133779691552, "grad_norm": 1.0560247898101807, "learning_rate": 7.740053496765199e-05, "loss": 0.8724, "step": 98770 }, { "epoch": 0.631077265118894, "grad_norm": 1.2998313903808594, "learning_rate": 7.739633768007175e-05, "loss": 1.0596, "step": 98780 }, { "epoch": 0.6311411522686327, "grad_norm": 0.6636534333229065, "learning_rate": 7.739214011658669e-05, "loss": 0.9184, "step": 98790 }, { "epoch": 0.6312050394183714, "grad_norm": 0.8721036911010742, "learning_rate": 7.738794227723907e-05, "loss": 0.9777, "step": 98800 }, { "epoch": 0.6312689265681101, "grad_norm": 0.9047155380249023, "learning_rate": 7.73837441620712e-05, "loss": 0.8865, "step": 98810 }, { "epoch": 0.6313328137178488, "grad_norm": 0.7591509819030762, "learning_rate": 7.737954577112532e-05, "loss": 0.8674, "step": 98820 }, { "epoch": 0.6313967008675875, "grad_norm": 0.7103126049041748, "learning_rate": 7.737534710444372e-05, "loss": 1.0165, "step": 98830 }, { "epoch": 0.6314605880173262, "grad_norm": 0.9940080642700195, "learning_rate": 7.73711481620687e-05, "loss": 0.8711, "step": 98840 }, { "epoch": 0.6315244751670649, "grad_norm": 0.8602542281150818, "learning_rate": 7.736694894404254e-05, "loss": 0.8148, "step": 98850 }, { "epoch": 0.6315883623168036, "grad_norm": 0.687978208065033, "learning_rate": 7.736274945040753e-05, "loss": 1.0732, "step": 98860 }, { "epoch": 0.6316522494665423, "grad_norm": 0.7140915989875793, "learning_rate": 7.735854968120596e-05, "loss": 1.043, "step": 98870 }, { "epoch": 0.631716136616281, "grad_norm": 0.7227775454521179, "learning_rate": 7.735434963648013e-05, "loss": 0.8709, "step": 98880 }, { "epoch": 0.6317800237660197, "grad_norm": 0.5700997710227966, "learning_rate": 7.735014931627234e-05, "loss": 0.7587, "step": 98890 }, { "epoch": 0.6318439109157584, "grad_norm": 1.2444158792495728, "learning_rate": 7.734594872062486e-05, "loss": 0.7822, "step": 98900 }, { "epoch": 0.6319077980654971, "grad_norm": 1.6679047346115112, "learning_rate": 7.734174784958004e-05, "loss": 1.1175, "step": 98910 }, { "epoch": 0.6319716852152358, "grad_norm": 0.6713977456092834, "learning_rate": 7.733754670318016e-05, "loss": 0.8503, "step": 98920 }, { "epoch": 0.6320355723649745, "grad_norm": 1.2853600978851318, "learning_rate": 7.733334528146753e-05, "loss": 0.8824, "step": 98930 }, { "epoch": 0.6320994595147132, "grad_norm": 1.0082619190216064, "learning_rate": 7.732914358448448e-05, "loss": 0.9314, "step": 98940 }, { "epoch": 0.632163346664452, "grad_norm": 0.7332447171211243, "learning_rate": 7.73249416122733e-05, "loss": 0.7704, "step": 98950 }, { "epoch": 0.6322272338141907, "grad_norm": 1.46458101272583, "learning_rate": 7.732073936487631e-05, "loss": 0.9249, "step": 98960 }, { "epoch": 0.6322911209639294, "grad_norm": 0.8572206497192383, "learning_rate": 7.731653684233585e-05, "loss": 1.1214, "step": 98970 }, { "epoch": 0.6323550081136681, "grad_norm": 0.780282199382782, "learning_rate": 7.731233404469424e-05, "loss": 0.7917, "step": 98980 }, { "epoch": 0.6324188952634067, "grad_norm": 0.9528806209564209, "learning_rate": 7.730813097199379e-05, "loss": 0.9149, "step": 98990 }, { "epoch": 0.6324827824131454, "grad_norm": 0.9242857098579407, "learning_rate": 7.730392762427683e-05, "loss": 0.7985, "step": 99000 }, { "epoch": 0.6325466695628841, "grad_norm": 0.7129524350166321, "learning_rate": 7.72997240015857e-05, "loss": 0.978, "step": 99010 }, { "epoch": 0.6326105567126228, "grad_norm": 0.7845136523246765, "learning_rate": 7.729552010396274e-05, "loss": 0.8295, "step": 99020 }, { "epoch": 0.6326744438623615, "grad_norm": 0.6033239960670471, "learning_rate": 7.729131593145027e-05, "loss": 1.1444, "step": 99030 }, { "epoch": 0.6327383310121002, "grad_norm": 0.8433771133422852, "learning_rate": 7.728711148409063e-05, "loss": 1.1936, "step": 99040 }, { "epoch": 0.6328022181618389, "grad_norm": 1.812710165977478, "learning_rate": 7.728290676192619e-05, "loss": 0.7792, "step": 99050 }, { "epoch": 0.6328661053115776, "grad_norm": 0.8290817737579346, "learning_rate": 7.727870176499928e-05, "loss": 0.756, "step": 99060 }, { "epoch": 0.6329299924613163, "grad_norm": 1.5934466123580933, "learning_rate": 7.727449649335222e-05, "loss": 1.0873, "step": 99070 }, { "epoch": 0.632993879611055, "grad_norm": 1.0674597024917603, "learning_rate": 7.727029094702739e-05, "loss": 0.7499, "step": 99080 }, { "epoch": 0.6330577667607937, "grad_norm": 0.7989637851715088, "learning_rate": 7.726608512606714e-05, "loss": 0.926, "step": 99090 }, { "epoch": 0.6331216539105324, "grad_norm": 0.6635336875915527, "learning_rate": 7.726187903051383e-05, "loss": 0.8383, "step": 99100 }, { "epoch": 0.6331855410602711, "grad_norm": 0.4389435648918152, "learning_rate": 7.725767266040982e-05, "loss": 0.8101, "step": 99110 }, { "epoch": 0.6332494282100098, "grad_norm": 0.8760795593261719, "learning_rate": 7.725346601579744e-05, "loss": 1.3585, "step": 99120 }, { "epoch": 0.6333133153597486, "grad_norm": 1.2530437707901, "learning_rate": 7.72492590967191e-05, "loss": 1.0076, "step": 99130 }, { "epoch": 0.6333772025094873, "grad_norm": 1.7212085723876953, "learning_rate": 7.724505190321714e-05, "loss": 1.144, "step": 99140 }, { "epoch": 0.633441089659226, "grad_norm": 1.0681732892990112, "learning_rate": 7.724084443533395e-05, "loss": 0.8348, "step": 99150 }, { "epoch": 0.6335049768089647, "grad_norm": 0.8044700026512146, "learning_rate": 7.723663669311188e-05, "loss": 0.9591, "step": 99160 }, { "epoch": 0.6335688639587034, "grad_norm": 1.0960590839385986, "learning_rate": 7.723242867659331e-05, "loss": 0.9373, "step": 99170 }, { "epoch": 0.6336327511084421, "grad_norm": 1.0857396125793457, "learning_rate": 7.722822038582062e-05, "loss": 1.23, "step": 99180 }, { "epoch": 0.6336966382581808, "grad_norm": 0.9322388768196106, "learning_rate": 7.722401182083621e-05, "loss": 0.8729, "step": 99190 }, { "epoch": 0.6337605254079195, "grad_norm": 0.8809077143669128, "learning_rate": 7.721980298168243e-05, "loss": 1.1811, "step": 99200 }, { "epoch": 0.6338244125576582, "grad_norm": 0.662137508392334, "learning_rate": 7.721559386840172e-05, "loss": 0.8042, "step": 99210 }, { "epoch": 0.6338882997073969, "grad_norm": 0.7486308813095093, "learning_rate": 7.72113844810364e-05, "loss": 0.8313, "step": 99220 }, { "epoch": 0.6339521868571355, "grad_norm": 1.1322681903839111, "learning_rate": 7.720717481962891e-05, "loss": 0.9974, "step": 99230 }, { "epoch": 0.6340160740068742, "grad_norm": 0.9434690475463867, "learning_rate": 7.720296488422163e-05, "loss": 0.7033, "step": 99240 }, { "epoch": 0.6340799611566129, "grad_norm": 1.0906821489334106, "learning_rate": 7.719875467485696e-05, "loss": 0.7467, "step": 99250 }, { "epoch": 0.6341438483063516, "grad_norm": 0.9866161942481995, "learning_rate": 7.71945441915773e-05, "loss": 0.9562, "step": 99260 }, { "epoch": 0.6342077354560903, "grad_norm": 1.0579166412353516, "learning_rate": 7.719033343442506e-05, "loss": 0.7671, "step": 99270 }, { "epoch": 0.634271622605829, "grad_norm": 0.8389692902565002, "learning_rate": 7.718612240344264e-05, "loss": 0.9807, "step": 99280 }, { "epoch": 0.6343355097555677, "grad_norm": 0.9779929518699646, "learning_rate": 7.718191109867244e-05, "loss": 0.9206, "step": 99290 }, { "epoch": 0.6343993969053064, "grad_norm": 1.6635096073150635, "learning_rate": 7.717769952015687e-05, "loss": 0.8582, "step": 99300 }, { "epoch": 0.6344632840550452, "grad_norm": 1.1410149335861206, "learning_rate": 7.717348766793837e-05, "loss": 0.9895, "step": 99310 }, { "epoch": 0.6345271712047839, "grad_norm": 0.8290955424308777, "learning_rate": 7.716927554205935e-05, "loss": 0.6947, "step": 99320 }, { "epoch": 0.6345910583545226, "grad_norm": 0.919790506362915, "learning_rate": 7.71650631425622e-05, "loss": 0.8935, "step": 99330 }, { "epoch": 0.6346549455042613, "grad_norm": 0.9774859547615051, "learning_rate": 7.716085046948937e-05, "loss": 0.9696, "step": 99340 }, { "epoch": 0.634718832654, "grad_norm": 0.6959844827651978, "learning_rate": 7.715663752288328e-05, "loss": 0.8616, "step": 99350 }, { "epoch": 0.6347827198037387, "grad_norm": 0.7934970855712891, "learning_rate": 7.715242430278636e-05, "loss": 1.0319, "step": 99360 }, { "epoch": 0.6348466069534774, "grad_norm": 0.8985568284988403, "learning_rate": 7.714821080924102e-05, "loss": 1.032, "step": 99370 }, { "epoch": 0.6349104941032161, "grad_norm": 0.8157596588134766, "learning_rate": 7.714399704228972e-05, "loss": 0.8366, "step": 99380 }, { "epoch": 0.6349743812529548, "grad_norm": 0.6816175580024719, "learning_rate": 7.713978300197488e-05, "loss": 0.8663, "step": 99390 }, { "epoch": 0.6350382684026935, "grad_norm": 1.0564788579940796, "learning_rate": 7.713556868833896e-05, "loss": 0.9809, "step": 99400 }, { "epoch": 0.6351021555524322, "grad_norm": 1.043823480606079, "learning_rate": 7.713135410142437e-05, "loss": 0.7598, "step": 99410 }, { "epoch": 0.6351660427021709, "grad_norm": 0.858410120010376, "learning_rate": 7.71271392412736e-05, "loss": 0.9618, "step": 99420 }, { "epoch": 0.6352299298519096, "grad_norm": 1.3805270195007324, "learning_rate": 7.712292410792905e-05, "loss": 0.9138, "step": 99430 }, { "epoch": 0.6352938170016483, "grad_norm": 1.437741756439209, "learning_rate": 7.711870870143321e-05, "loss": 1.0721, "step": 99440 }, { "epoch": 0.635357704151387, "grad_norm": 0.5732793807983398, "learning_rate": 7.711449302182849e-05, "loss": 0.8065, "step": 99450 }, { "epoch": 0.6354215913011257, "grad_norm": 0.852961003780365, "learning_rate": 7.711027706915738e-05, "loss": 0.8523, "step": 99460 }, { "epoch": 0.6354854784508643, "grad_norm": 0.7680826783180237, "learning_rate": 7.710606084346232e-05, "loss": 1.0963, "step": 99470 }, { "epoch": 0.635549365600603, "grad_norm": 0.7236658334732056, "learning_rate": 7.710184434478577e-05, "loss": 1.0257, "step": 99480 }, { "epoch": 0.6356132527503418, "grad_norm": 0.8568646907806396, "learning_rate": 7.709762757317021e-05, "loss": 0.7875, "step": 99490 }, { "epoch": 0.6356771399000805, "grad_norm": 1.2610046863555908, "learning_rate": 7.709341052865811e-05, "loss": 1.0912, "step": 99500 }, { "epoch": 0.6357410270498192, "grad_norm": 0.8206515312194824, "learning_rate": 7.708919321129192e-05, "loss": 1.2463, "step": 99510 }, { "epoch": 0.6358049141995579, "grad_norm": 1.1320310831069946, "learning_rate": 7.70849756211141e-05, "loss": 0.7328, "step": 99520 }, { "epoch": 0.6358688013492966, "grad_norm": 0.832253098487854, "learning_rate": 7.708075775816715e-05, "loss": 1.0437, "step": 99530 }, { "epoch": 0.6359326884990353, "grad_norm": 0.8778328895568848, "learning_rate": 7.707653962249355e-05, "loss": 0.748, "step": 99540 }, { "epoch": 0.635996575648774, "grad_norm": 0.6165944933891296, "learning_rate": 7.707232121413577e-05, "loss": 0.9211, "step": 99550 }, { "epoch": 0.6360604627985127, "grad_norm": 0.7879787683486938, "learning_rate": 7.70681025331363e-05, "loss": 0.7676, "step": 99560 }, { "epoch": 0.6361243499482514, "grad_norm": 1.0421432256698608, "learning_rate": 7.70638835795376e-05, "loss": 0.951, "step": 99570 }, { "epoch": 0.6361882370979901, "grad_norm": 0.9447425603866577, "learning_rate": 7.705966435338218e-05, "loss": 0.9486, "step": 99580 }, { "epoch": 0.6362521242477288, "grad_norm": Infinity, "learning_rate": 7.705586681684145e-05, "loss": 1.0416, "step": 99590 }, { "epoch": 0.6363160113974675, "grad_norm": 0.745619535446167, "learning_rate": 7.705164707294533e-05, "loss": 0.7663, "step": 99600 }, { "epoch": 0.6363798985472062, "grad_norm": 1.6954360008239746, "learning_rate": 7.704742705661573e-05, "loss": 1.0033, "step": 99610 }, { "epoch": 0.6364437856969449, "grad_norm": 0.6701345443725586, "learning_rate": 7.704320676789514e-05, "loss": 0.7165, "step": 99620 }, { "epoch": 0.6365076728466836, "grad_norm": 0.7628158926963806, "learning_rate": 7.703898620682606e-05, "loss": 0.8939, "step": 99630 }, { "epoch": 0.6365715599964223, "grad_norm": 1.4524946212768555, "learning_rate": 7.7034765373451e-05, "loss": 0.9185, "step": 99640 }, { "epoch": 0.636635447146161, "grad_norm": 1.0932461023330688, "learning_rate": 7.703054426781246e-05, "loss": 0.8613, "step": 99650 }, { "epoch": 0.6366993342958998, "grad_norm": 0.8748453855514526, "learning_rate": 7.702632288995297e-05, "loss": 0.9724, "step": 99660 }, { "epoch": 0.6367632214456385, "grad_norm": 0.6235826015472412, "learning_rate": 7.7022101239915e-05, "loss": 0.8267, "step": 99670 }, { "epoch": 0.6368271085953772, "grad_norm": 0.9133662581443787, "learning_rate": 7.701787931774111e-05, "loss": 0.9088, "step": 99680 }, { "epoch": 0.6368909957451159, "grad_norm": 0.8982312679290771, "learning_rate": 7.701365712347379e-05, "loss": 0.9541, "step": 99690 }, { "epoch": 0.6369548828948546, "grad_norm": 0.6857670545578003, "learning_rate": 7.700943465715557e-05, "loss": 1.0629, "step": 99700 }, { "epoch": 0.6370187700445933, "grad_norm": 0.8805925250053406, "learning_rate": 7.7005211918829e-05, "loss": 0.9387, "step": 99710 }, { "epoch": 0.6370826571943319, "grad_norm": 0.7308109402656555, "learning_rate": 7.700098890853658e-05, "loss": 0.6636, "step": 99720 }, { "epoch": 0.6371465443440706, "grad_norm": 0.8776551485061646, "learning_rate": 7.699676562632084e-05, "loss": 0.9326, "step": 99730 }, { "epoch": 0.6372104314938093, "grad_norm": 0.6855533719062805, "learning_rate": 7.699254207222429e-05, "loss": 1.1151, "step": 99740 }, { "epoch": 0.637274318643548, "grad_norm": 1.2820175886154175, "learning_rate": 7.698831824628951e-05, "loss": 1.0056, "step": 99750 }, { "epoch": 0.6373382057932867, "grad_norm": 0.8445931673049927, "learning_rate": 7.698409414855902e-05, "loss": 0.9203, "step": 99760 }, { "epoch": 0.6374020929430254, "grad_norm": 1.1383895874023438, "learning_rate": 7.697986977907534e-05, "loss": 0.9536, "step": 99770 }, { "epoch": 0.6374659800927641, "grad_norm": 1.0668944120407104, "learning_rate": 7.697564513788105e-05, "loss": 1.3005, "step": 99780 }, { "epoch": 0.6375298672425028, "grad_norm": 1.0279992818832397, "learning_rate": 7.697142022501866e-05, "loss": 0.9918, "step": 99790 }, { "epoch": 0.6375937543922415, "grad_norm": 0.8683011531829834, "learning_rate": 7.696719504053075e-05, "loss": 1.1289, "step": 99800 }, { "epoch": 0.6376576415419802, "grad_norm": 0.6475001573562622, "learning_rate": 7.696296958445985e-05, "loss": 0.9956, "step": 99810 }, { "epoch": 0.6377215286917189, "grad_norm": 0.9741643667221069, "learning_rate": 7.695874385684852e-05, "loss": 0.8118, "step": 99820 }, { "epoch": 0.6377854158414576, "grad_norm": 0.6049178838729858, "learning_rate": 7.695451785773931e-05, "loss": 0.9677, "step": 99830 }, { "epoch": 0.6378493029911964, "grad_norm": 0.7751360535621643, "learning_rate": 7.695029158717479e-05, "loss": 0.7172, "step": 99840 }, { "epoch": 0.6379131901409351, "grad_norm": 0.6430035829544067, "learning_rate": 7.694606504519752e-05, "loss": 0.9808, "step": 99850 }, { "epoch": 0.6379770772906738, "grad_norm": 1.0987446308135986, "learning_rate": 7.694183823185005e-05, "loss": 0.8621, "step": 99860 }, { "epoch": 0.6380409644404125, "grad_norm": 0.7962204217910767, "learning_rate": 7.6937611147175e-05, "loss": 0.9515, "step": 99870 }, { "epoch": 0.6381048515901512, "grad_norm": 0.8740015625953674, "learning_rate": 7.693338379121486e-05, "loss": 0.865, "step": 99880 }, { "epoch": 0.6381687387398899, "grad_norm": 0.8522897362709045, "learning_rate": 7.692915616401226e-05, "loss": 0.7795, "step": 99890 }, { "epoch": 0.6382326258896286, "grad_norm": 0.8299471139907837, "learning_rate": 7.692492826560978e-05, "loss": 1.0699, "step": 99900 }, { "epoch": 0.6382965130393673, "grad_norm": 1.0273027420043945, "learning_rate": 7.692070009604994e-05, "loss": 0.8754, "step": 99910 }, { "epoch": 0.638360400189106, "grad_norm": 0.8855130672454834, "learning_rate": 7.69164716553754e-05, "loss": 0.9245, "step": 99920 }, { "epoch": 0.6384242873388447, "grad_norm": 0.9745055437088013, "learning_rate": 7.691224294362866e-05, "loss": 0.6921, "step": 99930 }, { "epoch": 0.6384881744885834, "grad_norm": 0.7872833609580994, "learning_rate": 7.690801396085239e-05, "loss": 0.7978, "step": 99940 }, { "epoch": 0.6385520616383221, "grad_norm": 0.8484395742416382, "learning_rate": 7.690378470708912e-05, "loss": 0.6428, "step": 99950 }, { "epoch": 0.6386159487880607, "grad_norm": 0.9526743292808533, "learning_rate": 7.689955518238148e-05, "loss": 0.9055, "step": 99960 }, { "epoch": 0.6386798359377994, "grad_norm": 0.7249189615249634, "learning_rate": 7.689532538677203e-05, "loss": 0.9418, "step": 99970 }, { "epoch": 0.6387437230875381, "grad_norm": 0.695597767829895, "learning_rate": 7.689109532030339e-05, "loss": 0.9258, "step": 99980 }, { "epoch": 0.6388076102372768, "grad_norm": 1.010576605796814, "learning_rate": 7.688686498301816e-05, "loss": 0.8175, "step": 99990 }, { "epoch": 0.6388714973870155, "grad_norm": 0.8327822685241699, "learning_rate": 7.688263437495892e-05, "loss": 0.8687, "step": 100000 }, { "epoch": 0.6389353845367542, "grad_norm": 0.7016774415969849, "learning_rate": 7.687840349616833e-05, "loss": 1.0383, "step": 100010 }, { "epoch": 0.638999271686493, "grad_norm": 0.9525433778762817, "learning_rate": 7.687417234668895e-05, "loss": 0.8909, "step": 100020 }, { "epoch": 0.6390631588362317, "grad_norm": 0.8068029284477234, "learning_rate": 7.686994092656339e-05, "loss": 0.9069, "step": 100030 }, { "epoch": 0.6391270459859704, "grad_norm": 1.108211636543274, "learning_rate": 7.686570923583429e-05, "loss": 0.931, "step": 100040 }, { "epoch": 0.6391909331357091, "grad_norm": 1.0641669034957886, "learning_rate": 7.686147727454426e-05, "loss": 0.8077, "step": 100050 }, { "epoch": 0.6392548202854478, "grad_norm": 0.8529702425003052, "learning_rate": 7.68572450427359e-05, "loss": 1.153, "step": 100060 }, { "epoch": 0.6393187074351865, "grad_norm": 0.6657126545906067, "learning_rate": 7.685301254045188e-05, "loss": 0.7854, "step": 100070 }, { "epoch": 0.6393825945849252, "grad_norm": 1.6039618253707886, "learning_rate": 7.684877976773476e-05, "loss": 0.9493, "step": 100080 }, { "epoch": 0.6394464817346639, "grad_norm": 0.7978668212890625, "learning_rate": 7.684454672462723e-05, "loss": 1.034, "step": 100090 }, { "epoch": 0.6395103688844026, "grad_norm": 0.9934602379798889, "learning_rate": 7.684031341117186e-05, "loss": 1.1376, "step": 100100 }, { "epoch": 0.6395742560341413, "grad_norm": 1.048313856124878, "learning_rate": 7.683607982741132e-05, "loss": 0.9767, "step": 100110 }, { "epoch": 0.63963814318388, "grad_norm": 0.9221808910369873, "learning_rate": 7.683184597338826e-05, "loss": 0.836, "step": 100120 }, { "epoch": 0.6397020303336187, "grad_norm": 0.9617723822593689, "learning_rate": 7.682761184914528e-05, "loss": 0.6673, "step": 100130 }, { "epoch": 0.6397659174833574, "grad_norm": 1.2165039777755737, "learning_rate": 7.682337745472505e-05, "loss": 1.1207, "step": 100140 }, { "epoch": 0.6398298046330961, "grad_norm": 1.1467498540878296, "learning_rate": 7.681914279017019e-05, "loss": 0.8988, "step": 100150 }, { "epoch": 0.6398936917828348, "grad_norm": 1.0085322856903076, "learning_rate": 7.681490785552337e-05, "loss": 0.9632, "step": 100160 }, { "epoch": 0.6399575789325735, "grad_norm": 1.1508851051330566, "learning_rate": 7.681067265082721e-05, "loss": 0.885, "step": 100170 }, { "epoch": 0.6400214660823123, "grad_norm": 1.3940848112106323, "learning_rate": 7.680643717612441e-05, "loss": 0.9147, "step": 100180 }, { "epoch": 0.640085353232051, "grad_norm": 1.0096964836120605, "learning_rate": 7.680220143145757e-05, "loss": 0.9108, "step": 100190 }, { "epoch": 0.6401492403817896, "grad_norm": 0.6056742668151855, "learning_rate": 7.679796541686942e-05, "loss": 0.9895, "step": 100200 }, { "epoch": 0.6402131275315283, "grad_norm": 0.8732916116714478, "learning_rate": 7.679372913240252e-05, "loss": 0.8695, "step": 100210 }, { "epoch": 0.640277014681267, "grad_norm": 1.1940739154815674, "learning_rate": 7.678949257809962e-05, "loss": 0.851, "step": 100220 }, { "epoch": 0.6403409018310057, "grad_norm": 0.9139200448989868, "learning_rate": 7.678525575400335e-05, "loss": 0.8596, "step": 100230 }, { "epoch": 0.6404047889807444, "grad_norm": 0.871724009513855, "learning_rate": 7.678101866015638e-05, "loss": 1.1388, "step": 100240 }, { "epoch": 0.6404686761304831, "grad_norm": 1.4798542261123657, "learning_rate": 7.677678129660137e-05, "loss": 0.8845, "step": 100250 }, { "epoch": 0.6405325632802218, "grad_norm": 0.8862691521644592, "learning_rate": 7.677254366338103e-05, "loss": 0.8407, "step": 100260 }, { "epoch": 0.6405964504299605, "grad_norm": 1.0017880201339722, "learning_rate": 7.676830576053799e-05, "loss": 0.9927, "step": 100270 }, { "epoch": 0.6406603375796992, "grad_norm": 1.1630281209945679, "learning_rate": 7.676406758811497e-05, "loss": 1.0337, "step": 100280 }, { "epoch": 0.6407242247294379, "grad_norm": 0.8417305946350098, "learning_rate": 7.675982914615464e-05, "loss": 0.799, "step": 100290 }, { "epoch": 0.6407881118791766, "grad_norm": 0.5187862515449524, "learning_rate": 7.675559043469966e-05, "loss": 0.9353, "step": 100300 }, { "epoch": 0.6408519990289153, "grad_norm": 0.6978999376296997, "learning_rate": 7.675135145379276e-05, "loss": 0.9321, "step": 100310 }, { "epoch": 0.640915886178654, "grad_norm": 0.9272652268409729, "learning_rate": 7.674711220347659e-05, "loss": 0.9838, "step": 100320 }, { "epoch": 0.6409797733283927, "grad_norm": 0.8608036637306213, "learning_rate": 7.674287268379386e-05, "loss": 0.9587, "step": 100330 }, { "epoch": 0.6410436604781314, "grad_norm": 1.0041062831878662, "learning_rate": 7.673863289478727e-05, "loss": 1.0735, "step": 100340 }, { "epoch": 0.6411075476278701, "grad_norm": 0.7018103003501892, "learning_rate": 7.673439283649952e-05, "loss": 0.9516, "step": 100350 }, { "epoch": 0.6411714347776089, "grad_norm": 1.4162198305130005, "learning_rate": 7.673015250897331e-05, "loss": 1.0228, "step": 100360 }, { "epoch": 0.6412353219273476, "grad_norm": 0.7780821919441223, "learning_rate": 7.672591191225134e-05, "loss": 0.9125, "step": 100370 }, { "epoch": 0.6412992090770863, "grad_norm": 0.9130464792251587, "learning_rate": 7.67216710463763e-05, "loss": 0.779, "step": 100380 }, { "epoch": 0.641363096226825, "grad_norm": 1.32298743724823, "learning_rate": 7.671742991139093e-05, "loss": 0.7318, "step": 100390 }, { "epoch": 0.6414269833765637, "grad_norm": 0.8966230750083923, "learning_rate": 7.671318850733791e-05, "loss": 0.8519, "step": 100400 }, { "epoch": 0.6414908705263024, "grad_norm": 0.7686439752578735, "learning_rate": 7.670894683425997e-05, "loss": 0.796, "step": 100410 }, { "epoch": 0.6415547576760411, "grad_norm": 0.48994553089141846, "learning_rate": 7.670470489219986e-05, "loss": 0.8914, "step": 100420 }, { "epoch": 0.6416186448257798, "grad_norm": 0.9652213454246521, "learning_rate": 7.670046268120023e-05, "loss": 0.8382, "step": 100430 }, { "epoch": 0.6416825319755184, "grad_norm": 0.8491564989089966, "learning_rate": 7.669622020130387e-05, "loss": 0.6973, "step": 100440 }, { "epoch": 0.6417464191252571, "grad_norm": 0.8289761543273926, "learning_rate": 7.669197745255348e-05, "loss": 1.0381, "step": 100450 }, { "epoch": 0.6418103062749958, "grad_norm": 1.043124794960022, "learning_rate": 7.668773443499176e-05, "loss": 0.9624, "step": 100460 }, { "epoch": 0.6418741934247345, "grad_norm": 0.8697907328605652, "learning_rate": 7.668349114866149e-05, "loss": 0.8019, "step": 100470 }, { "epoch": 0.6419380805744732, "grad_norm": 2.3505332469940186, "learning_rate": 7.667924759360537e-05, "loss": 0.7879, "step": 100480 }, { "epoch": 0.6420019677242119, "grad_norm": 0.638027548789978, "learning_rate": 7.667500376986614e-05, "loss": 0.8642, "step": 100490 }, { "epoch": 0.6420658548739506, "grad_norm": 0.7815401554107666, "learning_rate": 7.667075967748655e-05, "loss": 0.9564, "step": 100500 }, { "epoch": 0.6421297420236893, "grad_norm": 0.5453735589981079, "learning_rate": 7.666651531650934e-05, "loss": 0.9481, "step": 100510 }, { "epoch": 0.642193629173428, "grad_norm": 0.6826755404472351, "learning_rate": 7.666227068697722e-05, "loss": 0.9086, "step": 100520 }, { "epoch": 0.6422575163231667, "grad_norm": 1.5171852111816406, "learning_rate": 7.665802578893301e-05, "loss": 0.993, "step": 100530 }, { "epoch": 0.6423214034729055, "grad_norm": 1.0722650289535522, "learning_rate": 7.665378062241939e-05, "loss": 1.005, "step": 100540 }, { "epoch": 0.6423852906226442, "grad_norm": 0.9586762189865112, "learning_rate": 7.664953518747916e-05, "loss": 0.8817, "step": 100550 }, { "epoch": 0.6424491777723829, "grad_norm": 1.246511459350586, "learning_rate": 7.664528948415505e-05, "loss": 0.7576, "step": 100560 }, { "epoch": 0.6425130649221216, "grad_norm": 0.9459572434425354, "learning_rate": 7.664104351248982e-05, "loss": 1.0113, "step": 100570 }, { "epoch": 0.6425769520718603, "grad_norm": 0.9673700332641602, "learning_rate": 7.663679727252624e-05, "loss": 1.2202, "step": 100580 }, { "epoch": 0.642640839221599, "grad_norm": 1.0018703937530518, "learning_rate": 7.663255076430707e-05, "loss": 0.8106, "step": 100590 }, { "epoch": 0.6427047263713377, "grad_norm": 2.040289878845215, "learning_rate": 7.662830398787506e-05, "loss": 0.8261, "step": 100600 }, { "epoch": 0.6427686135210764, "grad_norm": 1.0974010229110718, "learning_rate": 7.662405694327302e-05, "loss": 0.9523, "step": 100610 }, { "epoch": 0.6428325006708151, "grad_norm": 0.9285750389099121, "learning_rate": 7.661980963054366e-05, "loss": 0.7965, "step": 100620 }, { "epoch": 0.6428963878205538, "grad_norm": 0.529984176158905, "learning_rate": 7.66155620497298e-05, "loss": 0.6934, "step": 100630 }, { "epoch": 0.6429602749702925, "grad_norm": 0.9809777736663818, "learning_rate": 7.661131420087421e-05, "loss": 1.0064, "step": 100640 }, { "epoch": 0.6430241621200312, "grad_norm": 0.7377033829689026, "learning_rate": 7.660706608401965e-05, "loss": 0.9637, "step": 100650 }, { "epoch": 0.6430880492697699, "grad_norm": 0.7346864342689514, "learning_rate": 7.660281769920893e-05, "loss": 0.8089, "step": 100660 }, { "epoch": 0.6431519364195086, "grad_norm": 0.6735924482345581, "learning_rate": 7.659856904648482e-05, "loss": 0.7855, "step": 100670 }, { "epoch": 0.6432158235692473, "grad_norm": 1.3427221775054932, "learning_rate": 7.659432012589009e-05, "loss": 0.7815, "step": 100680 }, { "epoch": 0.6432797107189859, "grad_norm": 0.7119907736778259, "learning_rate": 7.659007093746757e-05, "loss": 0.8669, "step": 100690 }, { "epoch": 0.6433435978687246, "grad_norm": 1.4726430177688599, "learning_rate": 7.658582148126001e-05, "loss": 0.9779, "step": 100700 }, { "epoch": 0.6434074850184633, "grad_norm": 0.721088171005249, "learning_rate": 7.658157175731024e-05, "loss": 0.7114, "step": 100710 }, { "epoch": 0.643471372168202, "grad_norm": 0.8141944408416748, "learning_rate": 7.657732176566105e-05, "loss": 1.0834, "step": 100720 }, { "epoch": 0.6435352593179408, "grad_norm": 1.5644798278808594, "learning_rate": 7.657307150635524e-05, "loss": 1.3679, "step": 100730 }, { "epoch": 0.6435991464676795, "grad_norm": 1.1040544509887695, "learning_rate": 7.65688209794356e-05, "loss": 0.913, "step": 100740 }, { "epoch": 0.6436630336174182, "grad_norm": 0.8945556282997131, "learning_rate": 7.656457018494496e-05, "loss": 0.9663, "step": 100750 }, { "epoch": 0.6437269207671569, "grad_norm": 0.8544933199882507, "learning_rate": 7.656031912292612e-05, "loss": 0.7184, "step": 100760 }, { "epoch": 0.6437908079168956, "grad_norm": 0.5265579223632812, "learning_rate": 7.655606779342188e-05, "loss": 0.9399, "step": 100770 }, { "epoch": 0.6438546950666343, "grad_norm": 0.9724834561347961, "learning_rate": 7.655181619647505e-05, "loss": 0.919, "step": 100780 }, { "epoch": 0.643918582216373, "grad_norm": 0.756826639175415, "learning_rate": 7.654756433212848e-05, "loss": 0.7773, "step": 100790 }, { "epoch": 0.6439824693661117, "grad_norm": 0.599709689617157, "learning_rate": 7.654331220042497e-05, "loss": 0.8449, "step": 100800 }, { "epoch": 0.6440463565158504, "grad_norm": 0.8727756142616272, "learning_rate": 7.653905980140734e-05, "loss": 0.8233, "step": 100810 }, { "epoch": 0.6441102436655891, "grad_norm": 1.616363286972046, "learning_rate": 7.653480713511841e-05, "loss": 0.8221, "step": 100820 }, { "epoch": 0.6441741308153278, "grad_norm": 0.8817083239555359, "learning_rate": 7.653055420160102e-05, "loss": 0.8405, "step": 100830 }, { "epoch": 0.6442380179650665, "grad_norm": 1.3261228799819946, "learning_rate": 7.6526301000898e-05, "loss": 0.9731, "step": 100840 }, { "epoch": 0.6443019051148052, "grad_norm": 0.7460963726043701, "learning_rate": 7.652204753305217e-05, "loss": 0.8105, "step": 100850 }, { "epoch": 0.6443657922645439, "grad_norm": 1.7109055519104004, "learning_rate": 7.651779379810639e-05, "loss": 1.1521, "step": 100860 }, { "epoch": 0.6444296794142826, "grad_norm": 1.031250238418579, "learning_rate": 7.651353979610348e-05, "loss": 0.9203, "step": 100870 }, { "epoch": 0.6444935665640213, "grad_norm": 1.0401231050491333, "learning_rate": 7.650928552708628e-05, "loss": 0.7879, "step": 100880 }, { "epoch": 0.64455745371376, "grad_norm": 0.9327844381332397, "learning_rate": 7.650503099109765e-05, "loss": 0.9528, "step": 100890 }, { "epoch": 0.6446213408634988, "grad_norm": 0.803861677646637, "learning_rate": 7.650077618818044e-05, "loss": 0.7373, "step": 100900 }, { "epoch": 0.6446852280132375, "grad_norm": 1.1715584993362427, "learning_rate": 7.649652111837746e-05, "loss": 0.963, "step": 100910 }, { "epoch": 0.6447491151629762, "grad_norm": 0.7066060900688171, "learning_rate": 7.649226578173161e-05, "loss": 0.8937, "step": 100920 }, { "epoch": 0.6448130023127148, "grad_norm": 0.9723853468894958, "learning_rate": 7.648801017828571e-05, "loss": 0.9408, "step": 100930 }, { "epoch": 0.6448768894624535, "grad_norm": 0.7268878817558289, "learning_rate": 7.648375430808264e-05, "loss": 0.9303, "step": 100940 }, { "epoch": 0.6449407766121922, "grad_norm": 0.8822718262672424, "learning_rate": 7.647949817116525e-05, "loss": 1.1401, "step": 100950 }, { "epoch": 0.6450046637619309, "grad_norm": 0.9792453646659851, "learning_rate": 7.64752417675764e-05, "loss": 0.9359, "step": 100960 }, { "epoch": 0.6450685509116696, "grad_norm": 0.5390404462814331, "learning_rate": 7.647098509735897e-05, "loss": 0.8654, "step": 100970 }, { "epoch": 0.6451324380614083, "grad_norm": 0.7712870836257935, "learning_rate": 7.646672816055583e-05, "loss": 0.8716, "step": 100980 }, { "epoch": 0.645196325211147, "grad_norm": 0.711517333984375, "learning_rate": 7.646247095720982e-05, "loss": 0.978, "step": 100990 }, { "epoch": 0.6452602123608857, "grad_norm": 0.6369432210922241, "learning_rate": 7.645821348736383e-05, "loss": 0.8599, "step": 101000 }, { "epoch": 0.6453240995106244, "grad_norm": 1.0040960311889648, "learning_rate": 7.645395575106075e-05, "loss": 0.8098, "step": 101010 }, { "epoch": 0.6453879866603631, "grad_norm": 0.9577940106391907, "learning_rate": 7.644969774834348e-05, "loss": 0.6533, "step": 101020 }, { "epoch": 0.6454518738101018, "grad_norm": 0.8677441477775574, "learning_rate": 7.644543947925483e-05, "loss": 0.8457, "step": 101030 }, { "epoch": 0.6455157609598405, "grad_norm": 0.7165183424949646, "learning_rate": 7.644118094383774e-05, "loss": 0.8876, "step": 101040 }, { "epoch": 0.6455796481095792, "grad_norm": 1.3950011730194092, "learning_rate": 7.643692214213507e-05, "loss": 1.1486, "step": 101050 }, { "epoch": 0.645643535259318, "grad_norm": 0.824797511100769, "learning_rate": 7.643266307418974e-05, "loss": 0.8211, "step": 101060 }, { "epoch": 0.6457074224090567, "grad_norm": 0.7592344284057617, "learning_rate": 7.642840374004463e-05, "loss": 1.0379, "step": 101070 }, { "epoch": 0.6457713095587954, "grad_norm": 0.9701903462409973, "learning_rate": 7.642414413974262e-05, "loss": 0.6966, "step": 101080 }, { "epoch": 0.6458351967085341, "grad_norm": 0.8895474672317505, "learning_rate": 7.641988427332663e-05, "loss": 1.1851, "step": 101090 }, { "epoch": 0.6458990838582728, "grad_norm": 0.7744872570037842, "learning_rate": 7.641562414083952e-05, "loss": 0.8782, "step": 101100 }, { "epoch": 0.6459629710080115, "grad_norm": 1.7571711540222168, "learning_rate": 7.641136374232425e-05, "loss": 0.9443, "step": 101110 }, { "epoch": 0.6460268581577502, "grad_norm": 0.6616213917732239, "learning_rate": 7.640710307782368e-05, "loss": 0.7928, "step": 101120 }, { "epoch": 0.6460907453074889, "grad_norm": 0.7461645603179932, "learning_rate": 7.640284214738075e-05, "loss": 1.0164, "step": 101130 }, { "epoch": 0.6461546324572276, "grad_norm": 0.9420298933982849, "learning_rate": 7.639858095103836e-05, "loss": 0.7869, "step": 101140 }, { "epoch": 0.6462185196069663, "grad_norm": 1.6276788711547852, "learning_rate": 7.639431948883941e-05, "loss": 0.6882, "step": 101150 }, { "epoch": 0.646282406756705, "grad_norm": 1.1304795742034912, "learning_rate": 7.639005776082683e-05, "loss": 0.892, "step": 101160 }, { "epoch": 0.6463462939064436, "grad_norm": 0.9252867102622986, "learning_rate": 7.638579576704355e-05, "loss": 0.8588, "step": 101170 }, { "epoch": 0.6464101810561823, "grad_norm": 1.1409999132156372, "learning_rate": 7.638153350753246e-05, "loss": 0.927, "step": 101180 }, { "epoch": 0.646474068205921, "grad_norm": 1.0091885328292847, "learning_rate": 7.637727098233651e-05, "loss": 0.9428, "step": 101190 }, { "epoch": 0.6465379553556597, "grad_norm": 0.8046776652336121, "learning_rate": 7.637300819149862e-05, "loss": 1.0972, "step": 101200 }, { "epoch": 0.6466018425053984, "grad_norm": 1.0795817375183105, "learning_rate": 7.636874513506174e-05, "loss": 1.1528, "step": 101210 }, { "epoch": 0.6466657296551371, "grad_norm": 1.0064719915390015, "learning_rate": 7.636448181306876e-05, "loss": 0.876, "step": 101220 }, { "epoch": 0.6467296168048758, "grad_norm": 0.786211371421814, "learning_rate": 7.636021822556266e-05, "loss": 0.9598, "step": 101230 }, { "epoch": 0.6467935039546145, "grad_norm": 1.0055882930755615, "learning_rate": 7.635595437258634e-05, "loss": 0.8143, "step": 101240 }, { "epoch": 0.6468573911043533, "grad_norm": 0.8458549380302429, "learning_rate": 7.635169025418278e-05, "loss": 0.89, "step": 101250 }, { "epoch": 0.646921278254092, "grad_norm": 0.9229540228843689, "learning_rate": 7.634742587039489e-05, "loss": 0.6728, "step": 101260 }, { "epoch": 0.6469851654038307, "grad_norm": 0.8958204388618469, "learning_rate": 7.634316122126562e-05, "loss": 0.7874, "step": 101270 }, { "epoch": 0.6470490525535694, "grad_norm": 0.6617315411567688, "learning_rate": 7.633889630683794e-05, "loss": 0.8585, "step": 101280 }, { "epoch": 0.6471129397033081, "grad_norm": 0.5204321146011353, "learning_rate": 7.633463112715477e-05, "loss": 0.8739, "step": 101290 }, { "epoch": 0.6471768268530468, "grad_norm": 0.7551961541175842, "learning_rate": 7.633036568225911e-05, "loss": 0.7589, "step": 101300 }, { "epoch": 0.6472407140027855, "grad_norm": 1.2205754518508911, "learning_rate": 7.632609997219388e-05, "loss": 1.1764, "step": 101310 }, { "epoch": 0.6473046011525242, "grad_norm": 0.5850151181221008, "learning_rate": 7.632183399700204e-05, "loss": 0.7911, "step": 101320 }, { "epoch": 0.6473684883022629, "grad_norm": 0.8024903535842896, "learning_rate": 7.631756775672656e-05, "loss": 0.9156, "step": 101330 }, { "epoch": 0.6474323754520016, "grad_norm": 1.3309029340744019, "learning_rate": 7.63133012514104e-05, "loss": 0.8624, "step": 101340 }, { "epoch": 0.6474962626017403, "grad_norm": 1.0072933435440063, "learning_rate": 7.630903448109654e-05, "loss": 0.7941, "step": 101350 }, { "epoch": 0.647560149751479, "grad_norm": 0.6163814663887024, "learning_rate": 7.630476744582794e-05, "loss": 1.1017, "step": 101360 }, { "epoch": 0.6476240369012177, "grad_norm": 1.1491094827651978, "learning_rate": 7.630050014564755e-05, "loss": 0.7671, "step": 101370 }, { "epoch": 0.6476879240509564, "grad_norm": 0.9501873254776001, "learning_rate": 7.62962325805984e-05, "loss": 0.8288, "step": 101380 }, { "epoch": 0.6477518112006951, "grad_norm": 1.3051087856292725, "learning_rate": 7.629196475072345e-05, "loss": 0.7035, "step": 101390 }, { "epoch": 0.6478156983504338, "grad_norm": 0.8004158139228821, "learning_rate": 7.628769665606564e-05, "loss": 0.8295, "step": 101400 }, { "epoch": 0.6478795855001726, "grad_norm": 0.8266284465789795, "learning_rate": 7.628342829666799e-05, "loss": 0.9296, "step": 101410 }, { "epoch": 0.6479434726499111, "grad_norm": 0.7903985977172852, "learning_rate": 7.627915967257348e-05, "loss": 0.8982, "step": 101420 }, { "epoch": 0.6480073597996499, "grad_norm": 0.9672759771347046, "learning_rate": 7.62748907838251e-05, "loss": 0.9396, "step": 101430 }, { "epoch": 0.6480712469493886, "grad_norm": 0.5555049777030945, "learning_rate": 7.627062163046585e-05, "loss": 0.787, "step": 101440 }, { "epoch": 0.6481351340991273, "grad_norm": 1.0060865879058838, "learning_rate": 7.626635221253871e-05, "loss": 1.3664, "step": 101450 }, { "epoch": 0.648199021248866, "grad_norm": 0.8967821002006531, "learning_rate": 7.626208253008667e-05, "loss": 0.6339, "step": 101460 }, { "epoch": 0.6482629083986047, "grad_norm": 0.9876995086669922, "learning_rate": 7.625781258315273e-05, "loss": 0.8212, "step": 101470 }, { "epoch": 0.6483267955483434, "grad_norm": 0.8965706825256348, "learning_rate": 7.625354237177991e-05, "loss": 0.9081, "step": 101480 }, { "epoch": 0.6483906826980821, "grad_norm": 0.7027506232261658, "learning_rate": 7.624927189601121e-05, "loss": 0.7717, "step": 101490 }, { "epoch": 0.6484545698478208, "grad_norm": 1.207170844078064, "learning_rate": 7.624500115588963e-05, "loss": 1.0589, "step": 101500 }, { "epoch": 0.6485184569975595, "grad_norm": 0.6559182405471802, "learning_rate": 7.624073015145819e-05, "loss": 0.9365, "step": 101510 }, { "epoch": 0.6485823441472982, "grad_norm": 0.8271816372871399, "learning_rate": 7.623645888275988e-05, "loss": 1.0675, "step": 101520 }, { "epoch": 0.6486462312970369, "grad_norm": 0.9976779818534851, "learning_rate": 7.623218734983775e-05, "loss": 0.9971, "step": 101530 }, { "epoch": 0.6487101184467756, "grad_norm": 1.128108263015747, "learning_rate": 7.622791555273478e-05, "loss": 0.7268, "step": 101540 }, { "epoch": 0.6487740055965143, "grad_norm": 1.8108998537063599, "learning_rate": 7.622364349149402e-05, "loss": 0.9972, "step": 101550 }, { "epoch": 0.648837892746253, "grad_norm": 1.99355149269104, "learning_rate": 7.621937116615849e-05, "loss": 0.844, "step": 101560 }, { "epoch": 0.6489017798959917, "grad_norm": 0.9173657894134521, "learning_rate": 7.62150985767712e-05, "loss": 0.7644, "step": 101570 }, { "epoch": 0.6489656670457304, "grad_norm": 0.8146788477897644, "learning_rate": 7.62108257233752e-05, "loss": 0.8551, "step": 101580 }, { "epoch": 0.6490295541954691, "grad_norm": 1.06039297580719, "learning_rate": 7.62065526060135e-05, "loss": 1.0242, "step": 101590 }, { "epoch": 0.6490934413452079, "grad_norm": 0.5522758364677429, "learning_rate": 7.620227922472914e-05, "loss": 1.0448, "step": 101600 }, { "epoch": 0.6491573284949466, "grad_norm": 1.0754237174987793, "learning_rate": 7.619800557956516e-05, "loss": 0.8727, "step": 101610 }, { "epoch": 0.6492212156446853, "grad_norm": 0.5872507691383362, "learning_rate": 7.619373167056461e-05, "loss": 0.7673, "step": 101620 }, { "epoch": 0.649285102794424, "grad_norm": 1.68289053440094, "learning_rate": 7.618945749777051e-05, "loss": 1.2075, "step": 101630 }, { "epoch": 0.6493489899441627, "grad_norm": 0.673235297203064, "learning_rate": 7.618518306122593e-05, "loss": 0.8398, "step": 101640 }, { "epoch": 0.6494128770939014, "grad_norm": 0.8392667174339294, "learning_rate": 7.618090836097389e-05, "loss": 1.0282, "step": 101650 }, { "epoch": 0.64947676424364, "grad_norm": 0.834991991519928, "learning_rate": 7.617706090531277e-05, "loss": 0.8716, "step": 101660 }, { "epoch": 0.6495406513933787, "grad_norm": 1.0076117515563965, "learning_rate": 7.617278570413519e-05, "loss": 1.0367, "step": 101670 }, { "epoch": 0.6496045385431174, "grad_norm": 1.0270442962646484, "learning_rate": 7.616851023937501e-05, "loss": 0.9725, "step": 101680 }, { "epoch": 0.6496684256928561, "grad_norm": 0.7313565611839294, "learning_rate": 7.61642345110753e-05, "loss": 0.858, "step": 101690 }, { "epoch": 0.6497323128425948, "grad_norm": 0.8768913149833679, "learning_rate": 7.615995851927911e-05, "loss": 0.9933, "step": 101700 }, { "epoch": 0.6497961999923335, "grad_norm": 1.0162863731384277, "learning_rate": 7.615568226402951e-05, "loss": 0.8052, "step": 101710 }, { "epoch": 0.6498600871420722, "grad_norm": 0.6026840209960938, "learning_rate": 7.615140574536956e-05, "loss": 0.8807, "step": 101720 }, { "epoch": 0.6499239742918109, "grad_norm": 1.1195095777511597, "learning_rate": 7.614712896334233e-05, "loss": 0.9153, "step": 101730 }, { "epoch": 0.6499878614415496, "grad_norm": 1.266113042831421, "learning_rate": 7.614285191799088e-05, "loss": 1.144, "step": 101740 }, { "epoch": 0.6500517485912883, "grad_norm": 0.7634327411651611, "learning_rate": 7.613857460935831e-05, "loss": 0.7877, "step": 101750 }, { "epoch": 0.650115635741027, "grad_norm": 0.8808565139770508, "learning_rate": 7.613429703748768e-05, "loss": 0.8561, "step": 101760 }, { "epoch": 0.6501795228907657, "grad_norm": 1.3923038244247437, "learning_rate": 7.613001920242206e-05, "loss": 0.9596, "step": 101770 }, { "epoch": 0.6502434100405045, "grad_norm": 1.1155999898910522, "learning_rate": 7.612574110420454e-05, "loss": 0.7707, "step": 101780 }, { "epoch": 0.6503072971902432, "grad_norm": 0.7540896534919739, "learning_rate": 7.612146274287821e-05, "loss": 1.2077, "step": 101790 }, { "epoch": 0.6503711843399819, "grad_norm": 0.7972086071968079, "learning_rate": 7.611718411848617e-05, "loss": 0.9242, "step": 101800 }, { "epoch": 0.6504350714897206, "grad_norm": 0.8194320797920227, "learning_rate": 7.611290523107146e-05, "loss": 1.0412, "step": 101810 }, { "epoch": 0.6504989586394593, "grad_norm": 0.8786047101020813, "learning_rate": 7.610862608067721e-05, "loss": 0.901, "step": 101820 }, { "epoch": 0.650562845789198, "grad_norm": 0.7013679146766663, "learning_rate": 7.610434666734651e-05, "loss": 0.9909, "step": 101830 }, { "epoch": 0.6506267329389367, "grad_norm": 1.3710912466049194, "learning_rate": 7.610006699112248e-05, "loss": 1.1784, "step": 101840 }, { "epoch": 0.6506906200886754, "grad_norm": 0.9212914705276489, "learning_rate": 7.609578705204816e-05, "loss": 0.8956, "step": 101850 }, { "epoch": 0.6507545072384141, "grad_norm": 1.100433588027954, "learning_rate": 7.609150685016671e-05, "loss": 0.8263, "step": 101860 }, { "epoch": 0.6508183943881528, "grad_norm": 1.0776516199111938, "learning_rate": 7.60872263855212e-05, "loss": 0.9641, "step": 101870 }, { "epoch": 0.6508822815378915, "grad_norm": 0.9307558536529541, "learning_rate": 7.608294565815476e-05, "loss": 0.8491, "step": 101880 }, { "epoch": 0.6509461686876302, "grad_norm": 0.9816484451293945, "learning_rate": 7.60786646681105e-05, "loss": 0.5917, "step": 101890 }, { "epoch": 0.6510100558373688, "grad_norm": 0.8662287592887878, "learning_rate": 7.607438341543152e-05, "loss": 0.8599, "step": 101900 }, { "epoch": 0.6510739429871075, "grad_norm": 1.7769293785095215, "learning_rate": 7.607010190016093e-05, "loss": 0.8963, "step": 101910 }, { "epoch": 0.6511378301368462, "grad_norm": 0.9365469813346863, "learning_rate": 7.606582012234188e-05, "loss": 0.8052, "step": 101920 }, { "epoch": 0.6512017172865849, "grad_norm": 0.8617343902587891, "learning_rate": 7.606153808201746e-05, "loss": 0.9196, "step": 101930 }, { "epoch": 0.6512656044363236, "grad_norm": 0.689240038394928, "learning_rate": 7.605725577923081e-05, "loss": 0.9611, "step": 101940 }, { "epoch": 0.6513294915860623, "grad_norm": 0.7218610644340515, "learning_rate": 7.605297321402504e-05, "loss": 0.8778, "step": 101950 }, { "epoch": 0.651393378735801, "grad_norm": 1.1720585823059082, "learning_rate": 7.60486903864433e-05, "loss": 0.8404, "step": 101960 }, { "epoch": 0.6514572658855398, "grad_norm": 1.2702221870422363, "learning_rate": 7.60444072965287e-05, "loss": 1.1349, "step": 101970 }, { "epoch": 0.6515211530352785, "grad_norm": 0.8623278141021729, "learning_rate": 7.60401239443244e-05, "loss": 0.6295, "step": 101980 }, { "epoch": 0.6515850401850172, "grad_norm": 1.1232866048812866, "learning_rate": 7.603584032987353e-05, "loss": 0.9372, "step": 101990 }, { "epoch": 0.6516489273347559, "grad_norm": 1.010554552078247, "learning_rate": 7.603155645321921e-05, "loss": 0.9095, "step": 102000 }, { "epoch": 0.6517128144844946, "grad_norm": 0.9294137358665466, "learning_rate": 7.60272723144046e-05, "loss": 0.9728, "step": 102010 }, { "epoch": 0.6517767016342333, "grad_norm": 0.6776549816131592, "learning_rate": 7.602298791347284e-05, "loss": 0.9536, "step": 102020 }, { "epoch": 0.651840588783972, "grad_norm": 0.8278113007545471, "learning_rate": 7.601870325046707e-05, "loss": 0.691, "step": 102030 }, { "epoch": 0.6519044759337107, "grad_norm": 0.7509378790855408, "learning_rate": 7.601441832543046e-05, "loss": 0.7495, "step": 102040 }, { "epoch": 0.6519683630834494, "grad_norm": 0.6673555374145508, "learning_rate": 7.601013313840616e-05, "loss": 1.0675, "step": 102050 }, { "epoch": 0.6520322502331881, "grad_norm": 1.0808007717132568, "learning_rate": 7.600584768943731e-05, "loss": 0.8746, "step": 102060 }, { "epoch": 0.6520961373829268, "grad_norm": 0.9976995587348938, "learning_rate": 7.600156197856707e-05, "loss": 0.7481, "step": 102070 }, { "epoch": 0.6521600245326655, "grad_norm": 0.7592312693595886, "learning_rate": 7.599727600583861e-05, "loss": 0.7296, "step": 102080 }, { "epoch": 0.6522239116824042, "grad_norm": 0.7450394034385681, "learning_rate": 7.599298977129509e-05, "loss": 0.8024, "step": 102090 }, { "epoch": 0.6522877988321429, "grad_norm": 1.1607773303985596, "learning_rate": 7.598870327497967e-05, "loss": 1.0051, "step": 102100 }, { "epoch": 0.6523516859818816, "grad_norm": 1.8432011604309082, "learning_rate": 7.598441651693554e-05, "loss": 0.8561, "step": 102110 }, { "epoch": 0.6524155731316204, "grad_norm": 0.6616463661193848, "learning_rate": 7.598012949720585e-05, "loss": 0.981, "step": 102120 }, { "epoch": 0.6524794602813591, "grad_norm": 0.715186357498169, "learning_rate": 7.597584221583377e-05, "loss": 0.8713, "step": 102130 }, { "epoch": 0.6525433474310978, "grad_norm": 1.7039902210235596, "learning_rate": 7.597155467286249e-05, "loss": 0.8644, "step": 102140 }, { "epoch": 0.6526072345808364, "grad_norm": 0.7133430242538452, "learning_rate": 7.59672668683352e-05, "loss": 0.8738, "step": 102150 }, { "epoch": 0.6526711217305751, "grad_norm": 0.79267418384552, "learning_rate": 7.596297880229504e-05, "loss": 0.7017, "step": 102160 }, { "epoch": 0.6527350088803138, "grad_norm": 3.0087802410125732, "learning_rate": 7.595869047478524e-05, "loss": 0.8555, "step": 102170 }, { "epoch": 0.6527988960300525, "grad_norm": 1.167046070098877, "learning_rate": 7.595440188584897e-05, "loss": 0.9269, "step": 102180 }, { "epoch": 0.6528627831797912, "grad_norm": 0.7473157644271851, "learning_rate": 7.595011303552941e-05, "loss": 1.1813, "step": 102190 }, { "epoch": 0.6529266703295299, "grad_norm": 2.5482473373413086, "learning_rate": 7.594582392386977e-05, "loss": 0.9303, "step": 102200 }, { "epoch": 0.6529905574792686, "grad_norm": 0.5995567440986633, "learning_rate": 7.594153455091324e-05, "loss": 0.8351, "step": 102210 }, { "epoch": 0.6530544446290073, "grad_norm": 0.8906635046005249, "learning_rate": 7.593724491670302e-05, "loss": 1.0707, "step": 102220 }, { "epoch": 0.653118331778746, "grad_norm": 1.3267639875411987, "learning_rate": 7.593295502128229e-05, "loss": 0.9804, "step": 102230 }, { "epoch": 0.6531822189284847, "grad_norm": 0.9197192788124084, "learning_rate": 7.592866486469427e-05, "loss": 0.831, "step": 102240 }, { "epoch": 0.6532461060782234, "grad_norm": 0.9400060772895813, "learning_rate": 7.592437444698217e-05, "loss": 1.1257, "step": 102250 }, { "epoch": 0.6533099932279621, "grad_norm": 1.9750057458877563, "learning_rate": 7.592008376818918e-05, "loss": 1.1414, "step": 102260 }, { "epoch": 0.6533738803777008, "grad_norm": 0.8337990045547485, "learning_rate": 7.591579282835854e-05, "loss": 0.9497, "step": 102270 }, { "epoch": 0.6534377675274395, "grad_norm": 0.6679349541664124, "learning_rate": 7.591150162753343e-05, "loss": 0.859, "step": 102280 }, { "epoch": 0.6535016546771782, "grad_norm": 1.1458660364151, "learning_rate": 7.590721016575709e-05, "loss": 0.8031, "step": 102290 }, { "epoch": 0.653565541826917, "grad_norm": 1.3683353662490845, "learning_rate": 7.590291844307274e-05, "loss": 0.7009, "step": 102300 }, { "epoch": 0.6536294289766557, "grad_norm": 1.0884777307510376, "learning_rate": 7.589862645952358e-05, "loss": 1.1495, "step": 102310 }, { "epoch": 0.6536933161263944, "grad_norm": 2.0209991931915283, "learning_rate": 7.589433421515284e-05, "loss": 0.7371, "step": 102320 }, { "epoch": 0.6537572032761331, "grad_norm": 0.8924853801727295, "learning_rate": 7.589004171000376e-05, "loss": 0.8453, "step": 102330 }, { "epoch": 0.6538210904258718, "grad_norm": 0.9563447833061218, "learning_rate": 7.588574894411957e-05, "loss": 0.9245, "step": 102340 }, { "epoch": 0.6538849775756105, "grad_norm": 0.6629459857940674, "learning_rate": 7.588145591754348e-05, "loss": 0.8992, "step": 102350 }, { "epoch": 0.6539488647253492, "grad_norm": 0.7915505170822144, "learning_rate": 7.587716263031875e-05, "loss": 0.6657, "step": 102360 }, { "epoch": 0.6540127518750879, "grad_norm": 0.9542189240455627, "learning_rate": 7.587286908248859e-05, "loss": 0.8998, "step": 102370 }, { "epoch": 0.6540766390248266, "grad_norm": 0.6477545499801636, "learning_rate": 7.586857527409625e-05, "loss": 0.828, "step": 102380 }, { "epoch": 0.6541405261745652, "grad_norm": 3.1310455799102783, "learning_rate": 7.586428120518498e-05, "loss": 0.8608, "step": 102390 }, { "epoch": 0.6542044133243039, "grad_norm": 0.7806214094161987, "learning_rate": 7.585998687579805e-05, "loss": 0.7727, "step": 102400 }, { "epoch": 0.6542683004740426, "grad_norm": 0.8768726587295532, "learning_rate": 7.585569228597866e-05, "loss": 0.7605, "step": 102410 }, { "epoch": 0.6543321876237813, "grad_norm": 1.5629066228866577, "learning_rate": 7.585139743577007e-05, "loss": 0.7757, "step": 102420 }, { "epoch": 0.65439607477352, "grad_norm": 0.6218248009681702, "learning_rate": 7.584710232521558e-05, "loss": 0.805, "step": 102430 }, { "epoch": 0.6544599619232587, "grad_norm": 0.555633008480072, "learning_rate": 7.584280695435839e-05, "loss": 0.9534, "step": 102440 }, { "epoch": 0.6545238490729974, "grad_norm": 0.9295303821563721, "learning_rate": 7.583851132324176e-05, "loss": 0.9972, "step": 102450 }, { "epoch": 0.6545877362227361, "grad_norm": 0.4502405822277069, "learning_rate": 7.583421543190899e-05, "loss": 0.8702, "step": 102460 }, { "epoch": 0.6546516233724748, "grad_norm": 1.0970765352249146, "learning_rate": 7.58299192804033e-05, "loss": 0.6886, "step": 102470 }, { "epoch": 0.6547155105222136, "grad_norm": 0.967715859413147, "learning_rate": 7.5825622868768e-05, "loss": 0.769, "step": 102480 }, { "epoch": 0.6547793976719523, "grad_norm": 1.3791766166687012, "learning_rate": 7.582132619704632e-05, "loss": 1.0848, "step": 102490 }, { "epoch": 0.654843284821691, "grad_norm": 1.011976718902588, "learning_rate": 7.581702926528156e-05, "loss": 0.8708, "step": 102500 }, { "epoch": 0.6549071719714297, "grad_norm": 1.187624454498291, "learning_rate": 7.581273207351696e-05, "loss": 0.8925, "step": 102510 }, { "epoch": 0.6549710591211684, "grad_norm": 1.0658549070358276, "learning_rate": 7.580843462179583e-05, "loss": 0.6593, "step": 102520 }, { "epoch": 0.6550349462709071, "grad_norm": 0.7370697855949402, "learning_rate": 7.580413691016144e-05, "loss": 0.9271, "step": 102530 }, { "epoch": 0.6550988334206458, "grad_norm": 1.2200112342834473, "learning_rate": 7.579983893865704e-05, "loss": 0.9089, "step": 102540 }, { "epoch": 0.6551627205703845, "grad_norm": 0.8738793730735779, "learning_rate": 7.579554070732597e-05, "loss": 0.9293, "step": 102550 }, { "epoch": 0.6552266077201232, "grad_norm": 0.6876864433288574, "learning_rate": 7.579124221621148e-05, "loss": 0.7449, "step": 102560 }, { "epoch": 0.6552904948698619, "grad_norm": 0.8214115500450134, "learning_rate": 7.578694346535686e-05, "loss": 0.7657, "step": 102570 }, { "epoch": 0.6553543820196006, "grad_norm": 2.8567984104156494, "learning_rate": 7.578264445480543e-05, "loss": 0.8339, "step": 102580 }, { "epoch": 0.6554182691693393, "grad_norm": 1.8069883584976196, "learning_rate": 7.577834518460046e-05, "loss": 0.772, "step": 102590 }, { "epoch": 0.655482156319078, "grad_norm": 0.7831799387931824, "learning_rate": 7.577404565478525e-05, "loss": 1.1675, "step": 102600 }, { "epoch": 0.6555460434688167, "grad_norm": 1.1496902704238892, "learning_rate": 7.576974586540309e-05, "loss": 0.727, "step": 102610 }, { "epoch": 0.6556099306185554, "grad_norm": 0.9092468619346619, "learning_rate": 7.57654458164973e-05, "loss": 0.7713, "step": 102620 }, { "epoch": 0.655673817768294, "grad_norm": 1.3994219303131104, "learning_rate": 7.57611455081112e-05, "loss": 0.9057, "step": 102630 }, { "epoch": 0.6557377049180327, "grad_norm": 0.7715085744857788, "learning_rate": 7.575684494028805e-05, "loss": 0.9913, "step": 102640 }, { "epoch": 0.6558015920677714, "grad_norm": 1.0739322900772095, "learning_rate": 7.57525441130712e-05, "loss": 1.0197, "step": 102650 }, { "epoch": 0.6558654792175102, "grad_norm": 1.3112781047821045, "learning_rate": 7.574824302650396e-05, "loss": 0.819, "step": 102660 }, { "epoch": 0.6559293663672489, "grad_norm": 0.8328858017921448, "learning_rate": 7.574394168062964e-05, "loss": 1.1271, "step": 102670 }, { "epoch": 0.6559932535169876, "grad_norm": 1.3175311088562012, "learning_rate": 7.573964007549155e-05, "loss": 1.0228, "step": 102680 }, { "epoch": 0.6560571406667263, "grad_norm": 1.8402165174484253, "learning_rate": 7.573533821113302e-05, "loss": 0.6843, "step": 102690 }, { "epoch": 0.656121027816465, "grad_norm": 0.6495104432106018, "learning_rate": 7.573103608759736e-05, "loss": 0.8759, "step": 102700 }, { "epoch": 0.6561849149662037, "grad_norm": 1.0838744640350342, "learning_rate": 7.572673370492788e-05, "loss": 0.9824, "step": 102710 }, { "epoch": 0.6562488021159424, "grad_norm": 1.348099708557129, "learning_rate": 7.572243106316798e-05, "loss": 0.995, "step": 102720 }, { "epoch": 0.6563126892656811, "grad_norm": 1.7211371660232544, "learning_rate": 7.571812816236093e-05, "loss": 1.0677, "step": 102730 }, { "epoch": 0.6563765764154198, "grad_norm": 1.924634337425232, "learning_rate": 7.571382500255009e-05, "loss": 0.6923, "step": 102740 }, { "epoch": 0.6564404635651585, "grad_norm": 0.7509860992431641, "learning_rate": 7.570952158377877e-05, "loss": 1.3095, "step": 102750 }, { "epoch": 0.6565043507148972, "grad_norm": 2.123326301574707, "learning_rate": 7.570521790609033e-05, "loss": 1.0223, "step": 102760 }, { "epoch": 0.6565682378646359, "grad_norm": 0.9180009961128235, "learning_rate": 7.570091396952811e-05, "loss": 0.7883, "step": 102770 }, { "epoch": 0.6566321250143746, "grad_norm": 0.9530285596847534, "learning_rate": 7.569660977413546e-05, "loss": 0.6924, "step": 102780 }, { "epoch": 0.6566960121641133, "grad_norm": 0.9887816905975342, "learning_rate": 7.56923053199557e-05, "loss": 0.9007, "step": 102790 }, { "epoch": 0.656759899313852, "grad_norm": 0.853535532951355, "learning_rate": 7.568800060703222e-05, "loss": 0.9581, "step": 102800 }, { "epoch": 0.6568237864635907, "grad_norm": 0.716618001461029, "learning_rate": 7.568369563540834e-05, "loss": 1.1438, "step": 102810 }, { "epoch": 0.6568876736133294, "grad_norm": 0.9339463114738464, "learning_rate": 7.567939040512742e-05, "loss": 1.0451, "step": 102820 }, { "epoch": 0.6569515607630682, "grad_norm": 1.0368573665618896, "learning_rate": 7.567508491623283e-05, "loss": 0.6927, "step": 102830 }, { "epoch": 0.6570154479128069, "grad_norm": 0.9054602384567261, "learning_rate": 7.567077916876793e-05, "loss": 0.9451, "step": 102840 }, { "epoch": 0.6570793350625456, "grad_norm": 0.7542839646339417, "learning_rate": 7.566647316277607e-05, "loss": 0.6406, "step": 102850 }, { "epoch": 0.6571432222122843, "grad_norm": 0.7233191132545471, "learning_rate": 7.566216689830061e-05, "loss": 0.9122, "step": 102860 }, { "epoch": 0.6572071093620229, "grad_norm": 0.8938806056976318, "learning_rate": 7.565786037538492e-05, "loss": 0.9786, "step": 102870 }, { "epoch": 0.6572709965117616, "grad_norm": 0.8927178978919983, "learning_rate": 7.56535535940724e-05, "loss": 0.8482, "step": 102880 }, { "epoch": 0.6573348836615003, "grad_norm": 0.9564715623855591, "learning_rate": 7.564924655440639e-05, "loss": 0.824, "step": 102890 }, { "epoch": 0.657398770811239, "grad_norm": 1.4152295589447021, "learning_rate": 7.564493925643028e-05, "loss": 0.8507, "step": 102900 }, { "epoch": 0.6574626579609777, "grad_norm": 2.762669324874878, "learning_rate": 7.564063170018745e-05, "loss": 1.0035, "step": 102910 }, { "epoch": 0.6575265451107164, "grad_norm": 0.6634849905967712, "learning_rate": 7.563632388572128e-05, "loss": 1.004, "step": 102920 }, { "epoch": 0.6575904322604551, "grad_norm": 0.7901267409324646, "learning_rate": 7.563201581307516e-05, "loss": 1.004, "step": 102930 }, { "epoch": 0.6576543194101938, "grad_norm": 1.174972653388977, "learning_rate": 7.562770748229245e-05, "loss": 1.0687, "step": 102940 }, { "epoch": 0.6577182065599325, "grad_norm": 0.6787839531898499, "learning_rate": 7.562339889341655e-05, "loss": 0.6921, "step": 102950 }, { "epoch": 0.6577820937096712, "grad_norm": 1.1864277124404907, "learning_rate": 7.561909004649086e-05, "loss": 0.8603, "step": 102960 }, { "epoch": 0.6578459808594099, "grad_norm": 0.715882420539856, "learning_rate": 7.561478094155877e-05, "loss": 0.9814, "step": 102970 }, { "epoch": 0.6579098680091486, "grad_norm": 0.7654950618743896, "learning_rate": 7.561047157866368e-05, "loss": 0.7236, "step": 102980 }, { "epoch": 0.6579737551588873, "grad_norm": 0.8814225196838379, "learning_rate": 7.560616195784898e-05, "loss": 1.0984, "step": 102990 }, { "epoch": 0.658037642308626, "grad_norm": 0.6822389364242554, "learning_rate": 7.560185207915808e-05, "loss": 0.7056, "step": 103000 }, { "epoch": 0.6581015294583648, "grad_norm": 0.9107899069786072, "learning_rate": 7.559754194263438e-05, "loss": 1.0334, "step": 103010 }, { "epoch": 0.6581654166081035, "grad_norm": 1.1239162683486938, "learning_rate": 7.559323154832128e-05, "loss": 1.0588, "step": 103020 }, { "epoch": 0.6582293037578422, "grad_norm": 0.7621965408325195, "learning_rate": 7.558892089626222e-05, "loss": 1.3516, "step": 103030 }, { "epoch": 0.6582931909075809, "grad_norm": 0.9127793312072754, "learning_rate": 7.558460998650056e-05, "loss": 0.8635, "step": 103040 }, { "epoch": 0.6583570780573196, "grad_norm": 1.0723611116409302, "learning_rate": 7.558029881907977e-05, "loss": 0.8784, "step": 103050 }, { "epoch": 0.6584209652070583, "grad_norm": 1.1511666774749756, "learning_rate": 7.557598739404322e-05, "loss": 0.9645, "step": 103060 }, { "epoch": 0.658484852356797, "grad_norm": 0.8360834717750549, "learning_rate": 7.557167571143435e-05, "loss": 1.2718, "step": 103070 }, { "epoch": 0.6585487395065357, "grad_norm": 0.8610426187515259, "learning_rate": 7.556736377129659e-05, "loss": 0.6349, "step": 103080 }, { "epoch": 0.6586126266562744, "grad_norm": 2.5912959575653076, "learning_rate": 7.556305157367336e-05, "loss": 0.9122, "step": 103090 }, { "epoch": 0.6586765138060131, "grad_norm": 0.6567677855491638, "learning_rate": 7.555873911860808e-05, "loss": 0.7129, "step": 103100 }, { "epoch": 0.6587404009557518, "grad_norm": 2.0863733291625977, "learning_rate": 7.55544264061442e-05, "loss": 0.9628, "step": 103110 }, { "epoch": 0.6588042881054904, "grad_norm": 0.9286092519760132, "learning_rate": 7.555011343632512e-05, "loss": 0.8073, "step": 103120 }, { "epoch": 0.6588681752552291, "grad_norm": 0.8470326662063599, "learning_rate": 7.55458002091943e-05, "loss": 0.7094, "step": 103130 }, { "epoch": 0.6589320624049678, "grad_norm": 0.9523374438285828, "learning_rate": 7.554148672479518e-05, "loss": 0.898, "step": 103140 }, { "epoch": 0.6589959495547065, "grad_norm": 0.7024726271629333, "learning_rate": 7.553717298317118e-05, "loss": 0.8187, "step": 103150 }, { "epoch": 0.6590598367044452, "grad_norm": 0.8820728063583374, "learning_rate": 7.553285898436577e-05, "loss": 0.9663, "step": 103160 }, { "epoch": 0.6591237238541839, "grad_norm": 0.6763830184936523, "learning_rate": 7.552854472842238e-05, "loss": 0.7318, "step": 103170 }, { "epoch": 0.6591876110039226, "grad_norm": 0.881584644317627, "learning_rate": 7.552423021538445e-05, "loss": 1.0267, "step": 103180 }, { "epoch": 0.6592514981536614, "grad_norm": 1.0598586797714233, "learning_rate": 7.551991544529544e-05, "loss": 0.9321, "step": 103190 }, { "epoch": 0.6593153853034001, "grad_norm": 0.879578709602356, "learning_rate": 7.55156004181988e-05, "loss": 0.8715, "step": 103200 }, { "epoch": 0.6593792724531388, "grad_norm": 0.9016001224517822, "learning_rate": 7.5511285134138e-05, "loss": 0.8316, "step": 103210 }, { "epoch": 0.6594431596028775, "grad_norm": 0.7828614115715027, "learning_rate": 7.550696959315647e-05, "loss": 0.7837, "step": 103220 }, { "epoch": 0.6595070467526162, "grad_norm": 0.6147605180740356, "learning_rate": 7.550265379529771e-05, "loss": 0.6583, "step": 103230 }, { "epoch": 0.6595709339023549, "grad_norm": 0.5749229192733765, "learning_rate": 7.549833774060515e-05, "loss": 0.7125, "step": 103240 }, { "epoch": 0.6596348210520936, "grad_norm": 0.9546531438827515, "learning_rate": 7.549402142912228e-05, "loss": 0.9588, "step": 103250 }, { "epoch": 0.6596987082018323, "grad_norm": 0.8251008987426758, "learning_rate": 7.548970486089255e-05, "loss": 1.0117, "step": 103260 }, { "epoch": 0.659762595351571, "grad_norm": 1.1722609996795654, "learning_rate": 7.548538803595944e-05, "loss": 0.6943, "step": 103270 }, { "epoch": 0.6598264825013097, "grad_norm": 2.220587730407715, "learning_rate": 7.548107095436644e-05, "loss": 0.6568, "step": 103280 }, { "epoch": 0.6598903696510484, "grad_norm": 1.1250571012496948, "learning_rate": 7.547675361615701e-05, "loss": 0.6475, "step": 103290 }, { "epoch": 0.6599542568007871, "grad_norm": 0.6930386424064636, "learning_rate": 7.547243602137462e-05, "loss": 1.03, "step": 103300 }, { "epoch": 0.6600181439505258, "grad_norm": 0.7208458185195923, "learning_rate": 7.546811817006275e-05, "loss": 0.7767, "step": 103310 }, { "epoch": 0.6600820311002645, "grad_norm": 0.9552310705184937, "learning_rate": 7.546380006226493e-05, "loss": 1.2225, "step": 103320 }, { "epoch": 0.6601459182500032, "grad_norm": 0.8683717250823975, "learning_rate": 7.545948169802458e-05, "loss": 1.0714, "step": 103330 }, { "epoch": 0.660209805399742, "grad_norm": 1.0436851978302002, "learning_rate": 7.545516307738524e-05, "loss": 1.001, "step": 103340 }, { "epoch": 0.6602736925494807, "grad_norm": 1.0748889446258545, "learning_rate": 7.545084420039038e-05, "loss": 0.9228, "step": 103350 }, { "epoch": 0.6603375796992192, "grad_norm": 0.716839075088501, "learning_rate": 7.54465250670835e-05, "loss": 0.9845, "step": 103360 }, { "epoch": 0.660401466848958, "grad_norm": 1.4549845457077026, "learning_rate": 7.54422056775081e-05, "loss": 0.8982, "step": 103370 }, { "epoch": 0.6604653539986967, "grad_norm": 1.124532699584961, "learning_rate": 7.54378860317077e-05, "loss": 0.9131, "step": 103380 }, { "epoch": 0.6605292411484354, "grad_norm": 0.7380385994911194, "learning_rate": 7.543356612972575e-05, "loss": 0.9161, "step": 103390 }, { "epoch": 0.6605931282981741, "grad_norm": 1.4843467473983765, "learning_rate": 7.54292459716058e-05, "loss": 0.9268, "step": 103400 }, { "epoch": 0.6606570154479128, "grad_norm": 0.9203116297721863, "learning_rate": 7.542492555739135e-05, "loss": 0.861, "step": 103410 }, { "epoch": 0.6607209025976515, "grad_norm": 0.9751471877098083, "learning_rate": 7.54206048871259e-05, "loss": 0.7468, "step": 103420 }, { "epoch": 0.6607847897473902, "grad_norm": 1.2186683416366577, "learning_rate": 7.541628396085296e-05, "loss": 0.967, "step": 103430 }, { "epoch": 0.6608486768971289, "grad_norm": 1.1114938259124756, "learning_rate": 7.541196277861604e-05, "loss": 1.0421, "step": 103440 }, { "epoch": 0.6609125640468676, "grad_norm": 1.0457226037979126, "learning_rate": 7.540764134045869e-05, "loss": 0.8752, "step": 103450 }, { "epoch": 0.6609764511966063, "grad_norm": 1.4589784145355225, "learning_rate": 7.540331964642441e-05, "loss": 0.8713, "step": 103460 }, { "epoch": 0.661040338346345, "grad_norm": 1.1460403203964233, "learning_rate": 7.539899769655672e-05, "loss": 0.832, "step": 103470 }, { "epoch": 0.6611042254960837, "grad_norm": 0.9500607252120972, "learning_rate": 7.539467549089914e-05, "loss": 1.0559, "step": 103480 }, { "epoch": 0.6611681126458224, "grad_norm": 1.8726199865341187, "learning_rate": 7.539035302949523e-05, "loss": 0.7371, "step": 103490 }, { "epoch": 0.6612319997955611, "grad_norm": 1.0229368209838867, "learning_rate": 7.538603031238849e-05, "loss": 1.2995, "step": 103500 }, { "epoch": 0.6612958869452998, "grad_norm": 0.8887920379638672, "learning_rate": 7.538170733962245e-05, "loss": 1.0423, "step": 103510 }, { "epoch": 0.6613597740950385, "grad_norm": 1.2013378143310547, "learning_rate": 7.537738411124066e-05, "loss": 0.8154, "step": 103520 }, { "epoch": 0.6614236612447773, "grad_norm": 0.6866891980171204, "learning_rate": 7.537306062728669e-05, "loss": 0.9957, "step": 103530 }, { "epoch": 0.661487548394516, "grad_norm": 1.4273715019226074, "learning_rate": 7.536873688780402e-05, "loss": 0.9109, "step": 103540 }, { "epoch": 0.6615514355442547, "grad_norm": 0.7437546253204346, "learning_rate": 7.536441289283622e-05, "loss": 0.9392, "step": 103550 }, { "epoch": 0.6616153226939934, "grad_norm": 0.8337574005126953, "learning_rate": 7.536008864242685e-05, "loss": 1.0836, "step": 103560 }, { "epoch": 0.6616792098437321, "grad_norm": 0.6678511500358582, "learning_rate": 7.535576413661944e-05, "loss": 0.7588, "step": 103570 }, { "epoch": 0.6617430969934708, "grad_norm": 0.8168431520462036, "learning_rate": 7.535143937545757e-05, "loss": 0.7712, "step": 103580 }, { "epoch": 0.6618069841432095, "grad_norm": 0.7926838994026184, "learning_rate": 7.534711435898473e-05, "loss": 0.8549, "step": 103590 }, { "epoch": 0.6618708712929481, "grad_norm": 0.8065713047981262, "learning_rate": 7.534278908724455e-05, "loss": 0.9805, "step": 103600 }, { "epoch": 0.6619347584426868, "grad_norm": 1.288443684577942, "learning_rate": 7.533846356028056e-05, "loss": 0.9803, "step": 103610 }, { "epoch": 0.6619986455924255, "grad_norm": 0.8270924687385559, "learning_rate": 7.533413777813632e-05, "loss": 1.0178, "step": 103620 }, { "epoch": 0.6620625327421642, "grad_norm": 0.969517707824707, "learning_rate": 7.532981174085538e-05, "loss": 0.7647, "step": 103630 }, { "epoch": 0.6621264198919029, "grad_norm": 0.8974095582962036, "learning_rate": 7.532548544848134e-05, "loss": 0.92, "step": 103640 }, { "epoch": 0.6621903070416416, "grad_norm": 0.6502230763435364, "learning_rate": 7.532115890105776e-05, "loss": 0.8038, "step": 103650 }, { "epoch": 0.6622541941913803, "grad_norm": 2.234053134918213, "learning_rate": 7.531683209862818e-05, "loss": 0.6465, "step": 103660 }, { "epoch": 0.662318081341119, "grad_norm": 0.8067479729652405, "learning_rate": 7.531250504123622e-05, "loss": 0.8337, "step": 103670 }, { "epoch": 0.6623819684908577, "grad_norm": 1.0391908884048462, "learning_rate": 7.530817772892543e-05, "loss": 0.839, "step": 103680 }, { "epoch": 0.6624458556405964, "grad_norm": 0.6917629241943359, "learning_rate": 7.53038501617394e-05, "loss": 1.0411, "step": 103690 }, { "epoch": 0.6625097427903351, "grad_norm": 0.8440914750099182, "learning_rate": 7.529952233972169e-05, "loss": 0.886, "step": 103700 }, { "epoch": 0.6625736299400738, "grad_norm": 0.7776359915733337, "learning_rate": 7.529519426291591e-05, "loss": 0.8062, "step": 103710 }, { "epoch": 0.6626375170898126, "grad_norm": 0.7299126386642456, "learning_rate": 7.529086593136564e-05, "loss": 0.9034, "step": 103720 }, { "epoch": 0.6627014042395513, "grad_norm": 1.0849508047103882, "learning_rate": 7.528653734511447e-05, "loss": 0.8913, "step": 103730 }, { "epoch": 0.66276529138929, "grad_norm": 1.4763078689575195, "learning_rate": 7.5282208504206e-05, "loss": 1.0506, "step": 103740 }, { "epoch": 0.6628291785390287, "grad_norm": 1.22445547580719, "learning_rate": 7.52778794086838e-05, "loss": 0.7745, "step": 103750 }, { "epoch": 0.6628930656887674, "grad_norm": 0.757455587387085, "learning_rate": 7.52735500585915e-05, "loss": 0.7991, "step": 103760 }, { "epoch": 0.6629569528385061, "grad_norm": 0.643725574016571, "learning_rate": 7.526922045397269e-05, "loss": 0.8059, "step": 103770 }, { "epoch": 0.6630208399882448, "grad_norm": 0.8213297724723816, "learning_rate": 7.526489059487097e-05, "loss": 0.7859, "step": 103780 }, { "epoch": 0.6630847271379835, "grad_norm": 0.90571528673172, "learning_rate": 7.526056048132993e-05, "loss": 0.8258, "step": 103790 }, { "epoch": 0.6631486142877222, "grad_norm": 1.3528343439102173, "learning_rate": 7.52562301133932e-05, "loss": 0.8203, "step": 103800 }, { "epoch": 0.6632125014374609, "grad_norm": 0.9805328845977783, "learning_rate": 7.525189949110438e-05, "loss": 0.9493, "step": 103810 }, { "epoch": 0.6632763885871996, "grad_norm": 1.103614091873169, "learning_rate": 7.52475686145071e-05, "loss": 0.6765, "step": 103820 }, { "epoch": 0.6633402757369383, "grad_norm": 0.864163875579834, "learning_rate": 7.524323748364494e-05, "loss": 0.9175, "step": 103830 }, { "epoch": 0.663404162886677, "grad_norm": 0.5981049537658691, "learning_rate": 7.523890609856157e-05, "loss": 0.808, "step": 103840 }, { "epoch": 0.6634680500364156, "grad_norm": 0.5768615007400513, "learning_rate": 7.523457445930055e-05, "loss": 0.9691, "step": 103850 }, { "epoch": 0.6635319371861543, "grad_norm": 0.5739576816558838, "learning_rate": 7.523024256590556e-05, "loss": 0.852, "step": 103860 }, { "epoch": 0.663595824335893, "grad_norm": 0.8774191737174988, "learning_rate": 7.522591041842018e-05, "loss": 0.9165, "step": 103870 }, { "epoch": 0.6636597114856317, "grad_norm": 1.1826159954071045, "learning_rate": 7.522157801688807e-05, "loss": 0.648, "step": 103880 }, { "epoch": 0.6637235986353704, "grad_norm": 1.5209389925003052, "learning_rate": 7.521724536135287e-05, "loss": 0.8307, "step": 103890 }, { "epoch": 0.6637874857851092, "grad_norm": 0.7982348799705505, "learning_rate": 7.521291245185815e-05, "loss": 0.8145, "step": 103900 }, { "epoch": 0.6638513729348479, "grad_norm": 0.9979506731033325, "learning_rate": 7.52085792884476e-05, "loss": 0.9305, "step": 103910 }, { "epoch": 0.6639152600845866, "grad_norm": 0.9951682686805725, "learning_rate": 7.520424587116485e-05, "loss": 0.8077, "step": 103920 }, { "epoch": 0.6639791472343253, "grad_norm": 0.7749238610267639, "learning_rate": 7.519991220005355e-05, "loss": 0.7822, "step": 103930 }, { "epoch": 0.664043034384064, "grad_norm": 1.0329078435897827, "learning_rate": 7.519557827515733e-05, "loss": 0.8315, "step": 103940 }, { "epoch": 0.6641069215338027, "grad_norm": 1.0199581384658813, "learning_rate": 7.519124409651984e-05, "loss": 0.9086, "step": 103950 }, { "epoch": 0.6641708086835414, "grad_norm": 1.0277516841888428, "learning_rate": 7.518690966418474e-05, "loss": 1.0459, "step": 103960 }, { "epoch": 0.6642346958332801, "grad_norm": 1.1918634176254272, "learning_rate": 7.518257497819566e-05, "loss": 1.1006, "step": 103970 }, { "epoch": 0.6642985829830188, "grad_norm": 1.1144057512283325, "learning_rate": 7.517824003859624e-05, "loss": 0.8974, "step": 103980 }, { "epoch": 0.6643624701327575, "grad_norm": 0.5517343282699585, "learning_rate": 7.517390484543018e-05, "loss": 0.9436, "step": 103990 }, { "epoch": 0.6644263572824962, "grad_norm": 0.7781495451927185, "learning_rate": 7.516956939874113e-05, "loss": 0.9474, "step": 104000 }, { "epoch": 0.6644902444322349, "grad_norm": 0.9537546634674072, "learning_rate": 7.516523369857273e-05, "loss": 0.7145, "step": 104010 }, { "epoch": 0.6645541315819736, "grad_norm": 0.9538782238960266, "learning_rate": 7.516089774496866e-05, "loss": 0.7486, "step": 104020 }, { "epoch": 0.6646180187317123, "grad_norm": 0.8699349164962769, "learning_rate": 7.515656153797257e-05, "loss": 0.8378, "step": 104030 }, { "epoch": 0.664681905881451, "grad_norm": 0.7079137563705444, "learning_rate": 7.515222507762815e-05, "loss": 0.7564, "step": 104040 }, { "epoch": 0.6647457930311897, "grad_norm": 0.596549391746521, "learning_rate": 7.514788836397908e-05, "loss": 0.9477, "step": 104050 }, { "epoch": 0.6648096801809285, "grad_norm": 0.8176950812339783, "learning_rate": 7.5143551397069e-05, "loss": 0.883, "step": 104060 }, { "epoch": 0.6648735673306672, "grad_norm": 0.5366206765174866, "learning_rate": 7.51392141769416e-05, "loss": 0.7585, "step": 104070 }, { "epoch": 0.6649374544804059, "grad_norm": 2.3098976612091064, "learning_rate": 7.51348767036406e-05, "loss": 0.9656, "step": 104080 }, { "epoch": 0.6650013416301445, "grad_norm": 1.27628755569458, "learning_rate": 7.51305389772096e-05, "loss": 0.8482, "step": 104090 }, { "epoch": 0.6650652287798832, "grad_norm": 0.6717961430549622, "learning_rate": 7.512620099769235e-05, "loss": 0.94, "step": 104100 }, { "epoch": 0.6651291159296219, "grad_norm": 2.8479366302490234, "learning_rate": 7.512186276513252e-05, "loss": 0.9146, "step": 104110 }, { "epoch": 0.6651930030793606, "grad_norm": 1.3686326742172241, "learning_rate": 7.51175242795738e-05, "loss": 0.9382, "step": 104120 }, { "epoch": 0.6652568902290993, "grad_norm": 1.0768921375274658, "learning_rate": 7.511318554105988e-05, "loss": 0.8419, "step": 104130 }, { "epoch": 0.665320777378838, "grad_norm": 0.581240713596344, "learning_rate": 7.510884654963446e-05, "loss": 1.4625, "step": 104140 }, { "epoch": 0.6653846645285767, "grad_norm": 0.8124034404754639, "learning_rate": 7.510450730534123e-05, "loss": 0.8727, "step": 104150 }, { "epoch": 0.6654485516783154, "grad_norm": 0.9794655442237854, "learning_rate": 7.510016780822388e-05, "loss": 0.9003, "step": 104160 }, { "epoch": 0.6655124388280541, "grad_norm": 1.2169163227081299, "learning_rate": 7.509582805832614e-05, "loss": 0.9785, "step": 104170 }, { "epoch": 0.6655763259777928, "grad_norm": 1.4729397296905518, "learning_rate": 7.50914880556917e-05, "loss": 1.0235, "step": 104180 }, { "epoch": 0.6656402131275315, "grad_norm": 0.7866071462631226, "learning_rate": 7.508714780036428e-05, "loss": 0.7818, "step": 104190 }, { "epoch": 0.6657041002772702, "grad_norm": 1.0959784984588623, "learning_rate": 7.508280729238754e-05, "loss": 1.0379, "step": 104200 }, { "epoch": 0.6657679874270089, "grad_norm": 0.6036289930343628, "learning_rate": 7.507846653180527e-05, "loss": 0.7128, "step": 104210 }, { "epoch": 0.6658318745767476, "grad_norm": 1.0480402708053589, "learning_rate": 7.507412551866113e-05, "loss": 0.8218, "step": 104220 }, { "epoch": 0.6658957617264863, "grad_norm": 0.8238396048545837, "learning_rate": 7.506978425299886e-05, "loss": 1.0993, "step": 104230 }, { "epoch": 0.665959648876225, "grad_norm": 0.6929308176040649, "learning_rate": 7.506544273486216e-05, "loss": 0.7918, "step": 104240 }, { "epoch": 0.6660235360259638, "grad_norm": 0.9507032036781311, "learning_rate": 7.506110096429478e-05, "loss": 1.1289, "step": 104250 }, { "epoch": 0.6660874231757025, "grad_norm": 0.6241841316223145, "learning_rate": 7.505675894134042e-05, "loss": 0.6933, "step": 104260 }, { "epoch": 0.6661513103254412, "grad_norm": 1.4490808248519897, "learning_rate": 7.505241666604284e-05, "loss": 0.9477, "step": 104270 }, { "epoch": 0.6662151974751799, "grad_norm": 1.379927635192871, "learning_rate": 7.504807413844573e-05, "loss": 0.7406, "step": 104280 }, { "epoch": 0.6662790846249186, "grad_norm": 0.7105908393859863, "learning_rate": 7.504373135859283e-05, "loss": 0.981, "step": 104290 }, { "epoch": 0.6663429717746573, "grad_norm": 2.1415674686431885, "learning_rate": 7.503938832652793e-05, "loss": 0.804, "step": 104300 }, { "epoch": 0.666406858924396, "grad_norm": 1.058493733406067, "learning_rate": 7.50350450422947e-05, "loss": 0.8421, "step": 104310 }, { "epoch": 0.6664707460741347, "grad_norm": 0.8077415823936462, "learning_rate": 7.503070150593692e-05, "loss": 1.0033, "step": 104320 }, { "epoch": 0.6665346332238733, "grad_norm": 1.5287679433822632, "learning_rate": 7.502635771749832e-05, "loss": 0.9708, "step": 104330 }, { "epoch": 0.666598520373612, "grad_norm": 1.087836742401123, "learning_rate": 7.502201367702264e-05, "loss": 1.0815, "step": 104340 }, { "epoch": 0.6666624075233507, "grad_norm": 1.3868272304534912, "learning_rate": 7.501766938455365e-05, "loss": 0.8257, "step": 104350 }, { "epoch": 0.6667262946730894, "grad_norm": 0.9557937979698181, "learning_rate": 7.501332484013508e-05, "loss": 0.9096, "step": 104360 }, { "epoch": 0.6667901818228281, "grad_norm": 0.964483916759491, "learning_rate": 7.50089800438107e-05, "loss": 0.8804, "step": 104370 }, { "epoch": 0.6668540689725668, "grad_norm": 0.955265462398529, "learning_rate": 7.500463499562423e-05, "loss": 1.2936, "step": 104380 }, { "epoch": 0.6669179561223055, "grad_norm": 0.7339872717857361, "learning_rate": 7.500028969561947e-05, "loss": 0.6539, "step": 104390 }, { "epoch": 0.6669818432720442, "grad_norm": 0.724774181842804, "learning_rate": 7.499594414384015e-05, "loss": 1.0477, "step": 104400 }, { "epoch": 0.667045730421783, "grad_norm": 0.9447941184043884, "learning_rate": 7.499159834033006e-05, "loss": 0.9432, "step": 104410 }, { "epoch": 0.6671096175715217, "grad_norm": 1.0058971643447876, "learning_rate": 7.498725228513295e-05, "loss": 0.9858, "step": 104420 }, { "epoch": 0.6671735047212604, "grad_norm": 0.9895200133323669, "learning_rate": 7.49829059782926e-05, "loss": 0.9344, "step": 104430 }, { "epoch": 0.6672373918709991, "grad_norm": 0.8452537655830383, "learning_rate": 7.497855941985274e-05, "loss": 0.9845, "step": 104440 }, { "epoch": 0.6673012790207378, "grad_norm": 1.1709915399551392, "learning_rate": 7.497421260985721e-05, "loss": 0.6073, "step": 104450 }, { "epoch": 0.6673651661704765, "grad_norm": 0.6118887662887573, "learning_rate": 7.496986554834974e-05, "loss": 0.8184, "step": 104460 }, { "epoch": 0.6674290533202152, "grad_norm": 1.082446575164795, "learning_rate": 7.496551823537414e-05, "loss": 0.7765, "step": 104470 }, { "epoch": 0.6674929404699539, "grad_norm": 1.0023239850997925, "learning_rate": 7.496117067097416e-05, "loss": 0.9153, "step": 104480 }, { "epoch": 0.6675568276196926, "grad_norm": 0.8243518471717834, "learning_rate": 7.49568228551936e-05, "loss": 0.7647, "step": 104490 }, { "epoch": 0.6676207147694313, "grad_norm": 1.0458136796951294, "learning_rate": 7.495247478807624e-05, "loss": 0.7552, "step": 104500 }, { "epoch": 0.66768460191917, "grad_norm": 1.0639128684997559, "learning_rate": 7.494856131281384e-05, "loss": 1.1131, "step": 104510 }, { "epoch": 0.6677484890689087, "grad_norm": 0.9773111343383789, "learning_rate": 7.494421276827722e-05, "loss": 0.9946, "step": 104520 }, { "epoch": 0.6678123762186474, "grad_norm": 0.8806718587875366, "learning_rate": 7.493986397253079e-05, "loss": 0.79, "step": 104530 }, { "epoch": 0.6678762633683861, "grad_norm": 0.7560052275657654, "learning_rate": 7.493551492561835e-05, "loss": 0.8424, "step": 104540 }, { "epoch": 0.6679401505181248, "grad_norm": 0.7948547601699829, "learning_rate": 7.49311656275837e-05, "loss": 0.992, "step": 104550 }, { "epoch": 0.6680040376678635, "grad_norm": 0.8209701776504517, "learning_rate": 7.492681607847064e-05, "loss": 0.7234, "step": 104560 }, { "epoch": 0.6680679248176021, "grad_norm": 0.6525776386260986, "learning_rate": 7.492246627832297e-05, "loss": 0.9871, "step": 104570 }, { "epoch": 0.6681318119673408, "grad_norm": 0.7713031768798828, "learning_rate": 7.491811622718454e-05, "loss": 1.1684, "step": 104580 }, { "epoch": 0.6681956991170795, "grad_norm": 0.7066755890846252, "learning_rate": 7.49137659250991e-05, "loss": 1.0549, "step": 104590 }, { "epoch": 0.6682595862668183, "grad_norm": 0.9427279829978943, "learning_rate": 7.490941537211047e-05, "loss": 1.1706, "step": 104600 }, { "epoch": 0.668323473416557, "grad_norm": 1.0990161895751953, "learning_rate": 7.49050645682625e-05, "loss": 0.7285, "step": 104610 }, { "epoch": 0.6683873605662957, "grad_norm": 0.9260150790214539, "learning_rate": 7.490071351359896e-05, "loss": 1.0507, "step": 104620 }, { "epoch": 0.6684512477160344, "grad_norm": 0.7509433627128601, "learning_rate": 7.48963622081637e-05, "loss": 1.1747, "step": 104630 }, { "epoch": 0.6685151348657731, "grad_norm": 0.5178989768028259, "learning_rate": 7.489201065200055e-05, "loss": 0.7815, "step": 104640 }, { "epoch": 0.6685790220155118, "grad_norm": 0.6780941486358643, "learning_rate": 7.488765884515331e-05, "loss": 0.8624, "step": 104650 }, { "epoch": 0.6686429091652505, "grad_norm": 0.9320861101150513, "learning_rate": 7.488330678766581e-05, "loss": 0.7658, "step": 104660 }, { "epoch": 0.6687067963149892, "grad_norm": 1.1950159072875977, "learning_rate": 7.487895447958189e-05, "loss": 0.7474, "step": 104670 }, { "epoch": 0.6687706834647279, "grad_norm": 1.2028008699417114, "learning_rate": 7.487460192094538e-05, "loss": 1.0459, "step": 104680 }, { "epoch": 0.6688345706144666, "grad_norm": 0.6161251068115234, "learning_rate": 7.48702491118001e-05, "loss": 0.8281, "step": 104690 }, { "epoch": 0.6688984577642053, "grad_norm": 0.6632505059242249, "learning_rate": 7.48658960521899e-05, "loss": 0.927, "step": 104700 }, { "epoch": 0.668962344913944, "grad_norm": 1.3930466175079346, "learning_rate": 7.48615427421586e-05, "loss": 0.8673, "step": 104710 }, { "epoch": 0.6690262320636827, "grad_norm": 0.6448533535003662, "learning_rate": 7.485718918175006e-05, "loss": 1.0228, "step": 104720 }, { "epoch": 0.6690901192134214, "grad_norm": 0.997040331363678, "learning_rate": 7.485283537100813e-05, "loss": 0.952, "step": 104730 }, { "epoch": 0.6691540063631601, "grad_norm": 0.7598833441734314, "learning_rate": 7.484848130997664e-05, "loss": 0.7925, "step": 104740 }, { "epoch": 0.6692178935128988, "grad_norm": 1.102980375289917, "learning_rate": 7.484412699869946e-05, "loss": 0.8564, "step": 104750 }, { "epoch": 0.6692817806626375, "grad_norm": 0.7010207772254944, "learning_rate": 7.483977243722042e-05, "loss": 0.821, "step": 104760 }, { "epoch": 0.6693456678123763, "grad_norm": 0.6999570727348328, "learning_rate": 7.483541762558338e-05, "loss": 0.9697, "step": 104770 }, { "epoch": 0.669409554962115, "grad_norm": 1.1638367176055908, "learning_rate": 7.48310625638322e-05, "loss": 0.7476, "step": 104780 }, { "epoch": 0.6694734421118537, "grad_norm": 1.272621512413025, "learning_rate": 7.482670725201075e-05, "loss": 0.9064, "step": 104790 }, { "epoch": 0.6695373292615924, "grad_norm": 1.063870906829834, "learning_rate": 7.482235169016286e-05, "loss": 1.0145, "step": 104800 }, { "epoch": 0.6696012164113311, "grad_norm": 0.9829151630401611, "learning_rate": 7.481799587833241e-05, "loss": 0.7884, "step": 104810 }, { "epoch": 0.6696651035610697, "grad_norm": 0.8304445147514343, "learning_rate": 7.481363981656329e-05, "loss": 0.7588, "step": 104820 }, { "epoch": 0.6697289907108084, "grad_norm": 0.8410045504570007, "learning_rate": 7.480928350489935e-05, "loss": 0.8533, "step": 104830 }, { "epoch": 0.6697928778605471, "grad_norm": 0.8726821541786194, "learning_rate": 7.480492694338445e-05, "loss": 0.9978, "step": 104840 }, { "epoch": 0.6698567650102858, "grad_norm": 0.6928712129592896, "learning_rate": 7.480057013206248e-05, "loss": 0.5947, "step": 104850 }, { "epoch": 0.6699206521600245, "grad_norm": 0.9680970907211304, "learning_rate": 7.479621307097732e-05, "loss": 1.0667, "step": 104860 }, { "epoch": 0.6699845393097632, "grad_norm": 1.0033652782440186, "learning_rate": 7.479185576017283e-05, "loss": 0.8343, "step": 104870 }, { "epoch": 0.6700484264595019, "grad_norm": 0.9829990863800049, "learning_rate": 7.478749819969291e-05, "loss": 0.8276, "step": 104880 }, { "epoch": 0.6701123136092406, "grad_norm": 0.591493546962738, "learning_rate": 7.478314038958144e-05, "loss": 0.8754, "step": 104890 }, { "epoch": 0.6701762007589793, "grad_norm": 0.9688683748245239, "learning_rate": 7.477878232988231e-05, "loss": 0.7653, "step": 104900 }, { "epoch": 0.670240087908718, "grad_norm": 1.1329195499420166, "learning_rate": 7.47744240206394e-05, "loss": 1.1918, "step": 104910 }, { "epoch": 0.6703039750584567, "grad_norm": 0.9358948469161987, "learning_rate": 7.47700654618966e-05, "loss": 0.9408, "step": 104920 }, { "epoch": 0.6703678622081954, "grad_norm": 0.8225820064544678, "learning_rate": 7.476570665369782e-05, "loss": 0.7708, "step": 104930 }, { "epoch": 0.6704317493579341, "grad_norm": 0.5274181962013245, "learning_rate": 7.476134759608695e-05, "loss": 1.0094, "step": 104940 }, { "epoch": 0.6704956365076729, "grad_norm": 0.9145537614822388, "learning_rate": 7.475698828910789e-05, "loss": 1.132, "step": 104950 }, { "epoch": 0.6705595236574116, "grad_norm": 0.9951286315917969, "learning_rate": 7.475262873280453e-05, "loss": 0.7787, "step": 104960 }, { "epoch": 0.6706234108071503, "grad_norm": 0.9020349979400635, "learning_rate": 7.47482689272208e-05, "loss": 0.8919, "step": 104970 }, { "epoch": 0.670687297956889, "grad_norm": 1.0705788135528564, "learning_rate": 7.474390887240058e-05, "loss": 1.0581, "step": 104980 }, { "epoch": 0.6707511851066277, "grad_norm": 0.665863573551178, "learning_rate": 7.47395485683878e-05, "loss": 1.1165, "step": 104990 }, { "epoch": 0.6708150722563664, "grad_norm": 0.692719042301178, "learning_rate": 7.473518801522636e-05, "loss": 0.927, "step": 105000 }, { "epoch": 0.6708789594061051, "grad_norm": 1.2955323457717896, "learning_rate": 7.473082721296017e-05, "loss": 0.7546, "step": 105010 }, { "epoch": 0.6709428465558438, "grad_norm": 3.011267900466919, "learning_rate": 7.472646616163317e-05, "loss": 0.8958, "step": 105020 }, { "epoch": 0.6710067337055825, "grad_norm": 1.2101144790649414, "learning_rate": 7.472210486128926e-05, "loss": 0.8662, "step": 105030 }, { "epoch": 0.6710706208553212, "grad_norm": 1.1374763250350952, "learning_rate": 7.471774331197235e-05, "loss": 0.7575, "step": 105040 }, { "epoch": 0.6711345080050599, "grad_norm": 0.7340751886367798, "learning_rate": 7.47133815137264e-05, "loss": 0.9587, "step": 105050 }, { "epoch": 0.6711983951547985, "grad_norm": 0.9507089853286743, "learning_rate": 7.470901946659529e-05, "loss": 0.7413, "step": 105060 }, { "epoch": 0.6712622823045372, "grad_norm": 0.9281381368637085, "learning_rate": 7.470465717062301e-05, "loss": 0.8137, "step": 105070 }, { "epoch": 0.6713261694542759, "grad_norm": 1.0482163429260254, "learning_rate": 7.470029462585344e-05, "loss": 0.9596, "step": 105080 }, { "epoch": 0.6713900566040146, "grad_norm": 0.8228697180747986, "learning_rate": 7.469593183233055e-05, "loss": 0.9596, "step": 105090 }, { "epoch": 0.6714539437537533, "grad_norm": 0.9611666798591614, "learning_rate": 7.469156879009824e-05, "loss": 0.9952, "step": 105100 }, { "epoch": 0.671517830903492, "grad_norm": 0.9121928811073303, "learning_rate": 7.468720549920049e-05, "loss": 0.9997, "step": 105110 }, { "epoch": 0.6715817180532307, "grad_norm": 0.6626456379890442, "learning_rate": 7.468284195968122e-05, "loss": 1.3452, "step": 105120 }, { "epoch": 0.6716456052029695, "grad_norm": 1.1006265878677368, "learning_rate": 7.467847817158438e-05, "loss": 0.8195, "step": 105130 }, { "epoch": 0.6717094923527082, "grad_norm": 0.881610631942749, "learning_rate": 7.46741141349539e-05, "loss": 0.7741, "step": 105140 }, { "epoch": 0.6717733795024469, "grad_norm": 0.8922167420387268, "learning_rate": 7.466974984983374e-05, "loss": 0.6272, "step": 105150 }, { "epoch": 0.6718372666521856, "grad_norm": 0.9514210820198059, "learning_rate": 7.466538531626788e-05, "loss": 0.8509, "step": 105160 }, { "epoch": 0.6719011538019243, "grad_norm": 1.4604068994522095, "learning_rate": 7.466102053430023e-05, "loss": 1.0156, "step": 105170 }, { "epoch": 0.671965040951663, "grad_norm": 0.619574785232544, "learning_rate": 7.46566555039748e-05, "loss": 0.8557, "step": 105180 }, { "epoch": 0.6720289281014017, "grad_norm": 0.6358922123908997, "learning_rate": 7.46522902253355e-05, "loss": 0.7148, "step": 105190 }, { "epoch": 0.6720928152511404, "grad_norm": 1.179234266281128, "learning_rate": 7.46479246984263e-05, "loss": 1.074, "step": 105200 }, { "epoch": 0.6721567024008791, "grad_norm": 1.14448881149292, "learning_rate": 7.464355892329119e-05, "loss": 1.2771, "step": 105210 }, { "epoch": 0.6722205895506178, "grad_norm": 1.1141912937164307, "learning_rate": 7.463919289997413e-05, "loss": 1.0229, "step": 105220 }, { "epoch": 0.6722844767003565, "grad_norm": 1.5927108526229858, "learning_rate": 7.463482662851904e-05, "loss": 0.7145, "step": 105230 }, { "epoch": 0.6723483638500952, "grad_norm": 0.9684871435165405, "learning_rate": 7.463046010896996e-05, "loss": 0.862, "step": 105240 }, { "epoch": 0.6724122509998339, "grad_norm": 2.3723561763763428, "learning_rate": 7.462609334137085e-05, "loss": 0.8466, "step": 105250 }, { "epoch": 0.6724761381495726, "grad_norm": 1.4940024614334106, "learning_rate": 7.462172632576566e-05, "loss": 1.0536, "step": 105260 }, { "epoch": 0.6725400252993113, "grad_norm": 2.5333075523376465, "learning_rate": 7.46173590621984e-05, "loss": 0.8354, "step": 105270 }, { "epoch": 0.67260391244905, "grad_norm": 0.7402299046516418, "learning_rate": 7.461299155071302e-05, "loss": 0.8569, "step": 105280 }, { "epoch": 0.6726677995987888, "grad_norm": 1.2563143968582153, "learning_rate": 7.460862379135353e-05, "loss": 0.9467, "step": 105290 }, { "epoch": 0.6727316867485273, "grad_norm": 0.6169116497039795, "learning_rate": 7.460425578416392e-05, "loss": 0.8101, "step": 105300 }, { "epoch": 0.672795573898266, "grad_norm": 0.9723251461982727, "learning_rate": 7.459988752918815e-05, "loss": 0.836, "step": 105310 }, { "epoch": 0.6728594610480048, "grad_norm": 0.9852758049964905, "learning_rate": 7.459551902647023e-05, "loss": 0.7975, "step": 105320 }, { "epoch": 0.6729233481977435, "grad_norm": 0.8491624593734741, "learning_rate": 7.459115027605416e-05, "loss": 1.0277, "step": 105330 }, { "epoch": 0.6729872353474822, "grad_norm": 0.9614421725273132, "learning_rate": 7.458678127798394e-05, "loss": 0.6645, "step": 105340 }, { "epoch": 0.6730511224972209, "grad_norm": 0.9115906357765198, "learning_rate": 7.458241203230355e-05, "loss": 0.8675, "step": 105350 }, { "epoch": 0.6731150096469596, "grad_norm": 0.9010282158851624, "learning_rate": 7.457804253905701e-05, "loss": 0.8478, "step": 105360 }, { "epoch": 0.6731788967966983, "grad_norm": 1.2737298011779785, "learning_rate": 7.457367279828833e-05, "loss": 0.7011, "step": 105370 }, { "epoch": 0.673242783946437, "grad_norm": 1.0771639347076416, "learning_rate": 7.456930281004148e-05, "loss": 1.0038, "step": 105380 }, { "epoch": 0.6733066710961757, "grad_norm": 0.9873582124710083, "learning_rate": 7.456493257436052e-05, "loss": 0.858, "step": 105390 }, { "epoch": 0.6733705582459144, "grad_norm": 0.7805922031402588, "learning_rate": 7.456056209128942e-05, "loss": 0.9136, "step": 105400 }, { "epoch": 0.6734344453956531, "grad_norm": 0.9018038511276245, "learning_rate": 7.455619136087221e-05, "loss": 1.0227, "step": 105410 }, { "epoch": 0.6734983325453918, "grad_norm": 0.6552641987800598, "learning_rate": 7.455182038315294e-05, "loss": 1.0684, "step": 105420 }, { "epoch": 0.6735622196951305, "grad_norm": 1.088218331336975, "learning_rate": 7.454744915817557e-05, "loss": 0.833, "step": 105430 }, { "epoch": 0.6736261068448692, "grad_norm": 1.2808659076690674, "learning_rate": 7.454307768598416e-05, "loss": 0.6516, "step": 105440 }, { "epoch": 0.6736899939946079, "grad_norm": 0.9723607301712036, "learning_rate": 7.453870596662271e-05, "loss": 1.0049, "step": 105450 }, { "epoch": 0.6737538811443466, "grad_norm": 0.6379223465919495, "learning_rate": 7.453433400013528e-05, "loss": 0.9626, "step": 105460 }, { "epoch": 0.6738177682940854, "grad_norm": 0.5692765712738037, "learning_rate": 7.452996178656587e-05, "loss": 0.7118, "step": 105470 }, { "epoch": 0.6738816554438241, "grad_norm": 0.4991033971309662, "learning_rate": 7.452558932595853e-05, "loss": 0.8539, "step": 105480 }, { "epoch": 0.6739455425935628, "grad_norm": 0.6770216226577759, "learning_rate": 7.45212166183573e-05, "loss": 0.797, "step": 105490 }, { "epoch": 0.6740094297433015, "grad_norm": 1.0302858352661133, "learning_rate": 7.451728097037279e-05, "loss": 1.1371, "step": 105500 }, { "epoch": 0.6740733168930402, "grad_norm": 0.986290693283081, "learning_rate": 7.451290779360444e-05, "loss": 0.8325, "step": 105510 }, { "epoch": 0.6741372040427789, "grad_norm": 0.6854764223098755, "learning_rate": 7.450853436996992e-05, "loss": 1.201, "step": 105520 }, { "epoch": 0.6742010911925176, "grad_norm": 1.172593116760254, "learning_rate": 7.450416069951324e-05, "loss": 0.7934, "step": 105530 }, { "epoch": 0.6742649783422563, "grad_norm": 0.6270721554756165, "learning_rate": 7.44997867822785e-05, "loss": 0.944, "step": 105540 }, { "epoch": 0.6743288654919949, "grad_norm": 1.119352102279663, "learning_rate": 7.449541261830968e-05, "loss": 0.9087, "step": 105550 }, { "epoch": 0.6743927526417336, "grad_norm": 1.074959397315979, "learning_rate": 7.449103820765086e-05, "loss": 0.7586, "step": 105560 }, { "epoch": 0.6744566397914723, "grad_norm": 0.7056079506874084, "learning_rate": 7.44866635503461e-05, "loss": 0.9738, "step": 105570 }, { "epoch": 0.674520526941211, "grad_norm": 0.5963863730430603, "learning_rate": 7.448228864643947e-05, "loss": 0.7275, "step": 105580 }, { "epoch": 0.6745844140909497, "grad_norm": 0.836320698261261, "learning_rate": 7.447791349597502e-05, "loss": 0.9359, "step": 105590 }, { "epoch": 0.6746483012406884, "grad_norm": 0.8702114224433899, "learning_rate": 7.447353809899677e-05, "loss": 0.8465, "step": 105600 }, { "epoch": 0.6747121883904271, "grad_norm": 0.9476937651634216, "learning_rate": 7.446916245554885e-05, "loss": 1.0381, "step": 105610 }, { "epoch": 0.6747760755401658, "grad_norm": 1.5769599676132202, "learning_rate": 7.446478656567529e-05, "loss": 1.1323, "step": 105620 }, { "epoch": 0.6748399626899045, "grad_norm": 0.9263478517532349, "learning_rate": 7.446041042942016e-05, "loss": 1.2046, "step": 105630 }, { "epoch": 0.6749038498396432, "grad_norm": 0.5203749537467957, "learning_rate": 7.445603404682754e-05, "loss": 0.9345, "step": 105640 }, { "epoch": 0.674967736989382, "grad_norm": 1.511046290397644, "learning_rate": 7.445165741794149e-05, "loss": 0.7772, "step": 105650 }, { "epoch": 0.6750316241391207, "grad_norm": 0.868693470954895, "learning_rate": 7.44472805428061e-05, "loss": 0.9579, "step": 105660 }, { "epoch": 0.6750955112888594, "grad_norm": 0.7717391848564148, "learning_rate": 7.444290342146545e-05, "loss": 0.911, "step": 105670 }, { "epoch": 0.6751593984385981, "grad_norm": 1.1811197996139526, "learning_rate": 7.443852605396361e-05, "loss": 0.8844, "step": 105680 }, { "epoch": 0.6752232855883368, "grad_norm": 0.9373357892036438, "learning_rate": 7.443414844034468e-05, "loss": 1.0509, "step": 105690 }, { "epoch": 0.6752871727380755, "grad_norm": 1.0546302795410156, "learning_rate": 7.442977058065273e-05, "loss": 0.8802, "step": 105700 }, { "epoch": 0.6753510598878142, "grad_norm": 0.8621144890785217, "learning_rate": 7.442539247493185e-05, "loss": 0.6729, "step": 105710 }, { "epoch": 0.6754149470375529, "grad_norm": 0.6948429942131042, "learning_rate": 7.442101412322613e-05, "loss": 0.9194, "step": 105720 }, { "epoch": 0.6754788341872916, "grad_norm": 1.381230354309082, "learning_rate": 7.441663552557969e-05, "loss": 0.7042, "step": 105730 }, { "epoch": 0.6755427213370303, "grad_norm": 1.3487558364868164, "learning_rate": 7.441225668203658e-05, "loss": 0.8875, "step": 105740 }, { "epoch": 0.675606608486769, "grad_norm": 1.0803183317184448, "learning_rate": 7.440787759264095e-05, "loss": 1.0096, "step": 105750 }, { "epoch": 0.6756704956365077, "grad_norm": 1.2716935873031616, "learning_rate": 7.440349825743687e-05, "loss": 0.7651, "step": 105760 }, { "epoch": 0.6757343827862464, "grad_norm": 1.0950727462768555, "learning_rate": 7.439911867646845e-05, "loss": 0.8156, "step": 105770 }, { "epoch": 0.6757982699359851, "grad_norm": 0.8086333870887756, "learning_rate": 7.43947388497798e-05, "loss": 0.8297, "step": 105780 }, { "epoch": 0.6758621570857237, "grad_norm": 0.7670168876647949, "learning_rate": 7.439035877741503e-05, "loss": 0.855, "step": 105790 }, { "epoch": 0.6759260442354624, "grad_norm": 0.7938393950462341, "learning_rate": 7.438597845941824e-05, "loss": 0.8926, "step": 105800 }, { "epoch": 0.6759899313852011, "grad_norm": 0.7349621057510376, "learning_rate": 7.438159789583354e-05, "loss": 0.9497, "step": 105810 }, { "epoch": 0.6760538185349398, "grad_norm": 0.7684302926063538, "learning_rate": 7.437721708670508e-05, "loss": 0.9919, "step": 105820 }, { "epoch": 0.6761177056846785, "grad_norm": 1.0219396352767944, "learning_rate": 7.437283603207693e-05, "loss": 0.8476, "step": 105830 }, { "epoch": 0.6761815928344173, "grad_norm": 0.8535874485969543, "learning_rate": 7.436845473199325e-05, "loss": 0.8841, "step": 105840 }, { "epoch": 0.676245479984156, "grad_norm": 0.8949576020240784, "learning_rate": 7.436407318649814e-05, "loss": 0.8545, "step": 105850 }, { "epoch": 0.6763093671338947, "grad_norm": 1.5550041198730469, "learning_rate": 7.435969139563574e-05, "loss": 1.1006, "step": 105860 }, { "epoch": 0.6763732542836334, "grad_norm": 0.8512755036354065, "learning_rate": 7.435530935945018e-05, "loss": 0.9107, "step": 105870 }, { "epoch": 0.6764371414333721, "grad_norm": 0.6899836659431458, "learning_rate": 7.435092707798559e-05, "loss": 1.1033, "step": 105880 }, { "epoch": 0.6765010285831108, "grad_norm": 0.6218075156211853, "learning_rate": 7.434654455128607e-05, "loss": 0.8597, "step": 105890 }, { "epoch": 0.6765649157328495, "grad_norm": 0.6466425657272339, "learning_rate": 7.43421617793958e-05, "loss": 0.798, "step": 105900 }, { "epoch": 0.6766288028825882, "grad_norm": 0.6886029243469238, "learning_rate": 7.43377787623589e-05, "loss": 0.8296, "step": 105910 }, { "epoch": 0.6766926900323269, "grad_norm": 0.6372695565223694, "learning_rate": 7.433339550021951e-05, "loss": 0.8947, "step": 105920 }, { "epoch": 0.6767565771820656, "grad_norm": 0.8008190393447876, "learning_rate": 7.43290119930218e-05, "loss": 0.9288, "step": 105930 }, { "epoch": 0.6768204643318043, "grad_norm": 0.8058283925056458, "learning_rate": 7.432462824080985e-05, "loss": 0.8823, "step": 105940 }, { "epoch": 0.676884351481543, "grad_norm": 0.7353378534317017, "learning_rate": 7.432024424362789e-05, "loss": 0.9896, "step": 105950 }, { "epoch": 0.6769482386312817, "grad_norm": 1.0405430793762207, "learning_rate": 7.431586000152001e-05, "loss": 1.3537, "step": 105960 }, { "epoch": 0.6770121257810204, "grad_norm": 0.5413171052932739, "learning_rate": 7.431147551453038e-05, "loss": 0.8819, "step": 105970 }, { "epoch": 0.6770760129307591, "grad_norm": 1.0479340553283691, "learning_rate": 7.430709078270316e-05, "loss": 0.9891, "step": 105980 }, { "epoch": 0.6771399000804978, "grad_norm": 1.1004263162612915, "learning_rate": 7.430270580608252e-05, "loss": 0.8119, "step": 105990 }, { "epoch": 0.6772037872302366, "grad_norm": 0.6329840421676636, "learning_rate": 7.42983205847126e-05, "loss": 0.8123, "step": 106000 }, { "epoch": 0.6772676743799753, "grad_norm": 0.8763070702552795, "learning_rate": 7.429393511863757e-05, "loss": 0.8839, "step": 106010 }, { "epoch": 0.677331561529714, "grad_norm": 1.7734843492507935, "learning_rate": 7.42895494079016e-05, "loss": 0.9038, "step": 106020 }, { "epoch": 0.6773954486794526, "grad_norm": 1.5639463663101196, "learning_rate": 7.428516345254886e-05, "loss": 0.7489, "step": 106030 }, { "epoch": 0.6774593358291913, "grad_norm": 0.8904886245727539, "learning_rate": 7.42807772526235e-05, "loss": 0.7974, "step": 106040 }, { "epoch": 0.67752322297893, "grad_norm": 0.9649606347084045, "learning_rate": 7.42763908081697e-05, "loss": 1.0977, "step": 106050 }, { "epoch": 0.6775871101286687, "grad_norm": 0.7616420984268188, "learning_rate": 7.427200411923166e-05, "loss": 0.9091, "step": 106060 }, { "epoch": 0.6776509972784074, "grad_norm": 1.3879841566085815, "learning_rate": 7.426761718585353e-05, "loss": 0.9823, "step": 106070 }, { "epoch": 0.6777148844281461, "grad_norm": 1.2231416702270508, "learning_rate": 7.426323000807951e-05, "loss": 0.8862, "step": 106080 }, { "epoch": 0.6777787715778848, "grad_norm": 1.1057007312774658, "learning_rate": 7.425884258595377e-05, "loss": 0.9518, "step": 106090 }, { "epoch": 0.6778426587276235, "grad_norm": 0.7669041156768799, "learning_rate": 7.42544549195205e-05, "loss": 0.8201, "step": 106100 }, { "epoch": 0.6779065458773622, "grad_norm": 0.9496064186096191, "learning_rate": 7.425006700882388e-05, "loss": 0.747, "step": 106110 }, { "epoch": 0.6779704330271009, "grad_norm": 0.8966147899627686, "learning_rate": 7.424567885390811e-05, "loss": 0.9232, "step": 106120 }, { "epoch": 0.6780343201768396, "grad_norm": 0.8240459561347961, "learning_rate": 7.424129045481738e-05, "loss": 0.9572, "step": 106130 }, { "epoch": 0.6780982073265783, "grad_norm": 0.9006532430648804, "learning_rate": 7.423690181159588e-05, "loss": 1.0682, "step": 106140 }, { "epoch": 0.678162094476317, "grad_norm": 0.6999794840812683, "learning_rate": 7.423251292428782e-05, "loss": 0.7835, "step": 106150 }, { "epoch": 0.6782259816260557, "grad_norm": 0.639180600643158, "learning_rate": 7.422812379293738e-05, "loss": 0.9808, "step": 106160 }, { "epoch": 0.6782898687757944, "grad_norm": 0.9818177819252014, "learning_rate": 7.422373441758877e-05, "loss": 0.9845, "step": 106170 }, { "epoch": 0.6783537559255332, "grad_norm": 0.85085529088974, "learning_rate": 7.421934479828621e-05, "loss": 1.0079, "step": 106180 }, { "epoch": 0.6784176430752719, "grad_norm": 1.0107144117355347, "learning_rate": 7.421495493507388e-05, "loss": 0.71, "step": 106190 }, { "epoch": 0.6784815302250106, "grad_norm": 0.8467554450035095, "learning_rate": 7.421056482799602e-05, "loss": 0.8878, "step": 106200 }, { "epoch": 0.6785454173747493, "grad_norm": 1.0272150039672852, "learning_rate": 7.42061744770968e-05, "loss": 1.0742, "step": 106210 }, { "epoch": 0.678609304524488, "grad_norm": 0.5289245247840881, "learning_rate": 7.42017838824205e-05, "loss": 0.6677, "step": 106220 }, { "epoch": 0.6786731916742267, "grad_norm": 0.78628009557724, "learning_rate": 7.419739304401127e-05, "loss": 0.9517, "step": 106230 }, { "epoch": 0.6787370788239654, "grad_norm": 1.0156890153884888, "learning_rate": 7.419300196191338e-05, "loss": 1.0061, "step": 106240 }, { "epoch": 0.6788009659737041, "grad_norm": 1.2271900177001953, "learning_rate": 7.418861063617102e-05, "loss": 0.8683, "step": 106250 }, { "epoch": 0.6788648531234428, "grad_norm": 2.280670404434204, "learning_rate": 7.41842190668284e-05, "loss": 0.7783, "step": 106260 }, { "epoch": 0.6789287402731814, "grad_norm": 0.7349517345428467, "learning_rate": 7.41798272539298e-05, "loss": 0.8688, "step": 106270 }, { "epoch": 0.6789926274229201, "grad_norm": 0.9518811702728271, "learning_rate": 7.417543519751943e-05, "loss": 0.7962, "step": 106280 }, { "epoch": 0.6790565145726588, "grad_norm": 1.090990662574768, "learning_rate": 7.41710428976415e-05, "loss": 0.9509, "step": 106290 }, { "epoch": 0.6791204017223975, "grad_norm": 0.817570149898529, "learning_rate": 7.416665035434025e-05, "loss": 0.8855, "step": 106300 }, { "epoch": 0.6791842888721362, "grad_norm": 0.6482291221618652, "learning_rate": 7.416225756765993e-05, "loss": 0.7992, "step": 106310 }, { "epoch": 0.6792481760218749, "grad_norm": 1.4157582521438599, "learning_rate": 7.415786453764478e-05, "loss": 1.0889, "step": 106320 }, { "epoch": 0.6793120631716136, "grad_norm": 1.1152769327163696, "learning_rate": 7.415347126433903e-05, "loss": 1.1192, "step": 106330 }, { "epoch": 0.6793759503213523, "grad_norm": 0.8868082761764526, "learning_rate": 7.414907774778693e-05, "loss": 0.7684, "step": 106340 }, { "epoch": 0.679439837471091, "grad_norm": 0.8544641137123108, "learning_rate": 7.414468398803272e-05, "loss": 0.8531, "step": 106350 }, { "epoch": 0.6795037246208298, "grad_norm": 2.2637743949890137, "learning_rate": 7.414028998512065e-05, "loss": 0.8975, "step": 106360 }, { "epoch": 0.6795676117705685, "grad_norm": 0.912253737449646, "learning_rate": 7.413589573909498e-05, "loss": 0.6954, "step": 106370 }, { "epoch": 0.6796314989203072, "grad_norm": 0.7223014831542969, "learning_rate": 7.413150124999997e-05, "loss": 0.9605, "step": 106380 }, { "epoch": 0.6796953860700459, "grad_norm": 0.7518347501754761, "learning_rate": 7.412710651787986e-05, "loss": 0.7462, "step": 106390 }, { "epoch": 0.6797592732197846, "grad_norm": 0.6483036279678345, "learning_rate": 7.412271154277891e-05, "loss": 0.8934, "step": 106400 }, { "epoch": 0.6798231603695233, "grad_norm": 1.010314702987671, "learning_rate": 7.411831632474138e-05, "loss": 0.724, "step": 106410 }, { "epoch": 0.679887047519262, "grad_norm": 0.7592995762825012, "learning_rate": 7.411392086381154e-05, "loss": 0.8157, "step": 106420 }, { "epoch": 0.6799509346690007, "grad_norm": 1.2588444948196411, "learning_rate": 7.410952516003367e-05, "loss": 0.9328, "step": 106430 }, { "epoch": 0.6800148218187394, "grad_norm": 0.7056863903999329, "learning_rate": 7.410512921345201e-05, "loss": 1.0183, "step": 106440 }, { "epoch": 0.6800787089684781, "grad_norm": 0.7405192255973816, "learning_rate": 7.410073302411085e-05, "loss": 0.7652, "step": 106450 }, { "epoch": 0.6801425961182168, "grad_norm": 0.6608672738075256, "learning_rate": 7.409633659205446e-05, "loss": 0.9101, "step": 106460 }, { "epoch": 0.6802064832679555, "grad_norm": 1.030137300491333, "learning_rate": 7.409193991732711e-05, "loss": 0.849, "step": 106470 }, { "epoch": 0.6802703704176942, "grad_norm": 0.6124225854873657, "learning_rate": 7.40875429999731e-05, "loss": 0.758, "step": 106480 }, { "epoch": 0.6803342575674329, "grad_norm": 0.8795433640480042, "learning_rate": 7.408314584003666e-05, "loss": 1.0669, "step": 106490 }, { "epoch": 0.6803981447171716, "grad_norm": 0.7640893459320068, "learning_rate": 7.407874843756213e-05, "loss": 0.8179, "step": 106500 }, { "epoch": 0.6804620318669103, "grad_norm": 0.6787682771682739, "learning_rate": 7.407435079259377e-05, "loss": 0.9895, "step": 106510 }, { "epoch": 0.6805259190166489, "grad_norm": 0.8706437349319458, "learning_rate": 7.406995290517587e-05, "loss": 0.7269, "step": 106520 }, { "epoch": 0.6805898061663876, "grad_norm": 0.6258346438407898, "learning_rate": 7.406555477535271e-05, "loss": 1.0131, "step": 106530 }, { "epoch": 0.6806536933161264, "grad_norm": 1.0943886041641235, "learning_rate": 7.406115640316861e-05, "loss": 0.7417, "step": 106540 }, { "epoch": 0.6807175804658651, "grad_norm": 0.7393679618835449, "learning_rate": 7.405675778866785e-05, "loss": 0.8613, "step": 106550 }, { "epoch": 0.6807814676156038, "grad_norm": 0.8770964741706848, "learning_rate": 7.40523589318947e-05, "loss": 1.2317, "step": 106560 }, { "epoch": 0.6808453547653425, "grad_norm": 0.980842649936676, "learning_rate": 7.404795983289351e-05, "loss": 0.8648, "step": 106570 }, { "epoch": 0.6809092419150812, "grad_norm": 0.7715876698493958, "learning_rate": 7.404356049170856e-05, "loss": 0.9493, "step": 106580 }, { "epoch": 0.6809731290648199, "grad_norm": 0.8744866847991943, "learning_rate": 7.403916090838414e-05, "loss": 1.3351, "step": 106590 }, { "epoch": 0.6810370162145586, "grad_norm": 2.178861618041992, "learning_rate": 7.403476108296458e-05, "loss": 1.0401, "step": 106600 }, { "epoch": 0.6811009033642973, "grad_norm": 0.7490164637565613, "learning_rate": 7.40303610154942e-05, "loss": 0.8825, "step": 106610 }, { "epoch": 0.681164790514036, "grad_norm": 1.3160593509674072, "learning_rate": 7.402596070601729e-05, "loss": 0.9475, "step": 106620 }, { "epoch": 0.6812286776637747, "grad_norm": 0.7300577163696289, "learning_rate": 7.402156015457815e-05, "loss": 0.7993, "step": 106630 }, { "epoch": 0.6812925648135134, "grad_norm": 1.1624113321304321, "learning_rate": 7.401715936122114e-05, "loss": 1.0644, "step": 106640 }, { "epoch": 0.6813564519632521, "grad_norm": 0.6754822134971619, "learning_rate": 7.401275832599054e-05, "loss": 0.9375, "step": 106650 }, { "epoch": 0.6814203391129908, "grad_norm": 0.8442546129226685, "learning_rate": 7.40083570489307e-05, "loss": 1.0102, "step": 106660 }, { "epoch": 0.6814842262627295, "grad_norm": 0.8470264673233032, "learning_rate": 7.400395553008593e-05, "loss": 0.8809, "step": 106670 }, { "epoch": 0.6815481134124682, "grad_norm": 1.252909541130066, "learning_rate": 7.399955376950056e-05, "loss": 0.9274, "step": 106680 }, { "epoch": 0.6816120005622069, "grad_norm": 1.0591319799423218, "learning_rate": 7.399515176721894e-05, "loss": 0.7077, "step": 106690 }, { "epoch": 0.6816758877119456, "grad_norm": 0.9662178754806519, "learning_rate": 7.399074952328536e-05, "loss": 0.9326, "step": 106700 }, { "epoch": 0.6817397748616844, "grad_norm": 0.6794439554214478, "learning_rate": 7.398634703774417e-05, "loss": 0.9654, "step": 106710 }, { "epoch": 0.6818036620114231, "grad_norm": 1.1868617534637451, "learning_rate": 7.398194431063974e-05, "loss": 0.6711, "step": 106720 }, { "epoch": 0.6818675491611618, "grad_norm": 0.6283101439476013, "learning_rate": 7.397754134201637e-05, "loss": 0.7644, "step": 106730 }, { "epoch": 0.6819314363109005, "grad_norm": 0.9207131862640381, "learning_rate": 7.397313813191842e-05, "loss": 1.1804, "step": 106740 }, { "epoch": 0.6819953234606392, "grad_norm": 0.7542859315872192, "learning_rate": 7.396873468039022e-05, "loss": 1.1054, "step": 106750 }, { "epoch": 0.6820592106103778, "grad_norm": 1.1628599166870117, "learning_rate": 7.396433098747613e-05, "loss": 0.7886, "step": 106760 }, { "epoch": 0.6821230977601165, "grad_norm": 0.9535654187202454, "learning_rate": 7.39599270532205e-05, "loss": 0.8177, "step": 106770 }, { "epoch": 0.6821869849098552, "grad_norm": 1.606237769126892, "learning_rate": 7.395552287766766e-05, "loss": 0.9816, "step": 106780 }, { "epoch": 0.6822508720595939, "grad_norm": 0.8882198333740234, "learning_rate": 7.395111846086201e-05, "loss": 0.6792, "step": 106790 }, { "epoch": 0.6823147592093326, "grad_norm": 0.7362374067306519, "learning_rate": 7.394671380284784e-05, "loss": 0.8806, "step": 106800 }, { "epoch": 0.6823786463590713, "grad_norm": 0.7599479556083679, "learning_rate": 7.394230890366956e-05, "loss": 0.8613, "step": 106810 }, { "epoch": 0.68244253350881, "grad_norm": 0.7655912041664124, "learning_rate": 7.393790376337153e-05, "loss": 0.8717, "step": 106820 }, { "epoch": 0.6825064206585487, "grad_norm": 1.046034812927246, "learning_rate": 7.393349838199809e-05, "loss": 1.1742, "step": 106830 }, { "epoch": 0.6825703078082874, "grad_norm": 0.7715229392051697, "learning_rate": 7.392909275959362e-05, "loss": 1.078, "step": 106840 }, { "epoch": 0.6826341949580261, "grad_norm": 0.7597649097442627, "learning_rate": 7.39246868962025e-05, "loss": 0.971, "step": 106850 }, { "epoch": 0.6826980821077648, "grad_norm": 0.5466295480728149, "learning_rate": 7.392028079186906e-05, "loss": 0.7825, "step": 106860 }, { "epoch": 0.6827619692575035, "grad_norm": 4.8443284034729, "learning_rate": 7.39158744466377e-05, "loss": 1.1009, "step": 106870 }, { "epoch": 0.6828258564072422, "grad_norm": 0.6265544891357422, "learning_rate": 7.39114678605528e-05, "loss": 0.8694, "step": 106880 }, { "epoch": 0.682889743556981, "grad_norm": 0.917610764503479, "learning_rate": 7.390706103365873e-05, "loss": 0.9782, "step": 106890 }, { "epoch": 0.6829536307067197, "grad_norm": 0.9550445079803467, "learning_rate": 7.390265396599987e-05, "loss": 0.906, "step": 106900 }, { "epoch": 0.6830175178564584, "grad_norm": 2.5587947368621826, "learning_rate": 7.389824665762061e-05, "loss": 0.8528, "step": 106910 }, { "epoch": 0.6830814050061971, "grad_norm": 1.318000078201294, "learning_rate": 7.389383910856534e-05, "loss": 0.9362, "step": 106920 }, { "epoch": 0.6831452921559358, "grad_norm": 1.0165103673934937, "learning_rate": 7.388943131887842e-05, "loss": 0.7795, "step": 106930 }, { "epoch": 0.6832091793056745, "grad_norm": 0.9445672631263733, "learning_rate": 7.388502328860427e-05, "loss": 0.9833, "step": 106940 }, { "epoch": 0.6832730664554132, "grad_norm": 1.0553864240646362, "learning_rate": 7.388061501778727e-05, "loss": 0.8713, "step": 106950 }, { "epoch": 0.6833369536051519, "grad_norm": 0.913757860660553, "learning_rate": 7.387620650647182e-05, "loss": 0.9192, "step": 106960 }, { "epoch": 0.6834008407548906, "grad_norm": 0.7922553420066833, "learning_rate": 7.387179775470232e-05, "loss": 0.8956, "step": 106970 }, { "epoch": 0.6834647279046293, "grad_norm": 0.7192181348800659, "learning_rate": 7.386738876252315e-05, "loss": 0.8198, "step": 106980 }, { "epoch": 0.683528615054368, "grad_norm": 1.0555779933929443, "learning_rate": 7.386297952997874e-05, "loss": 0.7005, "step": 106990 }, { "epoch": 0.6835925022041066, "grad_norm": 1.0021594762802124, "learning_rate": 7.385857005711348e-05, "loss": 0.8504, "step": 107000 }, { "epoch": 0.6836563893538453, "grad_norm": 0.5227010250091553, "learning_rate": 7.385416034397177e-05, "loss": 0.8899, "step": 107010 }, { "epoch": 0.683720276503584, "grad_norm": 0.47646623849868774, "learning_rate": 7.384975039059802e-05, "loss": 1.0871, "step": 107020 }, { "epoch": 0.6837841636533227, "grad_norm": 1.0652568340301514, "learning_rate": 7.384534019703667e-05, "loss": 0.9768, "step": 107030 }, { "epoch": 0.6838480508030614, "grad_norm": 0.7635281682014465, "learning_rate": 7.384092976333212e-05, "loss": 0.957, "step": 107040 }, { "epoch": 0.6839119379528001, "grad_norm": 0.6990230083465576, "learning_rate": 7.383651908952877e-05, "loss": 0.8431, "step": 107050 }, { "epoch": 0.6839758251025388, "grad_norm": 1.1831239461898804, "learning_rate": 7.383210817567104e-05, "loss": 0.9295, "step": 107060 }, { "epoch": 0.6840397122522776, "grad_norm": 0.9544264078140259, "learning_rate": 7.382769702180339e-05, "loss": 1.0323, "step": 107070 }, { "epoch": 0.6841035994020163, "grad_norm": 0.7274150848388672, "learning_rate": 7.38232856279702e-05, "loss": 0.9134, "step": 107080 }, { "epoch": 0.684167486551755, "grad_norm": 1.0423110723495483, "learning_rate": 7.381887399421592e-05, "loss": 0.9402, "step": 107090 }, { "epoch": 0.6842313737014937, "grad_norm": 0.8618479371070862, "learning_rate": 7.381446212058497e-05, "loss": 0.7547, "step": 107100 }, { "epoch": 0.6842952608512324, "grad_norm": 0.8330484628677368, "learning_rate": 7.381005000712177e-05, "loss": 0.8832, "step": 107110 }, { "epoch": 0.6843591480009711, "grad_norm": 1.7487927675247192, "learning_rate": 7.380563765387079e-05, "loss": 0.8351, "step": 107120 }, { "epoch": 0.6844230351507098, "grad_norm": 1.0328443050384521, "learning_rate": 7.380122506087644e-05, "loss": 0.7783, "step": 107130 }, { "epoch": 0.6844869223004485, "grad_norm": 1.1022374629974365, "learning_rate": 7.379681222818314e-05, "loss": 0.8898, "step": 107140 }, { "epoch": 0.6845508094501872, "grad_norm": 1.1118669509887695, "learning_rate": 7.379239915583538e-05, "loss": 0.8272, "step": 107150 }, { "epoch": 0.6846146965999259, "grad_norm": 1.1815778017044067, "learning_rate": 7.378798584387756e-05, "loss": 1.045, "step": 107160 }, { "epoch": 0.6846785837496646, "grad_norm": 0.781929612159729, "learning_rate": 7.378357229235415e-05, "loss": 1.0828, "step": 107170 }, { "epoch": 0.6847424708994033, "grad_norm": 0.8094179630279541, "learning_rate": 7.37791585013096e-05, "loss": 0.6823, "step": 107180 }, { "epoch": 0.684806358049142, "grad_norm": 0.9121211767196655, "learning_rate": 7.377474447078835e-05, "loss": 0.8463, "step": 107190 }, { "epoch": 0.6848702451988807, "grad_norm": 0.9199677109718323, "learning_rate": 7.377033020083485e-05, "loss": 0.9192, "step": 107200 }, { "epoch": 0.6849341323486194, "grad_norm": 1.0086863040924072, "learning_rate": 7.376591569149356e-05, "loss": 0.8648, "step": 107210 }, { "epoch": 0.6849980194983581, "grad_norm": 0.6935834288597107, "learning_rate": 7.376150094280894e-05, "loss": 0.8088, "step": 107220 }, { "epoch": 0.6850619066480969, "grad_norm": 1.3548187017440796, "learning_rate": 7.375708595482544e-05, "loss": 0.7954, "step": 107230 }, { "epoch": 0.6851257937978356, "grad_norm": 2.9168577194213867, "learning_rate": 7.375267072758753e-05, "loss": 1.0147, "step": 107240 }, { "epoch": 0.6851896809475742, "grad_norm": 0.9866139888763428, "learning_rate": 7.37482552611397e-05, "loss": 0.77, "step": 107250 }, { "epoch": 0.6852535680973129, "grad_norm": 1.5297490358352661, "learning_rate": 7.374383955552638e-05, "loss": 1.2862, "step": 107260 }, { "epoch": 0.6853174552470516, "grad_norm": 0.7798259854316711, "learning_rate": 7.373942361079204e-05, "loss": 0.7411, "step": 107270 }, { "epoch": 0.6853813423967903, "grad_norm": 0.7515537738800049, "learning_rate": 7.37350074269812e-05, "loss": 0.6391, "step": 107280 }, { "epoch": 0.685445229546529, "grad_norm": 1.7930855751037598, "learning_rate": 7.373059100413829e-05, "loss": 0.8938, "step": 107290 }, { "epoch": 0.6855091166962677, "grad_norm": 1.0468648672103882, "learning_rate": 7.372617434230778e-05, "loss": 0.8846, "step": 107300 }, { "epoch": 0.6855730038460064, "grad_norm": 0.9677194952964783, "learning_rate": 7.372175744153417e-05, "loss": 0.879, "step": 107310 }, { "epoch": 0.6856368909957451, "grad_norm": 0.9054749608039856, "learning_rate": 7.371734030186195e-05, "loss": 0.9007, "step": 107320 }, { "epoch": 0.6857007781454838, "grad_norm": 1.1012799739837646, "learning_rate": 7.371292292333559e-05, "loss": 0.7437, "step": 107330 }, { "epoch": 0.6857646652952225, "grad_norm": 0.8656480312347412, "learning_rate": 7.370850530599959e-05, "loss": 0.7237, "step": 107340 }, { "epoch": 0.6858285524449612, "grad_norm": 0.986134946346283, "learning_rate": 7.370408744989844e-05, "loss": 0.9098, "step": 107350 }, { "epoch": 0.6858924395946999, "grad_norm": 1.038024663925171, "learning_rate": 7.36996693550766e-05, "loss": 0.7683, "step": 107360 }, { "epoch": 0.6859563267444386, "grad_norm": 0.9421197175979614, "learning_rate": 7.369525102157861e-05, "loss": 0.7816, "step": 107370 }, { "epoch": 0.6860202138941773, "grad_norm": 0.8556358218193054, "learning_rate": 7.369083244944893e-05, "loss": 0.9645, "step": 107380 }, { "epoch": 0.686084101043916, "grad_norm": 0.7408592700958252, "learning_rate": 7.368641363873207e-05, "loss": 0.8846, "step": 107390 }, { "epoch": 0.6861479881936547, "grad_norm": 0.5881041288375854, "learning_rate": 7.368199458947254e-05, "loss": 0.7665, "step": 107400 }, { "epoch": 0.6862118753433935, "grad_norm": 0.9732454419136047, "learning_rate": 7.367757530171482e-05, "loss": 1.018, "step": 107410 }, { "epoch": 0.6862757624931322, "grad_norm": 0.4878905415534973, "learning_rate": 7.367315577550344e-05, "loss": 1.0164, "step": 107420 }, { "epoch": 0.6863396496428709, "grad_norm": 0.9142529368400574, "learning_rate": 7.366873601088291e-05, "loss": 0.8166, "step": 107430 }, { "epoch": 0.6864035367926096, "grad_norm": 0.7303772568702698, "learning_rate": 7.366431600789772e-05, "loss": 0.6688, "step": 107440 }, { "epoch": 0.6864674239423483, "grad_norm": 0.7583977580070496, "learning_rate": 7.36598957665924e-05, "loss": 0.859, "step": 107450 }, { "epoch": 0.686531311092087, "grad_norm": 0.8306979537010193, "learning_rate": 7.365547528701146e-05, "loss": 0.9408, "step": 107460 }, { "epoch": 0.6865951982418257, "grad_norm": 0.9841431379318237, "learning_rate": 7.365105456919942e-05, "loss": 0.9479, "step": 107470 }, { "epoch": 0.6866590853915644, "grad_norm": 0.8412874341011047, "learning_rate": 7.364663361320081e-05, "loss": 1.2542, "step": 107480 }, { "epoch": 0.686722972541303, "grad_norm": 0.9620808362960815, "learning_rate": 7.364221241906014e-05, "loss": 1.0792, "step": 107490 }, { "epoch": 0.6867868596910417, "grad_norm": 0.8014304637908936, "learning_rate": 7.363779098682193e-05, "loss": 1.1819, "step": 107500 }, { "epoch": 0.6868507468407804, "grad_norm": 1.1913782358169556, "learning_rate": 7.36333693165307e-05, "loss": 1.0777, "step": 107510 }, { "epoch": 0.6869146339905191, "grad_norm": 0.6413132548332214, "learning_rate": 7.362894740823102e-05, "loss": 0.9969, "step": 107520 }, { "epoch": 0.6869785211402578, "grad_norm": 2.0043857097625732, "learning_rate": 7.362452526196738e-05, "loss": 0.7761, "step": 107530 }, { "epoch": 0.6870424082899965, "grad_norm": 2.9130804538726807, "learning_rate": 7.362010287778435e-05, "loss": 0.9517, "step": 107540 }, { "epoch": 0.6871062954397352, "grad_norm": 0.6536256670951843, "learning_rate": 7.361568025572644e-05, "loss": 0.7987, "step": 107550 }, { "epoch": 0.6871701825894739, "grad_norm": 0.8029404878616333, "learning_rate": 7.36112573958382e-05, "loss": 0.8677, "step": 107560 }, { "epoch": 0.6872340697392126, "grad_norm": 1.2548484802246094, "learning_rate": 7.360683429816418e-05, "loss": 0.9721, "step": 107570 }, { "epoch": 0.6872979568889513, "grad_norm": 0.6949800848960876, "learning_rate": 7.360241096274892e-05, "loss": 0.7863, "step": 107580 }, { "epoch": 0.68736184403869, "grad_norm": 0.7144826054573059, "learning_rate": 7.359798738963694e-05, "loss": 0.6767, "step": 107590 }, { "epoch": 0.6874257311884288, "grad_norm": 0.7971734404563904, "learning_rate": 7.359356357887282e-05, "loss": 0.7645, "step": 107600 }, { "epoch": 0.6874896183381675, "grad_norm": 0.6574593186378479, "learning_rate": 7.35891395305011e-05, "loss": 1.1075, "step": 107610 }, { "epoch": 0.6875535054879062, "grad_norm": 0.8098707795143127, "learning_rate": 7.358471524456635e-05, "loss": 0.9526, "step": 107620 }, { "epoch": 0.6876173926376449, "grad_norm": 0.7118765711784363, "learning_rate": 7.35802907211131e-05, "loss": 0.7041, "step": 107630 }, { "epoch": 0.6876812797873836, "grad_norm": 0.8008665442466736, "learning_rate": 7.357586596018594e-05, "loss": 0.8071, "step": 107640 }, { "epoch": 0.6877451669371223, "grad_norm": 0.9328833222389221, "learning_rate": 7.357144096182938e-05, "loss": 1.3249, "step": 107650 }, { "epoch": 0.687809054086861, "grad_norm": 0.6230046153068542, "learning_rate": 7.356701572608806e-05, "loss": 0.7683, "step": 107660 }, { "epoch": 0.6878729412365997, "grad_norm": 0.6966734528541565, "learning_rate": 7.356259025300646e-05, "loss": 0.9071, "step": 107670 }, { "epoch": 0.6879368283863384, "grad_norm": 0.8863798975944519, "learning_rate": 7.355816454262923e-05, "loss": 0.8069, "step": 107680 }, { "epoch": 0.6880007155360771, "grad_norm": 0.928939700126648, "learning_rate": 7.35537385950009e-05, "loss": 0.8535, "step": 107690 }, { "epoch": 0.6880646026858158, "grad_norm": 0.8435116410255432, "learning_rate": 7.354931241016601e-05, "loss": 0.798, "step": 107700 }, { "epoch": 0.6881284898355545, "grad_norm": 1.1882624626159668, "learning_rate": 7.35448859881692e-05, "loss": 1.0083, "step": 107710 }, { "epoch": 0.6881923769852932, "grad_norm": 0.8240717053413391, "learning_rate": 7.3540459329055e-05, "loss": 0.8051, "step": 107720 }, { "epoch": 0.6882562641350318, "grad_norm": 0.9132935404777527, "learning_rate": 7.353603243286805e-05, "loss": 0.9164, "step": 107730 }, { "epoch": 0.6883201512847705, "grad_norm": 0.9722372889518738, "learning_rate": 7.353160529965285e-05, "loss": 0.9007, "step": 107740 }, { "epoch": 0.6883840384345092, "grad_norm": 0.7652561068534851, "learning_rate": 7.352717792945404e-05, "loss": 0.9988, "step": 107750 }, { "epoch": 0.6884479255842479, "grad_norm": 1.0295495986938477, "learning_rate": 7.352275032231619e-05, "loss": 1.0438, "step": 107760 }, { "epoch": 0.6885118127339867, "grad_norm": 1.0043138265609741, "learning_rate": 7.351832247828391e-05, "loss": 0.8635, "step": 107770 }, { "epoch": 0.6885756998837254, "grad_norm": 0.9536296129226685, "learning_rate": 7.351389439740176e-05, "loss": 0.7845, "step": 107780 }, { "epoch": 0.6886395870334641, "grad_norm": 1.182599663734436, "learning_rate": 7.350946607971436e-05, "loss": 0.9473, "step": 107790 }, { "epoch": 0.6887034741832028, "grad_norm": 0.9443296194076538, "learning_rate": 7.35050375252663e-05, "loss": 1.12, "step": 107800 }, { "epoch": 0.6887673613329415, "grad_norm": 1.2377766370773315, "learning_rate": 7.350060873410216e-05, "loss": 0.6376, "step": 107810 }, { "epoch": 0.6888312484826802, "grad_norm": 1.1331062316894531, "learning_rate": 7.349617970626658e-05, "loss": 0.8585, "step": 107820 }, { "epoch": 0.6888951356324189, "grad_norm": 0.9837049245834351, "learning_rate": 7.349175044180414e-05, "loss": 0.7217, "step": 107830 }, { "epoch": 0.6889590227821576, "grad_norm": 0.4539640545845032, "learning_rate": 7.348732094075942e-05, "loss": 0.6076, "step": 107840 }, { "epoch": 0.6890229099318963, "grad_norm": 0.9993829131126404, "learning_rate": 7.348289120317709e-05, "loss": 0.7641, "step": 107850 }, { "epoch": 0.689086797081635, "grad_norm": 0.9905250072479248, "learning_rate": 7.347846122910174e-05, "loss": 0.6454, "step": 107860 }, { "epoch": 0.6891506842313737, "grad_norm": 0.8237646818161011, "learning_rate": 7.347403101857795e-05, "loss": 0.8458, "step": 107870 }, { "epoch": 0.6892145713811124, "grad_norm": 1.0233882665634155, "learning_rate": 7.346960057165036e-05, "loss": 0.8326, "step": 107880 }, { "epoch": 0.6892784585308511, "grad_norm": 1.116274356842041, "learning_rate": 7.34651698883636e-05, "loss": 0.8269, "step": 107890 }, { "epoch": 0.6893423456805898, "grad_norm": 0.6511923670768738, "learning_rate": 7.346073896876227e-05, "loss": 1.2329, "step": 107900 }, { "epoch": 0.6894062328303285, "grad_norm": 1.0535688400268555, "learning_rate": 7.345630781289102e-05, "loss": 0.8788, "step": 107910 }, { "epoch": 0.6894701199800672, "grad_norm": 1.045600175857544, "learning_rate": 7.345187642079443e-05, "loss": 0.8773, "step": 107920 }, { "epoch": 0.689534007129806, "grad_norm": 1.1185442209243774, "learning_rate": 7.344744479251717e-05, "loss": 0.7012, "step": 107930 }, { "epoch": 0.6895978942795447, "grad_norm": 0.8849347233772278, "learning_rate": 7.344301292810385e-05, "loss": 0.8627, "step": 107940 }, { "epoch": 0.6896617814292834, "grad_norm": 0.8291599154472351, "learning_rate": 7.343858082759912e-05, "loss": 0.8959, "step": 107950 }, { "epoch": 0.6897256685790221, "grad_norm": 0.6584329009056091, "learning_rate": 7.34341484910476e-05, "loss": 0.7189, "step": 107960 }, { "epoch": 0.6897895557287607, "grad_norm": 1.2374427318572998, "learning_rate": 7.342971591849393e-05, "loss": 0.9428, "step": 107970 }, { "epoch": 0.6898534428784994, "grad_norm": 0.8575314879417419, "learning_rate": 7.342528310998275e-05, "loss": 0.759, "step": 107980 }, { "epoch": 0.6899173300282381, "grad_norm": 0.7263084650039673, "learning_rate": 7.34208500655587e-05, "loss": 0.7271, "step": 107990 }, { "epoch": 0.6899812171779768, "grad_norm": 1.145310401916504, "learning_rate": 7.341641678526643e-05, "loss": 1.1193, "step": 108000 }, { "epoch": 0.6900451043277155, "grad_norm": 1.2653499841690063, "learning_rate": 7.341198326915057e-05, "loss": 0.8146, "step": 108010 }, { "epoch": 0.6901089914774542, "grad_norm": 0.6225971579551697, "learning_rate": 7.340754951725582e-05, "loss": 0.7708, "step": 108020 }, { "epoch": 0.6901728786271929, "grad_norm": 2.235273838043213, "learning_rate": 7.340311552962676e-05, "loss": 0.8989, "step": 108030 }, { "epoch": 0.6902367657769316, "grad_norm": 0.8102111220359802, "learning_rate": 7.33986813063081e-05, "loss": 1.1533, "step": 108040 }, { "epoch": 0.6903006529266703, "grad_norm": 0.7722830772399902, "learning_rate": 7.339424684734447e-05, "loss": 1.0018, "step": 108050 }, { "epoch": 0.690364540076409, "grad_norm": 0.7864007949829102, "learning_rate": 7.338981215278055e-05, "loss": 1.0525, "step": 108060 }, { "epoch": 0.6904284272261477, "grad_norm": 0.6729293465614319, "learning_rate": 7.338537722266097e-05, "loss": 0.8472, "step": 108070 }, { "epoch": 0.6904923143758864, "grad_norm": 0.7282936573028564, "learning_rate": 7.338094205703043e-05, "loss": 0.9557, "step": 108080 }, { "epoch": 0.6905562015256251, "grad_norm": 1.0277268886566162, "learning_rate": 7.337650665593355e-05, "loss": 0.93, "step": 108090 }, { "epoch": 0.6906200886753638, "grad_norm": 2.55513334274292, "learning_rate": 7.337207101941503e-05, "loss": 0.796, "step": 108100 }, { "epoch": 0.6906839758251025, "grad_norm": 0.7738178968429565, "learning_rate": 7.336763514751954e-05, "loss": 0.8795, "step": 108110 }, { "epoch": 0.6907478629748413, "grad_norm": 0.9889559149742126, "learning_rate": 7.336319904029176e-05, "loss": 0.848, "step": 108120 }, { "epoch": 0.69081175012458, "grad_norm": 1.2246037721633911, "learning_rate": 7.335876269777634e-05, "loss": 0.8715, "step": 108130 }, { "epoch": 0.6908756372743187, "grad_norm": 0.899691641330719, "learning_rate": 7.335432612001798e-05, "loss": 1.013, "step": 108140 }, { "epoch": 0.6909395244240574, "grad_norm": 0.9258847236633301, "learning_rate": 7.334988930706133e-05, "loss": 0.8774, "step": 108150 }, { "epoch": 0.6910034115737961, "grad_norm": 0.825404167175293, "learning_rate": 7.334545225895111e-05, "loss": 0.6752, "step": 108160 }, { "epoch": 0.6910672987235348, "grad_norm": 0.6678471565246582, "learning_rate": 7.334101497573199e-05, "loss": 0.7239, "step": 108170 }, { "epoch": 0.6911311858732735, "grad_norm": 0.8919599056243896, "learning_rate": 7.333657745744866e-05, "loss": 0.8604, "step": 108180 }, { "epoch": 0.6911950730230122, "grad_norm": 0.4956168234348297, "learning_rate": 7.333213970414579e-05, "loss": 0.8364, "step": 108190 }, { "epoch": 0.6912589601727509, "grad_norm": 2.8205111026763916, "learning_rate": 7.332770171586811e-05, "loss": 0.7711, "step": 108200 }, { "epoch": 0.6913228473224896, "grad_norm": 0.9555968046188354, "learning_rate": 7.332326349266028e-05, "loss": 0.8765, "step": 108210 }, { "epoch": 0.6913867344722282, "grad_norm": 0.928036093711853, "learning_rate": 7.331882503456701e-05, "loss": 1.0052, "step": 108220 }, { "epoch": 0.6914506216219669, "grad_norm": 0.8674328923225403, "learning_rate": 7.331438634163298e-05, "loss": 0.7707, "step": 108230 }, { "epoch": 0.6915145087717056, "grad_norm": 0.8306328058242798, "learning_rate": 7.330994741390293e-05, "loss": 0.8573, "step": 108240 }, { "epoch": 0.6915783959214443, "grad_norm": 1.346864938735962, "learning_rate": 7.330550825142156e-05, "loss": 0.7394, "step": 108250 }, { "epoch": 0.691642283071183, "grad_norm": 1.4455012083053589, "learning_rate": 7.330106885423353e-05, "loss": 0.8614, "step": 108260 }, { "epoch": 0.6917061702209217, "grad_norm": 0.7791756391525269, "learning_rate": 7.32966292223836e-05, "loss": 0.7627, "step": 108270 }, { "epoch": 0.6917700573706604, "grad_norm": 0.8995997905731201, "learning_rate": 7.329218935591645e-05, "loss": 0.8276, "step": 108280 }, { "epoch": 0.6918339445203991, "grad_norm": 0.9824413657188416, "learning_rate": 7.328774925487679e-05, "loss": 0.9905, "step": 108290 }, { "epoch": 0.6918978316701379, "grad_norm": 0.9453624486923218, "learning_rate": 7.328330891930937e-05, "loss": 0.9079, "step": 108300 }, { "epoch": 0.6919617188198766, "grad_norm": 0.9004096388816833, "learning_rate": 7.327886834925888e-05, "loss": 0.9236, "step": 108310 }, { "epoch": 0.6920256059696153, "grad_norm": 0.7478508353233337, "learning_rate": 7.327442754477003e-05, "loss": 0.8575, "step": 108320 }, { "epoch": 0.692089493119354, "grad_norm": 2.181452751159668, "learning_rate": 7.326998650588758e-05, "loss": 0.7738, "step": 108330 }, { "epoch": 0.6921533802690927, "grad_norm": 1.4748575687408447, "learning_rate": 7.326554523265624e-05, "loss": 1.3507, "step": 108340 }, { "epoch": 0.6922172674188314, "grad_norm": 1.0010013580322266, "learning_rate": 7.326110372512071e-05, "loss": 0.8854, "step": 108350 }, { "epoch": 0.6922811545685701, "grad_norm": 0.87949138879776, "learning_rate": 7.325666198332575e-05, "loss": 0.746, "step": 108360 }, { "epoch": 0.6923450417183088, "grad_norm": 0.8844693303108215, "learning_rate": 7.325222000731609e-05, "loss": 0.9919, "step": 108370 }, { "epoch": 0.6924089288680475, "grad_norm": 1.2705687284469604, "learning_rate": 7.324777779713644e-05, "loss": 0.9765, "step": 108380 }, { "epoch": 0.6924728160177862, "grad_norm": 0.8071838021278381, "learning_rate": 7.324333535283157e-05, "loss": 0.837, "step": 108390 }, { "epoch": 0.6925367031675249, "grad_norm": 0.9001646637916565, "learning_rate": 7.323889267444621e-05, "loss": 0.846, "step": 108400 }, { "epoch": 0.6926005903172636, "grad_norm": 0.9376798272132874, "learning_rate": 7.323444976202508e-05, "loss": 0.7456, "step": 108410 }, { "epoch": 0.6926644774670023, "grad_norm": 0.8280836939811707, "learning_rate": 7.323000661561295e-05, "loss": 0.9753, "step": 108420 }, { "epoch": 0.692728364616741, "grad_norm": 1.4879751205444336, "learning_rate": 7.322556323525456e-05, "loss": 0.9096, "step": 108430 }, { "epoch": 0.6927922517664797, "grad_norm": 1.0255200862884521, "learning_rate": 7.322111962099465e-05, "loss": 0.9377, "step": 108440 }, { "epoch": 0.6928561389162184, "grad_norm": 0.9533114433288574, "learning_rate": 7.321667577287799e-05, "loss": 0.7927, "step": 108450 }, { "epoch": 0.692920026065957, "grad_norm": 0.7866392731666565, "learning_rate": 7.32122316909493e-05, "loss": 0.9866, "step": 108460 }, { "epoch": 0.6929839132156957, "grad_norm": 1.0992743968963623, "learning_rate": 7.320778737525335e-05, "loss": 0.7761, "step": 108470 }, { "epoch": 0.6930478003654345, "grad_norm": 0.9191528558731079, "learning_rate": 7.320334282583492e-05, "loss": 0.8788, "step": 108480 }, { "epoch": 0.6931116875151732, "grad_norm": 1.2555981874465942, "learning_rate": 7.319889804273876e-05, "loss": 0.9633, "step": 108490 }, { "epoch": 0.6931755746649119, "grad_norm": 0.8771397471427917, "learning_rate": 7.319445302600961e-05, "loss": 0.615, "step": 108500 }, { "epoch": 0.6932394618146506, "grad_norm": 0.714777946472168, "learning_rate": 7.319000777569226e-05, "loss": 0.7238, "step": 108510 }, { "epoch": 0.6933033489643893, "grad_norm": 1.2296061515808105, "learning_rate": 7.318556229183146e-05, "loss": 0.7767, "step": 108520 }, { "epoch": 0.693367236114128, "grad_norm": 0.7856013178825378, "learning_rate": 7.3181116574472e-05, "loss": 0.692, "step": 108530 }, { "epoch": 0.6934311232638667, "grad_norm": 0.9102780818939209, "learning_rate": 7.317667062365863e-05, "loss": 0.9865, "step": 108540 }, { "epoch": 0.6934950104136054, "grad_norm": 1.0297400951385498, "learning_rate": 7.317222443943616e-05, "loss": 0.9191, "step": 108550 }, { "epoch": 0.6935588975633441, "grad_norm": 1.809927225112915, "learning_rate": 7.316777802184934e-05, "loss": 1.084, "step": 108560 }, { "epoch": 0.6936227847130828, "grad_norm": 2.1884663105010986, "learning_rate": 7.316333137094294e-05, "loss": 0.8257, "step": 108570 }, { "epoch": 0.6936866718628215, "grad_norm": 0.8382952213287354, "learning_rate": 7.315888448676175e-05, "loss": 0.8348, "step": 108580 }, { "epoch": 0.6937505590125602, "grad_norm": 0.7834774851799011, "learning_rate": 7.315443736935056e-05, "loss": 0.8987, "step": 108590 }, { "epoch": 0.6938144461622989, "grad_norm": 0.710081934928894, "learning_rate": 7.314999001875415e-05, "loss": 0.6713, "step": 108600 }, { "epoch": 0.6938783333120376, "grad_norm": 0.9444938898086548, "learning_rate": 7.314554243501732e-05, "loss": 0.9177, "step": 108610 }, { "epoch": 0.6939422204617763, "grad_norm": 0.6890098452568054, "learning_rate": 7.314109461818485e-05, "loss": 0.9145, "step": 108620 }, { "epoch": 0.694006107611515, "grad_norm": 0.9023224115371704, "learning_rate": 7.313664656830154e-05, "loss": 1.0199, "step": 108630 }, { "epoch": 0.6940699947612538, "grad_norm": 0.6425119638442993, "learning_rate": 7.31321982854122e-05, "loss": 0.862, "step": 108640 }, { "epoch": 0.6941338819109925, "grad_norm": 1.188393473625183, "learning_rate": 7.312774976956159e-05, "loss": 0.801, "step": 108650 }, { "epoch": 0.6941977690607312, "grad_norm": 0.7165592908859253, "learning_rate": 7.312330102079454e-05, "loss": 1.3727, "step": 108660 }, { "epoch": 0.6942616562104699, "grad_norm": 0.6589129567146301, "learning_rate": 7.311885203915585e-05, "loss": 0.8308, "step": 108670 }, { "epoch": 0.6943255433602086, "grad_norm": 1.0794988870620728, "learning_rate": 7.31144028246903e-05, "loss": 0.9766, "step": 108680 }, { "epoch": 0.6943894305099473, "grad_norm": 1.6722362041473389, "learning_rate": 7.310995337744271e-05, "loss": 0.8217, "step": 108690 }, { "epoch": 0.6944533176596859, "grad_norm": 2.617365598678589, "learning_rate": 7.310550369745793e-05, "loss": 0.8649, "step": 108700 }, { "epoch": 0.6945172048094246, "grad_norm": 1.0052344799041748, "learning_rate": 7.310105378478071e-05, "loss": 0.8908, "step": 108710 }, { "epoch": 0.6945810919591633, "grad_norm": 0.6017476320266724, "learning_rate": 7.309660363945592e-05, "loss": 0.8932, "step": 108720 }, { "epoch": 0.694644979108902, "grad_norm": 1.1323217153549194, "learning_rate": 7.309215326152833e-05, "loss": 0.9389, "step": 108730 }, { "epoch": 0.6947088662586407, "grad_norm": 1.0148589611053467, "learning_rate": 7.308770265104279e-05, "loss": 0.8976, "step": 108740 }, { "epoch": 0.6947727534083794, "grad_norm": 1.195841670036316, "learning_rate": 7.30832518080441e-05, "loss": 1.0468, "step": 108750 }, { "epoch": 0.6948366405581181, "grad_norm": 2.77616810798645, "learning_rate": 7.307880073257711e-05, "loss": 0.8265, "step": 108760 }, { "epoch": 0.6949005277078568, "grad_norm": 0.820035457611084, "learning_rate": 7.30743494246866e-05, "loss": 1.0138, "step": 108770 }, { "epoch": 0.6949644148575955, "grad_norm": 0.768181324005127, "learning_rate": 7.306989788441747e-05, "loss": 0.896, "step": 108780 }, { "epoch": 0.6950283020073342, "grad_norm": 0.9276620745658875, "learning_rate": 7.306544611181449e-05, "loss": 0.9899, "step": 108790 }, { "epoch": 0.6950921891570729, "grad_norm": 1.3727481365203857, "learning_rate": 7.306099410692251e-05, "loss": 0.9883, "step": 108800 }, { "epoch": 0.6951560763068116, "grad_norm": 0.6537569165229797, "learning_rate": 7.305654186978636e-05, "loss": 0.7696, "step": 108810 }, { "epoch": 0.6952199634565503, "grad_norm": 0.8590995669364929, "learning_rate": 7.30520894004509e-05, "loss": 0.9809, "step": 108820 }, { "epoch": 0.6952838506062891, "grad_norm": 0.9551057815551758, "learning_rate": 7.304763669896096e-05, "loss": 0.9619, "step": 108830 }, { "epoch": 0.6953477377560278, "grad_norm": 0.8596848845481873, "learning_rate": 7.304318376536138e-05, "loss": 0.8957, "step": 108840 }, { "epoch": 0.6954116249057665, "grad_norm": 1.1509318351745605, "learning_rate": 7.3038730599697e-05, "loss": 0.6727, "step": 108850 }, { "epoch": 0.6954755120555052, "grad_norm": 0.5256636142730713, "learning_rate": 7.303427720201265e-05, "loss": 0.7634, "step": 108860 }, { "epoch": 0.6955393992052439, "grad_norm": 0.8332456350326538, "learning_rate": 7.302982357235323e-05, "loss": 1.3683, "step": 108870 }, { "epoch": 0.6956032863549826, "grad_norm": 0.7100444436073303, "learning_rate": 7.302536971076355e-05, "loss": 0.8936, "step": 108880 }, { "epoch": 0.6956671735047213, "grad_norm": 1.0301616191864014, "learning_rate": 7.302091561728848e-05, "loss": 0.784, "step": 108890 }, { "epoch": 0.69573106065446, "grad_norm": 0.8167005777359009, "learning_rate": 7.301646129197289e-05, "loss": 1.0153, "step": 108900 }, { "epoch": 0.6957949478041987, "grad_norm": 0.6708621382713318, "learning_rate": 7.30120067348616e-05, "loss": 0.9037, "step": 108910 }, { "epoch": 0.6958588349539374, "grad_norm": 1.8930144309997559, "learning_rate": 7.30075519459995e-05, "loss": 0.9704, "step": 108920 }, { "epoch": 0.6959227221036761, "grad_norm": 0.9844603538513184, "learning_rate": 7.300309692543145e-05, "loss": 1.0861, "step": 108930 }, { "epoch": 0.6959866092534148, "grad_norm": 0.9566649198532104, "learning_rate": 7.299864167320232e-05, "loss": 1.0209, "step": 108940 }, { "epoch": 0.6960504964031534, "grad_norm": 0.9092232584953308, "learning_rate": 7.299418618935695e-05, "loss": 1.0676, "step": 108950 }, { "epoch": 0.6961143835528921, "grad_norm": 0.7573904395103455, "learning_rate": 7.298973047394025e-05, "loss": 0.7415, "step": 108960 }, { "epoch": 0.6961782707026308, "grad_norm": 1.1252961158752441, "learning_rate": 7.298527452699708e-05, "loss": 1.0561, "step": 108970 }, { "epoch": 0.6962421578523695, "grad_norm": 0.7041053175926208, "learning_rate": 7.298081834857229e-05, "loss": 0.9674, "step": 108980 }, { "epoch": 0.6963060450021082, "grad_norm": 0.9071682095527649, "learning_rate": 7.29763619387108e-05, "loss": 0.9063, "step": 108990 }, { "epoch": 0.696369932151847, "grad_norm": 0.698070228099823, "learning_rate": 7.297190529745746e-05, "loss": 0.8875, "step": 109000 }, { "epoch": 0.6964338193015857, "grad_norm": 0.9515412449836731, "learning_rate": 7.296744842485715e-05, "loss": 0.8703, "step": 109010 }, { "epoch": 0.6964977064513244, "grad_norm": 1.2427845001220703, "learning_rate": 7.296299132095478e-05, "loss": 0.9569, "step": 109020 }, { "epoch": 0.6965615936010631, "grad_norm": 0.5841128826141357, "learning_rate": 7.295853398579521e-05, "loss": 0.9137, "step": 109030 }, { "epoch": 0.6966254807508018, "grad_norm": 0.5396087765693665, "learning_rate": 7.295407641942334e-05, "loss": 0.7979, "step": 109040 }, { "epoch": 0.6966893679005405, "grad_norm": 0.7131836414337158, "learning_rate": 7.294961862188407e-05, "loss": 1.0448, "step": 109050 }, { "epoch": 0.6967532550502792, "grad_norm": 1.0554966926574707, "learning_rate": 7.29451605932223e-05, "loss": 0.92, "step": 109060 }, { "epoch": 0.6968171422000179, "grad_norm": 0.7954362630844116, "learning_rate": 7.294070233348289e-05, "loss": 0.841, "step": 109070 }, { "epoch": 0.6968810293497566, "grad_norm": 0.8883830308914185, "learning_rate": 7.293624384271076e-05, "loss": 0.7748, "step": 109080 }, { "epoch": 0.6969449164994953, "grad_norm": 1.4885032176971436, "learning_rate": 7.293178512095082e-05, "loss": 0.8115, "step": 109090 }, { "epoch": 0.697008803649234, "grad_norm": 0.9093277454376221, "learning_rate": 7.292732616824797e-05, "loss": 0.8182, "step": 109100 }, { "epoch": 0.6970726907989727, "grad_norm": 0.9241993427276611, "learning_rate": 7.29228669846471e-05, "loss": 0.7127, "step": 109110 }, { "epoch": 0.6971365779487114, "grad_norm": 0.6447529792785645, "learning_rate": 7.291840757019314e-05, "loss": 0.8501, "step": 109120 }, { "epoch": 0.6972004650984501, "grad_norm": 0.7052245736122131, "learning_rate": 7.291394792493098e-05, "loss": 0.947, "step": 109130 }, { "epoch": 0.6972643522481888, "grad_norm": 1.3450639247894287, "learning_rate": 7.290948804890555e-05, "loss": 0.7365, "step": 109140 }, { "epoch": 0.6973282393979275, "grad_norm": 0.5776755213737488, "learning_rate": 7.290502794216173e-05, "loss": 0.7275, "step": 109150 }, { "epoch": 0.6973921265476662, "grad_norm": 0.8304409980773926, "learning_rate": 7.290056760474448e-05, "loss": 0.9387, "step": 109160 }, { "epoch": 0.697456013697405, "grad_norm": 0.8991537690162659, "learning_rate": 7.289610703669872e-05, "loss": 0.7778, "step": 109170 }, { "epoch": 0.6975199008471437, "grad_norm": 0.8365470170974731, "learning_rate": 7.289164623806933e-05, "loss": 0.8706, "step": 109180 }, { "epoch": 0.6975837879968823, "grad_norm": 0.855769157409668, "learning_rate": 7.288718520890127e-05, "loss": 0.7282, "step": 109190 }, { "epoch": 0.697647675146621, "grad_norm": 0.7348789572715759, "learning_rate": 7.288272394923945e-05, "loss": 1.0745, "step": 109200 }, { "epoch": 0.6977115622963597, "grad_norm": 1.0957111120224, "learning_rate": 7.287826245912879e-05, "loss": 0.7343, "step": 109210 }, { "epoch": 0.6977754494460984, "grad_norm": 0.8726381063461304, "learning_rate": 7.287380073861425e-05, "loss": 1.0231, "step": 109220 }, { "epoch": 0.6978393365958371, "grad_norm": 0.6815057992935181, "learning_rate": 7.286933878774075e-05, "loss": 0.8475, "step": 109230 }, { "epoch": 0.6979032237455758, "grad_norm": 1.1125048398971558, "learning_rate": 7.286487660655323e-05, "loss": 0.8779, "step": 109240 }, { "epoch": 0.6979671108953145, "grad_norm": 0.725688636302948, "learning_rate": 7.28604141950966e-05, "loss": 0.9028, "step": 109250 }, { "epoch": 0.6980309980450532, "grad_norm": 0.8986996412277222, "learning_rate": 7.285595155341583e-05, "loss": 0.9237, "step": 109260 }, { "epoch": 0.6980948851947919, "grad_norm": 0.9736185073852539, "learning_rate": 7.285148868155587e-05, "loss": 0.8967, "step": 109270 }, { "epoch": 0.6981587723445306, "grad_norm": 1.0567455291748047, "learning_rate": 7.284702557956165e-05, "loss": 1.0126, "step": 109280 }, { "epoch": 0.6982226594942693, "grad_norm": 1.032707691192627, "learning_rate": 7.28425622474781e-05, "loss": 1.0844, "step": 109290 }, { "epoch": 0.698286546644008, "grad_norm": 0.6320337653160095, "learning_rate": 7.283809868535018e-05, "loss": 0.745, "step": 109300 }, { "epoch": 0.6983504337937467, "grad_norm": 0.7750630974769592, "learning_rate": 7.283363489322287e-05, "loss": 1.0077, "step": 109310 }, { "epoch": 0.6984143209434854, "grad_norm": 0.7525535821914673, "learning_rate": 7.282917087114109e-05, "loss": 0.9631, "step": 109320 }, { "epoch": 0.6984782080932241, "grad_norm": 0.903925895690918, "learning_rate": 7.282470661914982e-05, "loss": 1.0631, "step": 109330 }, { "epoch": 0.6985420952429628, "grad_norm": 0.6858085989952087, "learning_rate": 7.282024213729399e-05, "loss": 0.9775, "step": 109340 }, { "epoch": 0.6986059823927016, "grad_norm": 1.176261067390442, "learning_rate": 7.28157774256186e-05, "loss": 0.8382, "step": 109350 }, { "epoch": 0.6986698695424403, "grad_norm": 0.7239077091217041, "learning_rate": 7.281131248416858e-05, "loss": 0.8858, "step": 109360 }, { "epoch": 0.698733756692179, "grad_norm": 1.3246084451675415, "learning_rate": 7.280684731298892e-05, "loss": 0.8572, "step": 109370 }, { "epoch": 0.6987976438419177, "grad_norm": 0.7234712839126587, "learning_rate": 7.280238191212455e-05, "loss": 0.7359, "step": 109380 }, { "epoch": 0.6988615309916564, "grad_norm": 1.1668168306350708, "learning_rate": 7.27979162816205e-05, "loss": 0.8897, "step": 109390 }, { "epoch": 0.6989254181413951, "grad_norm": 0.9035739302635193, "learning_rate": 7.279345042152167e-05, "loss": 0.8598, "step": 109400 }, { "epoch": 0.6989893052911338, "grad_norm": 0.9039598107337952, "learning_rate": 7.278898433187311e-05, "loss": 0.9865, "step": 109410 }, { "epoch": 0.6990531924408725, "grad_norm": 0.9996391534805298, "learning_rate": 7.278451801271975e-05, "loss": 0.7356, "step": 109420 }, { "epoch": 0.6991170795906111, "grad_norm": 0.8987241983413696, "learning_rate": 7.27800514641066e-05, "loss": 0.699, "step": 109430 }, { "epoch": 0.6991809667403498, "grad_norm": 0.9513826370239258, "learning_rate": 7.27755846860786e-05, "loss": 1.0641, "step": 109440 }, { "epoch": 0.6992448538900885, "grad_norm": 1.103652000427246, "learning_rate": 7.277111767868076e-05, "loss": 0.7386, "step": 109450 }, { "epoch": 0.6993087410398272, "grad_norm": 0.7837316393852234, "learning_rate": 7.276665044195808e-05, "loss": 0.8191, "step": 109460 }, { "epoch": 0.6993726281895659, "grad_norm": 0.8951888680458069, "learning_rate": 7.276218297595553e-05, "loss": 1.0341, "step": 109470 }, { "epoch": 0.6994365153393046, "grad_norm": 0.8686550259590149, "learning_rate": 7.275771528071811e-05, "loss": 0.9451, "step": 109480 }, { "epoch": 0.6995004024890433, "grad_norm": 0.7066924571990967, "learning_rate": 7.27532473562908e-05, "loss": 0.867, "step": 109490 }, { "epoch": 0.699564289638782, "grad_norm": 1.1765514612197876, "learning_rate": 7.274877920271861e-05, "loss": 0.8446, "step": 109500 }, { "epoch": 0.6996281767885207, "grad_norm": 1.2923158407211304, "learning_rate": 7.274431082004652e-05, "loss": 0.6812, "step": 109510 }, { "epoch": 0.6996920639382594, "grad_norm": 1.4523509740829468, "learning_rate": 7.273984220831956e-05, "loss": 0.7639, "step": 109520 }, { "epoch": 0.6997559510879982, "grad_norm": 1.0498130321502686, "learning_rate": 7.273537336758272e-05, "loss": 0.6902, "step": 109530 }, { "epoch": 0.6998198382377369, "grad_norm": 0.7311170697212219, "learning_rate": 7.273090429788098e-05, "loss": 0.7766, "step": 109540 }, { "epoch": 0.6998837253874756, "grad_norm": 1.5511587858200073, "learning_rate": 7.272643499925937e-05, "loss": 0.8909, "step": 109550 }, { "epoch": 0.6999476125372143, "grad_norm": 0.810479998588562, "learning_rate": 7.27219654717629e-05, "loss": 1.0596, "step": 109560 }, { "epoch": 0.700011499686953, "grad_norm": 0.8483265042304993, "learning_rate": 7.27174957154366e-05, "loss": 0.5923, "step": 109570 }, { "epoch": 0.7000753868366917, "grad_norm": 1.1115506887435913, "learning_rate": 7.271302573032546e-05, "loss": 0.7011, "step": 109580 }, { "epoch": 0.7001392739864304, "grad_norm": 0.8002986907958984, "learning_rate": 7.270855551647449e-05, "loss": 1.025, "step": 109590 }, { "epoch": 0.7002031611361691, "grad_norm": 0.8855366110801697, "learning_rate": 7.270408507392872e-05, "loss": 0.7358, "step": 109600 }, { "epoch": 0.7002670482859078, "grad_norm": 1.6254435777664185, "learning_rate": 7.269961440273317e-05, "loss": 1.0024, "step": 109610 }, { "epoch": 0.7003309354356465, "grad_norm": 0.809699535369873, "learning_rate": 7.269514350293287e-05, "loss": 0.8733, "step": 109620 }, { "epoch": 0.7003948225853852, "grad_norm": 1.2254425287246704, "learning_rate": 7.269111949769275e-05, "loss": 1.0453, "step": 109630 }, { "epoch": 0.7004587097351239, "grad_norm": 0.8027240037918091, "learning_rate": 7.268664816366747e-05, "loss": 0.7901, "step": 109640 }, { "epoch": 0.7005225968848626, "grad_norm": 0.5763524770736694, "learning_rate": 7.268217660116801e-05, "loss": 0.9035, "step": 109650 }, { "epoch": 0.7005864840346013, "grad_norm": 0.8503153920173645, "learning_rate": 7.267770481023941e-05, "loss": 1.1969, "step": 109660 }, { "epoch": 0.70065037118434, "grad_norm": 1.0496258735656738, "learning_rate": 7.26732327909267e-05, "loss": 0.8281, "step": 109670 }, { "epoch": 0.7007142583340786, "grad_norm": 0.9994955658912659, "learning_rate": 7.266876054327491e-05, "loss": 1.0602, "step": 109680 }, { "epoch": 0.7007781454838173, "grad_norm": 0.8883937001228333, "learning_rate": 7.266428806732913e-05, "loss": 1.1059, "step": 109690 }, { "epoch": 0.700842032633556, "grad_norm": 0.8434162735939026, "learning_rate": 7.265981536313432e-05, "loss": 0.7179, "step": 109700 }, { "epoch": 0.7009059197832948, "grad_norm": 0.8952722549438477, "learning_rate": 7.265534243073558e-05, "loss": 0.831, "step": 109710 }, { "epoch": 0.7009698069330335, "grad_norm": 0.49630191922187805, "learning_rate": 7.265086927017795e-05, "loss": 0.8177, "step": 109720 }, { "epoch": 0.7010336940827722, "grad_norm": 1.5639264583587646, "learning_rate": 7.264639588150646e-05, "loss": 0.9209, "step": 109730 }, { "epoch": 0.7010975812325109, "grad_norm": 0.8661503791809082, "learning_rate": 7.264192226476617e-05, "loss": 0.7273, "step": 109740 }, { "epoch": 0.7011614683822496, "grad_norm": 1.6213655471801758, "learning_rate": 7.263744842000214e-05, "loss": 1.4111, "step": 109750 }, { "epoch": 0.7012253555319883, "grad_norm": 1.2722123861312866, "learning_rate": 7.263297434725941e-05, "loss": 0.9177, "step": 109760 }, { "epoch": 0.701289242681727, "grad_norm": 0.9025132060050964, "learning_rate": 7.262850004658308e-05, "loss": 0.8063, "step": 109770 }, { "epoch": 0.7013531298314657, "grad_norm": 0.753537654876709, "learning_rate": 7.262402551801815e-05, "loss": 0.7072, "step": 109780 }, { "epoch": 0.7014170169812044, "grad_norm": 0.7488612532615662, "learning_rate": 7.261955076160972e-05, "loss": 0.9609, "step": 109790 }, { "epoch": 0.7014809041309431, "grad_norm": 0.7096448540687561, "learning_rate": 7.261507577740283e-05, "loss": 1.1737, "step": 109800 }, { "epoch": 0.7015447912806818, "grad_norm": 1.065198540687561, "learning_rate": 7.261060056544258e-05, "loss": 0.9114, "step": 109810 }, { "epoch": 0.7016086784304205, "grad_norm": 0.7157565951347351, "learning_rate": 7.260612512577402e-05, "loss": 0.7947, "step": 109820 }, { "epoch": 0.7016725655801592, "grad_norm": 0.6602898240089417, "learning_rate": 7.260164945844222e-05, "loss": 0.8586, "step": 109830 }, { "epoch": 0.7017364527298979, "grad_norm": 1.0240232944488525, "learning_rate": 7.259717356349224e-05, "loss": 0.8433, "step": 109840 }, { "epoch": 0.7018003398796366, "grad_norm": 0.7069511413574219, "learning_rate": 7.25926974409692e-05, "loss": 0.7815, "step": 109850 }, { "epoch": 0.7018642270293753, "grad_norm": 0.8306097984313965, "learning_rate": 7.258822109091813e-05, "loss": 0.8288, "step": 109860 }, { "epoch": 0.701928114179114, "grad_norm": 0.46350932121276855, "learning_rate": 7.258374451338415e-05, "loss": 0.846, "step": 109870 }, { "epoch": 0.7019920013288528, "grad_norm": 0.7333908677101135, "learning_rate": 7.257926770841231e-05, "loss": 0.863, "step": 109880 }, { "epoch": 0.7020558884785915, "grad_norm": 1.8804274797439575, "learning_rate": 7.25747906760477e-05, "loss": 1.2467, "step": 109890 }, { "epoch": 0.7021197756283302, "grad_norm": 1.2987992763519287, "learning_rate": 7.257031341633545e-05, "loss": 0.8424, "step": 109900 }, { "epoch": 0.7021836627780689, "grad_norm": 0.5555353164672852, "learning_rate": 7.25658359293206e-05, "loss": 0.6798, "step": 109910 }, { "epoch": 0.7022475499278075, "grad_norm": 1.0028846263885498, "learning_rate": 7.256135821504827e-05, "loss": 0.8265, "step": 109920 }, { "epoch": 0.7023114370775462, "grad_norm": 0.8981877565383911, "learning_rate": 7.255688027356353e-05, "loss": 1.0722, "step": 109930 }, { "epoch": 0.7023753242272849, "grad_norm": 0.9332131147384644, "learning_rate": 7.25524021049115e-05, "loss": 0.8057, "step": 109940 }, { "epoch": 0.7024392113770236, "grad_norm": 0.8092306852340698, "learning_rate": 7.254792370913728e-05, "loss": 0.9814, "step": 109950 }, { "epoch": 0.7025030985267623, "grad_norm": 0.5844110250473022, "learning_rate": 7.254344508628594e-05, "loss": 0.9691, "step": 109960 }, { "epoch": 0.702566985676501, "grad_norm": 0.7450523972511292, "learning_rate": 7.253896623640262e-05, "loss": 1.0341, "step": 109970 }, { "epoch": 0.7026308728262397, "grad_norm": 1.2225341796875, "learning_rate": 7.253448715953241e-05, "loss": 0.9289, "step": 109980 }, { "epoch": 0.7026947599759784, "grad_norm": 0.8283683061599731, "learning_rate": 7.25300078557204e-05, "loss": 0.9794, "step": 109990 }, { "epoch": 0.7027586471257171, "grad_norm": 1.274552345275879, "learning_rate": 7.252552832501174e-05, "loss": 1.3319, "step": 110000 }, { "epoch": 0.7028225342754558, "grad_norm": 1.2938295602798462, "learning_rate": 7.252104856745153e-05, "loss": 1.0077, "step": 110010 }, { "epoch": 0.7028864214251945, "grad_norm": 0.6301164031028748, "learning_rate": 7.251656858308484e-05, "loss": 0.7968, "step": 110020 }, { "epoch": 0.7029503085749332, "grad_norm": 1.0087745189666748, "learning_rate": 7.251208837195686e-05, "loss": 0.9097, "step": 110030 }, { "epoch": 0.7030141957246719, "grad_norm": 1.1381967067718506, "learning_rate": 7.250760793411265e-05, "loss": 0.8822, "step": 110040 }, { "epoch": 0.7030780828744106, "grad_norm": 1.071937918663025, "learning_rate": 7.250312726959739e-05, "loss": 0.8275, "step": 110050 }, { "epoch": 0.7031419700241494, "grad_norm": 0.9654282927513123, "learning_rate": 7.249864637845614e-05, "loss": 0.8608, "step": 110060 }, { "epoch": 0.7032058571738881, "grad_norm": 0.739723801612854, "learning_rate": 7.249416526073405e-05, "loss": 0.6858, "step": 110070 }, { "epoch": 0.7032697443236268, "grad_norm": 0.9041827917098999, "learning_rate": 7.248968391647628e-05, "loss": 0.8474, "step": 110080 }, { "epoch": 0.7033336314733655, "grad_norm": 0.9711044430732727, "learning_rate": 7.248520234572794e-05, "loss": 0.7781, "step": 110090 }, { "epoch": 0.7033975186231042, "grad_norm": 0.8251720666885376, "learning_rate": 7.248072054853414e-05, "loss": 1.1387, "step": 110100 }, { "epoch": 0.7034614057728429, "grad_norm": 0.7342681288719177, "learning_rate": 7.247623852494005e-05, "loss": 0.8043, "step": 110110 }, { "epoch": 0.7035252929225816, "grad_norm": 0.8310518264770508, "learning_rate": 7.247175627499078e-05, "loss": 0.935, "step": 110120 }, { "epoch": 0.7035891800723203, "grad_norm": 0.8513674736022949, "learning_rate": 7.24672737987315e-05, "loss": 1.1539, "step": 110130 }, { "epoch": 0.703653067222059, "grad_norm": 1.232692003250122, "learning_rate": 7.246279109620733e-05, "loss": 0.9609, "step": 110140 }, { "epoch": 0.7037169543717977, "grad_norm": 0.6645367741584778, "learning_rate": 7.245830816746342e-05, "loss": 0.8672, "step": 110150 }, { "epoch": 0.7037808415215363, "grad_norm": 0.7718523740768433, "learning_rate": 7.245382501254491e-05, "loss": 0.8136, "step": 110160 }, { "epoch": 0.703844728671275, "grad_norm": 0.9974465370178223, "learning_rate": 7.244934163149697e-05, "loss": 0.8419, "step": 110170 }, { "epoch": 0.7039086158210137, "grad_norm": 1.4745326042175293, "learning_rate": 7.244485802436472e-05, "loss": 0.7839, "step": 110180 }, { "epoch": 0.7039725029707524, "grad_norm": 0.7670671343803406, "learning_rate": 7.244037419119333e-05, "loss": 0.7499, "step": 110190 }, { "epoch": 0.7040363901204911, "grad_norm": 0.8539519906044006, "learning_rate": 7.243589013202799e-05, "loss": 0.9251, "step": 110200 }, { "epoch": 0.7041002772702298, "grad_norm": 0.8276684284210205, "learning_rate": 7.24314058469138e-05, "loss": 1.0614, "step": 110210 }, { "epoch": 0.7041641644199685, "grad_norm": 1.3117198944091797, "learning_rate": 7.242692133589596e-05, "loss": 0.8776, "step": 110220 }, { "epoch": 0.7042280515697072, "grad_norm": 1.0242599248886108, "learning_rate": 7.242243659901961e-05, "loss": 0.7777, "step": 110230 }, { "epoch": 0.704291938719446, "grad_norm": 0.7861204743385315, "learning_rate": 7.241795163632994e-05, "loss": 0.9854, "step": 110240 }, { "epoch": 0.7043558258691847, "grad_norm": 1.0489882230758667, "learning_rate": 7.241346644787208e-05, "loss": 0.9031, "step": 110250 }, { "epoch": 0.7044197130189234, "grad_norm": 1.8665564060211182, "learning_rate": 7.240898103369124e-05, "loss": 0.762, "step": 110260 }, { "epoch": 0.7044836001686621, "grad_norm": 0.9396441578865051, "learning_rate": 7.240449539383257e-05, "loss": 1.0817, "step": 110270 }, { "epoch": 0.7045474873184008, "grad_norm": 0.686319887638092, "learning_rate": 7.240000952834125e-05, "loss": 1.1644, "step": 110280 }, { "epoch": 0.7046113744681395, "grad_norm": 0.7401465773582458, "learning_rate": 7.239552343726246e-05, "loss": 0.7316, "step": 110290 }, { "epoch": 0.7046752616178782, "grad_norm": 1.3043559789657593, "learning_rate": 7.239103712064136e-05, "loss": 1.0058, "step": 110300 }, { "epoch": 0.7047391487676169, "grad_norm": 0.9462155699729919, "learning_rate": 7.238655057852314e-05, "loss": 0.9211, "step": 110310 }, { "epoch": 0.7048030359173556, "grad_norm": 0.9175712466239929, "learning_rate": 7.238206381095302e-05, "loss": 0.9743, "step": 110320 }, { "epoch": 0.7048669230670943, "grad_norm": 1.9088410139083862, "learning_rate": 7.237757681797613e-05, "loss": 0.9467, "step": 110330 }, { "epoch": 0.704930810216833, "grad_norm": 0.5652353763580322, "learning_rate": 7.237308959963769e-05, "loss": 0.7986, "step": 110340 }, { "epoch": 0.7049946973665717, "grad_norm": 0.8433083891868591, "learning_rate": 7.236860215598288e-05, "loss": 0.8457, "step": 110350 }, { "epoch": 0.7050585845163104, "grad_norm": 0.7066556811332703, "learning_rate": 7.236411448705689e-05, "loss": 0.7944, "step": 110360 }, { "epoch": 0.7051224716660491, "grad_norm": NaN, "learning_rate": 7.2360075392454e-05, "loss": 0.784, "step": 110370 }, { "epoch": 0.7051863588157878, "grad_norm": 0.7894417643547058, "learning_rate": 7.23555872956373e-05, "loss": 0.6449, "step": 110380 }, { "epoch": 0.7052502459655265, "grad_norm": 1.0463151931762695, "learning_rate": 7.235109897368049e-05, "loss": 0.8735, "step": 110390 }, { "epoch": 0.7053141331152651, "grad_norm": 0.7032953500747681, "learning_rate": 7.234661042662877e-05, "loss": 0.8867, "step": 110400 }, { "epoch": 0.7053780202650038, "grad_norm": 0.71566241979599, "learning_rate": 7.234212165452736e-05, "loss": 0.7895, "step": 110410 }, { "epoch": 0.7054419074147426, "grad_norm": 2.098254680633545, "learning_rate": 7.233763265742146e-05, "loss": 1.1398, "step": 110420 }, { "epoch": 0.7055057945644813, "grad_norm": 0.8573238253593445, "learning_rate": 7.233314343535627e-05, "loss": 0.778, "step": 110430 }, { "epoch": 0.70556968171422, "grad_norm": 0.5292240381240845, "learning_rate": 7.2328653988377e-05, "loss": 0.7799, "step": 110440 }, { "epoch": 0.7056335688639587, "grad_norm": 0.9991651773452759, "learning_rate": 7.232416431652887e-05, "loss": 0.9966, "step": 110450 }, { "epoch": 0.7056974560136974, "grad_norm": 0.45349401235580444, "learning_rate": 7.23196744198571e-05, "loss": 0.6847, "step": 110460 }, { "epoch": 0.7057613431634361, "grad_norm": 0.9464260935783386, "learning_rate": 7.231518429840689e-05, "loss": 0.979, "step": 110470 }, { "epoch": 0.7058252303131748, "grad_norm": 1.0189330577850342, "learning_rate": 7.231069395222347e-05, "loss": 0.8901, "step": 110480 }, { "epoch": 0.7058891174629135, "grad_norm": 1.015156865119934, "learning_rate": 7.230620338135205e-05, "loss": 0.5721, "step": 110490 }, { "epoch": 0.7059530046126522, "grad_norm": 2.3729195594787598, "learning_rate": 7.230171258583788e-05, "loss": 0.8536, "step": 110500 }, { "epoch": 0.7060168917623909, "grad_norm": 0.6549795269966125, "learning_rate": 7.229722156572616e-05, "loss": 0.743, "step": 110510 }, { "epoch": 0.7060807789121296, "grad_norm": 0.968298614025116, "learning_rate": 7.229273032106214e-05, "loss": 0.965, "step": 110520 }, { "epoch": 0.7061446660618683, "grad_norm": 1.6288883686065674, "learning_rate": 7.228823885189103e-05, "loss": 0.9272, "step": 110530 }, { "epoch": 0.706208553211607, "grad_norm": 0.7440875172615051, "learning_rate": 7.228374715825807e-05, "loss": 1.0698, "step": 110540 }, { "epoch": 0.7062724403613457, "grad_norm": 0.9549334645271301, "learning_rate": 7.227925524020853e-05, "loss": 0.8151, "step": 110550 }, { "epoch": 0.7063363275110844, "grad_norm": 1.4166960716247559, "learning_rate": 7.227476309778759e-05, "loss": 0.8097, "step": 110560 }, { "epoch": 0.7064002146608231, "grad_norm": 1.3286716938018799, "learning_rate": 7.227027073104052e-05, "loss": 1.0101, "step": 110570 }, { "epoch": 0.7064641018105619, "grad_norm": 0.5131798386573792, "learning_rate": 7.226577814001254e-05, "loss": 1.0766, "step": 110580 }, { "epoch": 0.7065279889603006, "grad_norm": 0.8524615168571472, "learning_rate": 7.226128532474893e-05, "loss": 1.2115, "step": 110590 }, { "epoch": 0.7065918761100393, "grad_norm": 0.9582625031471252, "learning_rate": 7.225679228529491e-05, "loss": 0.8322, "step": 110600 }, { "epoch": 0.706655763259778, "grad_norm": 1.0330963134765625, "learning_rate": 7.225229902169575e-05, "loss": 0.8503, "step": 110610 }, { "epoch": 0.7067196504095167, "grad_norm": 1.450884222984314, "learning_rate": 7.224780553399667e-05, "loss": 0.792, "step": 110620 }, { "epoch": 0.7067835375592554, "grad_norm": 1.2359329462051392, "learning_rate": 7.224331182224296e-05, "loss": 0.9444, "step": 110630 }, { "epoch": 0.7068474247089941, "grad_norm": 0.6814751029014587, "learning_rate": 7.223881788647984e-05, "loss": 0.8523, "step": 110640 }, { "epoch": 0.7069113118587327, "grad_norm": 0.5562382340431213, "learning_rate": 7.223432372675258e-05, "loss": 0.9113, "step": 110650 }, { "epoch": 0.7069751990084714, "grad_norm": 1.0418486595153809, "learning_rate": 7.222982934310645e-05, "loss": 0.8662, "step": 110660 }, { "epoch": 0.7070390861582101, "grad_norm": 1.3430500030517578, "learning_rate": 7.222533473558671e-05, "loss": 0.9428, "step": 110670 }, { "epoch": 0.7071029733079488, "grad_norm": 0.9535601139068604, "learning_rate": 7.222083990423863e-05, "loss": 0.8891, "step": 110680 }, { "epoch": 0.7071668604576875, "grad_norm": 0.8033544421195984, "learning_rate": 7.221634484910746e-05, "loss": 0.7643, "step": 110690 }, { "epoch": 0.7072307476074262, "grad_norm": 0.8913300037384033, "learning_rate": 7.221184957023848e-05, "loss": 0.8317, "step": 110700 }, { "epoch": 0.7072946347571649, "grad_norm": 0.8205702304840088, "learning_rate": 7.220735406767696e-05, "loss": 0.9091, "step": 110710 }, { "epoch": 0.7073585219069036, "grad_norm": 0.7440993189811707, "learning_rate": 7.220285834146816e-05, "loss": 1.0378, "step": 110720 }, { "epoch": 0.7074224090566423, "grad_norm": 1.0705007314682007, "learning_rate": 7.219836239165737e-05, "loss": 0.914, "step": 110730 }, { "epoch": 0.707486296206381, "grad_norm": 0.6237671375274658, "learning_rate": 7.219386621828989e-05, "loss": 0.7851, "step": 110740 }, { "epoch": 0.7075501833561197, "grad_norm": 0.8966255784034729, "learning_rate": 7.218936982141096e-05, "loss": 0.8537, "step": 110750 }, { "epoch": 0.7076140705058585, "grad_norm": 0.8700849413871765, "learning_rate": 7.218487320106588e-05, "loss": 1.0906, "step": 110760 }, { "epoch": 0.7076779576555972, "grad_norm": 0.9361857771873474, "learning_rate": 7.218037635729993e-05, "loss": 1.1403, "step": 110770 }, { "epoch": 0.7077418448053359, "grad_norm": 0.9435377717018127, "learning_rate": 7.21758792901584e-05, "loss": 1.1844, "step": 110780 }, { "epoch": 0.7078057319550746, "grad_norm": 2.09425950050354, "learning_rate": 7.21713819996866e-05, "loss": 1.0254, "step": 110790 }, { "epoch": 0.7078696191048133, "grad_norm": 1.783277988433838, "learning_rate": 7.21668844859298e-05, "loss": 1.2829, "step": 110800 }, { "epoch": 0.707933506254552, "grad_norm": 0.7960687279701233, "learning_rate": 7.216238674893328e-05, "loss": 0.7562, "step": 110810 }, { "epoch": 0.7079973934042907, "grad_norm": 0.8216058611869812, "learning_rate": 7.215788878874237e-05, "loss": 0.937, "step": 110820 }, { "epoch": 0.7080612805540294, "grad_norm": 0.8827890753746033, "learning_rate": 7.215339060540231e-05, "loss": 0.9726, "step": 110830 }, { "epoch": 0.7081251677037681, "grad_norm": 1.042328119277954, "learning_rate": 7.214889219895849e-05, "loss": 0.8115, "step": 110840 }, { "epoch": 0.7081890548535068, "grad_norm": 0.9282635450363159, "learning_rate": 7.214439356945614e-05, "loss": 1.0414, "step": 110850 }, { "epoch": 0.7082529420032455, "grad_norm": 0.7158064246177673, "learning_rate": 7.213989471694059e-05, "loss": 0.9759, "step": 110860 }, { "epoch": 0.7083168291529842, "grad_norm": 0.8982157111167908, "learning_rate": 7.213539564145715e-05, "loss": 0.7747, "step": 110870 }, { "epoch": 0.7083807163027229, "grad_norm": 0.5959254503250122, "learning_rate": 7.213089634305112e-05, "loss": 0.844, "step": 110880 }, { "epoch": 0.7084446034524615, "grad_norm": 1.0131826400756836, "learning_rate": 7.212639682176782e-05, "loss": 0.8006, "step": 110890 }, { "epoch": 0.7085084906022002, "grad_norm": 3.458534002304077, "learning_rate": 7.212189707765257e-05, "loss": 0.8292, "step": 110900 }, { "epoch": 0.7085723777519389, "grad_norm": 1.091951847076416, "learning_rate": 7.211739711075067e-05, "loss": 0.8682, "step": 110910 }, { "epoch": 0.7086362649016776, "grad_norm": 1.005330204963684, "learning_rate": 7.211289692110746e-05, "loss": 0.8157, "step": 110920 }, { "epoch": 0.7087001520514163, "grad_norm": 0.5316596627235413, "learning_rate": 7.210839650876824e-05, "loss": 0.721, "step": 110930 }, { "epoch": 0.708764039201155, "grad_norm": 0.904162585735321, "learning_rate": 7.210389587377833e-05, "loss": 0.8916, "step": 110940 }, { "epoch": 0.7088279263508938, "grad_norm": 0.6802520155906677, "learning_rate": 7.209939501618308e-05, "loss": 1.0842, "step": 110950 }, { "epoch": 0.7088918135006325, "grad_norm": 0.8692640066146851, "learning_rate": 7.20948939360278e-05, "loss": 1.13, "step": 110960 }, { "epoch": 0.7089557006503712, "grad_norm": 0.9759564399719238, "learning_rate": 7.209039263335782e-05, "loss": 1.1396, "step": 110970 }, { "epoch": 0.7090195878001099, "grad_norm": 0.8422264456748962, "learning_rate": 7.208589110821848e-05, "loss": 0.9562, "step": 110980 }, { "epoch": 0.7090834749498486, "grad_norm": 0.9133360385894775, "learning_rate": 7.208138936065509e-05, "loss": 0.9616, "step": 110990 }, { "epoch": 0.7091473620995873, "grad_norm": 1.0148824453353882, "learning_rate": 7.207688739071303e-05, "loss": 0.8601, "step": 111000 }, { "epoch": 0.709211249249326, "grad_norm": 0.9856421947479248, "learning_rate": 7.207238519843761e-05, "loss": 0.6616, "step": 111010 }, { "epoch": 0.7092751363990647, "grad_norm": 1.0549514293670654, "learning_rate": 7.206788278387417e-05, "loss": 0.8587, "step": 111020 }, { "epoch": 0.7093390235488034, "grad_norm": 0.8037036657333374, "learning_rate": 7.206338014706806e-05, "loss": 0.782, "step": 111030 }, { "epoch": 0.7094029106985421, "grad_norm": 0.7747255563735962, "learning_rate": 7.205887728806463e-05, "loss": 0.8245, "step": 111040 }, { "epoch": 0.7094667978482808, "grad_norm": 0.8603516221046448, "learning_rate": 7.205437420690922e-05, "loss": 0.884, "step": 111050 }, { "epoch": 0.7095306849980195, "grad_norm": 0.5913922190666199, "learning_rate": 7.204987090364717e-05, "loss": 1.1146, "step": 111060 }, { "epoch": 0.7095945721477582, "grad_norm": 1.0901986360549927, "learning_rate": 7.204536737832385e-05, "loss": 0.99, "step": 111070 }, { "epoch": 0.7096584592974969, "grad_norm": 0.8610712885856628, "learning_rate": 7.204086363098462e-05, "loss": 1.1021, "step": 111080 }, { "epoch": 0.7097223464472356, "grad_norm": 1.4266153573989868, "learning_rate": 7.203635966167482e-05, "loss": 0.9517, "step": 111090 }, { "epoch": 0.7097862335969743, "grad_norm": 1.2162227630615234, "learning_rate": 7.203185547043981e-05, "loss": 0.8217, "step": 111100 }, { "epoch": 0.709850120746713, "grad_norm": 1.2490782737731934, "learning_rate": 7.202735105732497e-05, "loss": 0.7838, "step": 111110 }, { "epoch": 0.7099140078964518, "grad_norm": 0.9295443892478943, "learning_rate": 7.202284642237563e-05, "loss": 0.8437, "step": 111120 }, { "epoch": 0.7099778950461904, "grad_norm": 1.247816562652588, "learning_rate": 7.201834156563718e-05, "loss": 0.7268, "step": 111130 }, { "epoch": 0.7100417821959291, "grad_norm": 0.8253741264343262, "learning_rate": 7.201383648715498e-05, "loss": 0.8062, "step": 111140 }, { "epoch": 0.7101056693456678, "grad_norm": 0.7604842185974121, "learning_rate": 7.200933118697439e-05, "loss": 0.9545, "step": 111150 }, { "epoch": 0.7101695564954065, "grad_norm": 1.0069366693496704, "learning_rate": 7.200482566514081e-05, "loss": 0.8775, "step": 111160 }, { "epoch": 0.7102334436451452, "grad_norm": 0.5719223022460938, "learning_rate": 7.20003199216996e-05, "loss": 0.8167, "step": 111170 }, { "epoch": 0.7102973307948839, "grad_norm": 0.9412128925323486, "learning_rate": 7.199581395669613e-05, "loss": 0.7928, "step": 111180 }, { "epoch": 0.7103612179446226, "grad_norm": 0.8376321196556091, "learning_rate": 7.199130777017578e-05, "loss": 0.9514, "step": 111190 }, { "epoch": 0.7104251050943613, "grad_norm": 1.0094441175460815, "learning_rate": 7.198680136218394e-05, "loss": 0.9398, "step": 111200 }, { "epoch": 0.7104889922441, "grad_norm": 2.3433310985565186, "learning_rate": 7.1982294732766e-05, "loss": 0.8579, "step": 111210 }, { "epoch": 0.7105528793938387, "grad_norm": 0.6974323987960815, "learning_rate": 7.197778788196732e-05, "loss": 0.7997, "step": 111220 }, { "epoch": 0.7106167665435774, "grad_norm": 0.4892445206642151, "learning_rate": 7.197328080983331e-05, "loss": 0.6908, "step": 111230 }, { "epoch": 0.7106806536933161, "grad_norm": 0.5663550496101379, "learning_rate": 7.196877351640934e-05, "loss": 0.7576, "step": 111240 }, { "epoch": 0.7107445408430548, "grad_norm": 1.5886496305465698, "learning_rate": 7.196426600174083e-05, "loss": 1.0888, "step": 111250 }, { "epoch": 0.7108084279927935, "grad_norm": 1.0448259115219116, "learning_rate": 7.195975826587315e-05, "loss": 0.9002, "step": 111260 }, { "epoch": 0.7108723151425322, "grad_norm": 1.3552470207214355, "learning_rate": 7.195525030885173e-05, "loss": 1.0711, "step": 111270 }, { "epoch": 0.710936202292271, "grad_norm": 0.8098346590995789, "learning_rate": 7.195074213072192e-05, "loss": 0.7809, "step": 111280 }, { "epoch": 0.7110000894420097, "grad_norm": 0.7576483488082886, "learning_rate": 7.194623373152916e-05, "loss": 1.0012, "step": 111290 }, { "epoch": 0.7110639765917484, "grad_norm": 1.0490570068359375, "learning_rate": 7.194172511131883e-05, "loss": 0.8137, "step": 111300 }, { "epoch": 0.7111278637414871, "grad_norm": 1.080182671546936, "learning_rate": 7.193721627013635e-05, "loss": 1.161, "step": 111310 }, { "epoch": 0.7111917508912258, "grad_norm": 0.9876430034637451, "learning_rate": 7.193270720802713e-05, "loss": 1.1287, "step": 111320 }, { "epoch": 0.7112556380409645, "grad_norm": 0.8091046214103699, "learning_rate": 7.192819792503656e-05, "loss": 0.8741, "step": 111330 }, { "epoch": 0.7113195251907032, "grad_norm": 0.6781719326972961, "learning_rate": 7.192368842121008e-05, "loss": 0.8945, "step": 111340 }, { "epoch": 0.7113834123404419, "grad_norm": 2.1423685550689697, "learning_rate": 7.191917869659307e-05, "loss": 0.7859, "step": 111350 }, { "epoch": 0.7114472994901806, "grad_norm": 0.6364580988883972, "learning_rate": 7.191466875123099e-05, "loss": 0.7873, "step": 111360 }, { "epoch": 0.7115111866399193, "grad_norm": 0.8204523921012878, "learning_rate": 7.191015858516921e-05, "loss": 0.8482, "step": 111370 }, { "epoch": 0.7115750737896579, "grad_norm": 0.5086873173713684, "learning_rate": 7.19056481984532e-05, "loss": 0.951, "step": 111380 }, { "epoch": 0.7116389609393966, "grad_norm": 0.7575923204421997, "learning_rate": 7.190113759112837e-05, "loss": 1.0358, "step": 111390 }, { "epoch": 0.7117028480891353, "grad_norm": 0.6204468607902527, "learning_rate": 7.189662676324012e-05, "loss": 1.0471, "step": 111400 }, { "epoch": 0.711766735238874, "grad_norm": 0.7456675171852112, "learning_rate": 7.18921157148339e-05, "loss": 0.7964, "step": 111410 }, { "epoch": 0.7118306223886127, "grad_norm": 0.8651039004325867, "learning_rate": 7.188760444595513e-05, "loss": 0.596, "step": 111420 }, { "epoch": 0.7118945095383514, "grad_norm": 1.0460362434387207, "learning_rate": 7.188309295664926e-05, "loss": 0.9561, "step": 111430 }, { "epoch": 0.7119583966880901, "grad_norm": 1.1043407917022705, "learning_rate": 7.187858124696171e-05, "loss": 0.95, "step": 111440 }, { "epoch": 0.7120222838378288, "grad_norm": 1.1724969148635864, "learning_rate": 7.187406931693791e-05, "loss": 1.278, "step": 111450 }, { "epoch": 0.7120861709875675, "grad_norm": 0.8911068439483643, "learning_rate": 7.186955716662332e-05, "loss": 0.9101, "step": 111460 }, { "epoch": 0.7121500581373063, "grad_norm": 0.9001386761665344, "learning_rate": 7.186504479606336e-05, "loss": 0.6922, "step": 111470 }, { "epoch": 0.712213945287045, "grad_norm": 0.7261586785316467, "learning_rate": 7.186053220530349e-05, "loss": 0.9494, "step": 111480 }, { "epoch": 0.7122778324367837, "grad_norm": 1.1621700525283813, "learning_rate": 7.185601939438914e-05, "loss": 1.1188, "step": 111490 }, { "epoch": 0.7123417195865224, "grad_norm": 1.0625040531158447, "learning_rate": 7.185150636336578e-05, "loss": 0.6796, "step": 111500 }, { "epoch": 0.7124056067362611, "grad_norm": 0.5753048062324524, "learning_rate": 7.184699311227883e-05, "loss": 0.6958, "step": 111510 }, { "epoch": 0.7124694938859998, "grad_norm": 0.8998501896858215, "learning_rate": 7.184247964117376e-05, "loss": 0.765, "step": 111520 }, { "epoch": 0.7125333810357385, "grad_norm": 1.5378433465957642, "learning_rate": 7.183796595009604e-05, "loss": 0.7618, "step": 111530 }, { "epoch": 0.7125972681854772, "grad_norm": 0.5962892174720764, "learning_rate": 7.18334520390911e-05, "loss": 0.8256, "step": 111540 }, { "epoch": 0.7126611553352159, "grad_norm": 0.8945503234863281, "learning_rate": 7.182893790820441e-05, "loss": 0.935, "step": 111550 }, { "epoch": 0.7127250424849546, "grad_norm": 0.8816238045692444, "learning_rate": 7.182442355748143e-05, "loss": 0.6085, "step": 111560 }, { "epoch": 0.7127889296346933, "grad_norm": 0.7212803959846497, "learning_rate": 7.181990898696762e-05, "loss": 0.7271, "step": 111570 }, { "epoch": 0.712852816784432, "grad_norm": 0.9324487447738647, "learning_rate": 7.181539419670847e-05, "loss": 0.8302, "step": 111580 }, { "epoch": 0.7129167039341707, "grad_norm": 0.7855546474456787, "learning_rate": 7.18108791867494e-05, "loss": 1.1443, "step": 111590 }, { "epoch": 0.7129805910839094, "grad_norm": 1.1288433074951172, "learning_rate": 7.180636395713592e-05, "loss": 0.77, "step": 111600 }, { "epoch": 0.7130444782336481, "grad_norm": 0.8179686665534973, "learning_rate": 7.18018485079135e-05, "loss": 0.9342, "step": 111610 }, { "epoch": 0.7131083653833867, "grad_norm": 0.5550733804702759, "learning_rate": 7.179733283912759e-05, "loss": 1.1414, "step": 111620 }, { "epoch": 0.7131722525331254, "grad_norm": 1.0934454202651978, "learning_rate": 7.179281695082369e-05, "loss": 1.1329, "step": 111630 }, { "epoch": 0.7132361396828641, "grad_norm": 0.8720685243606567, "learning_rate": 7.178830084304725e-05, "loss": 0.9261, "step": 111640 }, { "epoch": 0.7133000268326029, "grad_norm": 1.734836220741272, "learning_rate": 7.17837845158438e-05, "loss": 1.0047, "step": 111650 }, { "epoch": 0.7133639139823416, "grad_norm": 0.9459303021430969, "learning_rate": 7.177926796925877e-05, "loss": 1.0278, "step": 111660 }, { "epoch": 0.7134278011320803, "grad_norm": 1.2539857625961304, "learning_rate": 7.177475120333767e-05, "loss": 0.7048, "step": 111670 }, { "epoch": 0.713491688281819, "grad_norm": 1.275357723236084, "learning_rate": 7.177023421812601e-05, "loss": 0.7448, "step": 111680 }, { "epoch": 0.7135555754315577, "grad_norm": 0.5253180265426636, "learning_rate": 7.176571701366924e-05, "loss": 0.9166, "step": 111690 }, { "epoch": 0.7136194625812964, "grad_norm": 2.3519647121429443, "learning_rate": 7.176119959001287e-05, "loss": 0.7995, "step": 111700 }, { "epoch": 0.7136833497310351, "grad_norm": 0.8496062755584717, "learning_rate": 7.17566819472024e-05, "loss": 0.9215, "step": 111710 }, { "epoch": 0.7137472368807738, "grad_norm": 0.6206763386726379, "learning_rate": 7.175216408528331e-05, "loss": 0.7705, "step": 111720 }, { "epoch": 0.7138111240305125, "grad_norm": 0.9494533538818359, "learning_rate": 7.174764600430112e-05, "loss": 0.9813, "step": 111730 }, { "epoch": 0.7138750111802512, "grad_norm": 0.609451413154602, "learning_rate": 7.174312770430131e-05, "loss": 0.8221, "step": 111740 }, { "epoch": 0.7139388983299899, "grad_norm": 0.6117620468139648, "learning_rate": 7.17386091853294e-05, "loss": 1.1774, "step": 111750 }, { "epoch": 0.7140027854797286, "grad_norm": 0.881416916847229, "learning_rate": 7.173409044743092e-05, "loss": 1.1644, "step": 111760 }, { "epoch": 0.7140666726294673, "grad_norm": 0.9561392068862915, "learning_rate": 7.17295714906513e-05, "loss": 1.0794, "step": 111770 }, { "epoch": 0.714130559779206, "grad_norm": 0.9907708764076233, "learning_rate": 7.172505231503613e-05, "loss": 0.8979, "step": 111780 }, { "epoch": 0.7141944469289447, "grad_norm": 1.2147117853164673, "learning_rate": 7.172053292063085e-05, "loss": 0.8886, "step": 111790 }, { "epoch": 0.7142583340786834, "grad_norm": 0.8002836108207703, "learning_rate": 7.171601330748104e-05, "loss": 0.9329, "step": 111800 }, { "epoch": 0.7143222212284221, "grad_norm": 0.9203763604164124, "learning_rate": 7.171149347563219e-05, "loss": 0.7629, "step": 111810 }, { "epoch": 0.7143861083781609, "grad_norm": 1.0033005475997925, "learning_rate": 7.17069734251298e-05, "loss": 0.5576, "step": 111820 }, { "epoch": 0.7144499955278996, "grad_norm": 0.8255144357681274, "learning_rate": 7.170245315601942e-05, "loss": 0.8515, "step": 111830 }, { "epoch": 0.7145138826776383, "grad_norm": 0.6121490597724915, "learning_rate": 7.169793266834657e-05, "loss": 0.7734, "step": 111840 }, { "epoch": 0.714577769827377, "grad_norm": 0.9062861204147339, "learning_rate": 7.169341196215675e-05, "loss": 0.9229, "step": 111850 }, { "epoch": 0.7146416569771156, "grad_norm": 1.0304701328277588, "learning_rate": 7.168889103749552e-05, "loss": 0.7766, "step": 111860 }, { "epoch": 0.7147055441268543, "grad_norm": 0.49770671129226685, "learning_rate": 7.168436989440838e-05, "loss": 0.9304, "step": 111870 }, { "epoch": 0.714769431276593, "grad_norm": 2.51269268989563, "learning_rate": 7.167984853294087e-05, "loss": 0.9755, "step": 111880 }, { "epoch": 0.7148333184263317, "grad_norm": 0.7806798219680786, "learning_rate": 7.167532695313855e-05, "loss": 0.8631, "step": 111890 }, { "epoch": 0.7148972055760704, "grad_norm": 0.9068145155906677, "learning_rate": 7.167080515504692e-05, "loss": 0.8938, "step": 111900 }, { "epoch": 0.7149610927258091, "grad_norm": 0.778988778591156, "learning_rate": 7.166628313871155e-05, "loss": 1.0556, "step": 111910 }, { "epoch": 0.7150249798755478, "grad_norm": 1.0016741752624512, "learning_rate": 7.166176090417794e-05, "loss": 0.9363, "step": 111920 }, { "epoch": 0.7150888670252865, "grad_norm": 1.0079103708267212, "learning_rate": 7.165723845149169e-05, "loss": 0.9302, "step": 111930 }, { "epoch": 0.7151527541750252, "grad_norm": 1.5212242603302002, "learning_rate": 7.165271578069827e-05, "loss": 0.9895, "step": 111940 }, { "epoch": 0.7152166413247639, "grad_norm": 0.8512176871299744, "learning_rate": 7.16481928918433e-05, "loss": 0.7161, "step": 111950 }, { "epoch": 0.7152805284745026, "grad_norm": 1.4987943172454834, "learning_rate": 7.16436697849723e-05, "loss": 0.8515, "step": 111960 }, { "epoch": 0.7153444156242413, "grad_norm": 1.0176150798797607, "learning_rate": 7.163914646013082e-05, "loss": 0.8803, "step": 111970 }, { "epoch": 0.71540830277398, "grad_norm": 0.5044176578521729, "learning_rate": 7.16346229173644e-05, "loss": 0.7152, "step": 111980 }, { "epoch": 0.7154721899237187, "grad_norm": 2.0591912269592285, "learning_rate": 7.163009915671863e-05, "loss": 0.8914, "step": 111990 }, { "epoch": 0.7155360770734575, "grad_norm": 0.6870359778404236, "learning_rate": 7.162557517823904e-05, "loss": 0.9074, "step": 112000 }, { "epoch": 0.7155999642231962, "grad_norm": 0.9231833815574646, "learning_rate": 7.16210509819712e-05, "loss": 0.9811, "step": 112010 }, { "epoch": 0.7156638513729349, "grad_norm": 1.032814860343933, "learning_rate": 7.161652656796068e-05, "loss": 0.8585, "step": 112020 }, { "epoch": 0.7157277385226736, "grad_norm": 0.6325913667678833, "learning_rate": 7.161200193625302e-05, "loss": 0.8663, "step": 112030 }, { "epoch": 0.7157916256724123, "grad_norm": 0.9060227870941162, "learning_rate": 7.16074770868938e-05, "loss": 0.904, "step": 112040 }, { "epoch": 0.715855512822151, "grad_norm": 1.0490593910217285, "learning_rate": 7.16029520199286e-05, "loss": 0.7224, "step": 112050 }, { "epoch": 0.7159193999718897, "grad_norm": 0.7744579911231995, "learning_rate": 7.1598426735403e-05, "loss": 0.953, "step": 112060 }, { "epoch": 0.7159832871216284, "grad_norm": 1.1592707633972168, "learning_rate": 7.159390123336253e-05, "loss": 0.8888, "step": 112070 }, { "epoch": 0.7160471742713671, "grad_norm": 1.0290485620498657, "learning_rate": 7.158937551385281e-05, "loss": 0.7144, "step": 112080 }, { "epoch": 0.7161110614211058, "grad_norm": 0.7626058459281921, "learning_rate": 7.15848495769194e-05, "loss": 0.8806, "step": 112090 }, { "epoch": 0.7161749485708444, "grad_norm": 1.230587363243103, "learning_rate": 7.158032342260787e-05, "loss": 0.9783, "step": 112100 }, { "epoch": 0.7162388357205831, "grad_norm": 1.150781273841858, "learning_rate": 7.157579705096384e-05, "loss": 0.8128, "step": 112110 }, { "epoch": 0.7163027228703218, "grad_norm": 0.9989941716194153, "learning_rate": 7.157127046203285e-05, "loss": 0.982, "step": 112120 }, { "epoch": 0.7163666100200605, "grad_norm": 0.5104489922523499, "learning_rate": 7.15667436558605e-05, "loss": 1.1264, "step": 112130 }, { "epoch": 0.7164304971697992, "grad_norm": 0.8887497186660767, "learning_rate": 7.156221663249238e-05, "loss": 0.9689, "step": 112140 }, { "epoch": 0.7164943843195379, "grad_norm": 0.9902895092964172, "learning_rate": 7.155768939197411e-05, "loss": 0.8488, "step": 112150 }, { "epoch": 0.7165582714692766, "grad_norm": 0.9794628024101257, "learning_rate": 7.155316193435123e-05, "loss": 0.8772, "step": 112160 }, { "epoch": 0.7166221586190153, "grad_norm": 1.3520082235336304, "learning_rate": 7.154863425966938e-05, "loss": 1.1314, "step": 112170 }, { "epoch": 0.716686045768754, "grad_norm": 0.871411919593811, "learning_rate": 7.154410636797413e-05, "loss": 0.9373, "step": 112180 }, { "epoch": 0.7167499329184928, "grad_norm": 1.0834548473358154, "learning_rate": 7.15395782593111e-05, "loss": 0.7362, "step": 112190 }, { "epoch": 0.7168138200682315, "grad_norm": 0.9471587538719177, "learning_rate": 7.153504993372587e-05, "loss": 0.8928, "step": 112200 }, { "epoch": 0.7168777072179702, "grad_norm": 1.114270806312561, "learning_rate": 7.153052139126407e-05, "loss": 0.7417, "step": 112210 }, { "epoch": 0.7169415943677089, "grad_norm": 1.393097162246704, "learning_rate": 7.152599263197128e-05, "loss": 1.0208, "step": 112220 }, { "epoch": 0.7170054815174476, "grad_norm": 0.7526148557662964, "learning_rate": 7.152146365589313e-05, "loss": 1.0583, "step": 112230 }, { "epoch": 0.7170693686671863, "grad_norm": 1.4979679584503174, "learning_rate": 7.151693446307524e-05, "loss": 0.8657, "step": 112240 }, { "epoch": 0.717133255816925, "grad_norm": 0.7757830619812012, "learning_rate": 7.151240505356318e-05, "loss": 0.7491, "step": 112250 }, { "epoch": 0.7171971429666637, "grad_norm": 1.1512292623519897, "learning_rate": 7.15078754274026e-05, "loss": 0.818, "step": 112260 }, { "epoch": 0.7172610301164024, "grad_norm": 1.370301365852356, "learning_rate": 7.150334558463911e-05, "loss": 0.8435, "step": 112270 }, { "epoch": 0.7173249172661411, "grad_norm": 1.1701213121414185, "learning_rate": 7.149881552531832e-05, "loss": 1.1135, "step": 112280 }, { "epoch": 0.7173888044158798, "grad_norm": 0.6550779342651367, "learning_rate": 7.149428524948585e-05, "loss": 0.8574, "step": 112290 }, { "epoch": 0.7174526915656185, "grad_norm": 0.9960959553718567, "learning_rate": 7.148975475718734e-05, "loss": 0.9118, "step": 112300 }, { "epoch": 0.7175165787153572, "grad_norm": 0.6779653429985046, "learning_rate": 7.148522404846841e-05, "loss": 0.9284, "step": 112310 }, { "epoch": 0.7175804658650959, "grad_norm": 0.8120177984237671, "learning_rate": 7.14806931233747e-05, "loss": 1.0227, "step": 112320 }, { "epoch": 0.7176443530148346, "grad_norm": 1.1333248615264893, "learning_rate": 7.147616198195181e-05, "loss": 0.7896, "step": 112330 }, { "epoch": 0.7177082401645734, "grad_norm": 0.5088018178939819, "learning_rate": 7.147163062424539e-05, "loss": 0.6615, "step": 112340 }, { "epoch": 0.717772127314312, "grad_norm": 0.9224886894226074, "learning_rate": 7.146709905030108e-05, "loss": 0.9036, "step": 112350 }, { "epoch": 0.7178360144640507, "grad_norm": 0.9467249512672424, "learning_rate": 7.146256726016452e-05, "loss": 0.8896, "step": 112360 }, { "epoch": 0.7178999016137894, "grad_norm": 1.132169246673584, "learning_rate": 7.145803525388132e-05, "loss": 1.1764, "step": 112370 }, { "epoch": 0.7179637887635281, "grad_norm": 0.9718843102455139, "learning_rate": 7.145350303149715e-05, "loss": 0.8263, "step": 112380 }, { "epoch": 0.7180276759132668, "grad_norm": 0.5153815746307373, "learning_rate": 7.144897059305764e-05, "loss": 0.9022, "step": 112390 }, { "epoch": 0.7180915630630055, "grad_norm": 1.2004663944244385, "learning_rate": 7.144443793860845e-05, "loss": 0.8924, "step": 112400 }, { "epoch": 0.7181554502127442, "grad_norm": 0.48930367827415466, "learning_rate": 7.14399050681952e-05, "loss": 0.6762, "step": 112410 }, { "epoch": 0.7182193373624829, "grad_norm": 0.9120885729789734, "learning_rate": 7.143537198186356e-05, "loss": 0.7038, "step": 112420 }, { "epoch": 0.7182832245122216, "grad_norm": 0.7880602478981018, "learning_rate": 7.14308386796592e-05, "loss": 0.6403, "step": 112430 }, { "epoch": 0.7183471116619603, "grad_norm": 0.8461630940437317, "learning_rate": 7.142630516162774e-05, "loss": 1.245, "step": 112440 }, { "epoch": 0.718410998811699, "grad_norm": 0.7924696803092957, "learning_rate": 7.142177142781485e-05, "loss": 0.7825, "step": 112450 }, { "epoch": 0.7184748859614377, "grad_norm": 1.4075403213500977, "learning_rate": 7.14172374782662e-05, "loss": 1.0038, "step": 112460 }, { "epoch": 0.7185387731111764, "grad_norm": 0.6617513298988342, "learning_rate": 7.141270331302743e-05, "loss": 0.9936, "step": 112470 }, { "epoch": 0.7186026602609151, "grad_norm": 0.9950401186943054, "learning_rate": 7.140816893214421e-05, "loss": 0.9455, "step": 112480 }, { "epoch": 0.7186665474106538, "grad_norm": 1.0970572233200073, "learning_rate": 7.140363433566224e-05, "loss": 0.9689, "step": 112490 }, { "epoch": 0.7187304345603925, "grad_norm": 0.8020222783088684, "learning_rate": 7.139909952362712e-05, "loss": 0.8451, "step": 112500 }, { "epoch": 0.7187943217101312, "grad_norm": 1.0068836212158203, "learning_rate": 7.139456449608458e-05, "loss": 1.3056, "step": 112510 }, { "epoch": 0.71885820885987, "grad_norm": 0.8701362013816833, "learning_rate": 7.139002925308024e-05, "loss": 0.8939, "step": 112520 }, { "epoch": 0.7189220960096087, "grad_norm": 2.2286477088928223, "learning_rate": 7.138549379465982e-05, "loss": 1.0509, "step": 112530 }, { "epoch": 0.7189859831593474, "grad_norm": 0.22156091034412384, "learning_rate": 7.138095812086896e-05, "loss": 0.6598, "step": 112540 }, { "epoch": 0.7190498703090861, "grad_norm": 1.0287528038024902, "learning_rate": 7.137642223175337e-05, "loss": 0.9708, "step": 112550 }, { "epoch": 0.7191137574588248, "grad_norm": 0.772317111492157, "learning_rate": 7.13718861273587e-05, "loss": 0.8922, "step": 112560 }, { "epoch": 0.7191776446085635, "grad_norm": 1.1389001607894897, "learning_rate": 7.136734980773066e-05, "loss": 0.6469, "step": 112570 }, { "epoch": 0.7192415317583022, "grad_norm": 0.9356949329376221, "learning_rate": 7.136281327291491e-05, "loss": 0.8607, "step": 112580 }, { "epoch": 0.7193054189080408, "grad_norm": 0.8384791016578674, "learning_rate": 7.135827652295715e-05, "loss": 0.8469, "step": 112590 }, { "epoch": 0.7193693060577795, "grad_norm": 1.2862927913665771, "learning_rate": 7.135373955790308e-05, "loss": 0.8742, "step": 112600 }, { "epoch": 0.7194331932075182, "grad_norm": 0.7916562557220459, "learning_rate": 7.134920237779837e-05, "loss": 0.9424, "step": 112610 }, { "epoch": 0.7194970803572569, "grad_norm": 1.3547780513763428, "learning_rate": 7.134466498268872e-05, "loss": 0.9223, "step": 112620 }, { "epoch": 0.7195609675069956, "grad_norm": 0.8931356072425842, "learning_rate": 7.134012737261985e-05, "loss": 0.9001, "step": 112630 }, { "epoch": 0.7196248546567343, "grad_norm": 1.0962817668914795, "learning_rate": 7.133558954763741e-05, "loss": 0.6841, "step": 112640 }, { "epoch": 0.719688741806473, "grad_norm": 0.8664074540138245, "learning_rate": 7.133105150778714e-05, "loss": 0.84, "step": 112650 }, { "epoch": 0.7197526289562117, "grad_norm": 0.7376250624656677, "learning_rate": 7.132651325311472e-05, "loss": 1.0094, "step": 112660 }, { "epoch": 0.7198165161059504, "grad_norm": 0.8420968055725098, "learning_rate": 7.132197478366587e-05, "loss": 1.052, "step": 112670 }, { "epoch": 0.7198804032556891, "grad_norm": 1.0015677213668823, "learning_rate": 7.131743609948628e-05, "loss": 0.9265, "step": 112680 }, { "epoch": 0.7199442904054278, "grad_norm": 0.7853860259056091, "learning_rate": 7.131289720062167e-05, "loss": 0.8856, "step": 112690 }, { "epoch": 0.7200081775551666, "grad_norm": 1.2557804584503174, "learning_rate": 7.130835808711773e-05, "loss": 0.975, "step": 112700 }, { "epoch": 0.7200720647049053, "grad_norm": 0.8270767331123352, "learning_rate": 7.130381875902021e-05, "loss": 1.0227, "step": 112710 }, { "epoch": 0.720135951854644, "grad_norm": 1.5142698287963867, "learning_rate": 7.12992792163748e-05, "loss": 0.9033, "step": 112720 }, { "epoch": 0.7201998390043827, "grad_norm": 0.9564334750175476, "learning_rate": 7.129473945922722e-05, "loss": 0.6761, "step": 112730 }, { "epoch": 0.7202637261541214, "grad_norm": 0.5844874382019043, "learning_rate": 7.129019948762319e-05, "loss": 0.7452, "step": 112740 }, { "epoch": 0.7203276133038601, "grad_norm": 0.7936009764671326, "learning_rate": 7.128565930160844e-05, "loss": 0.8532, "step": 112750 }, { "epoch": 0.7203915004535988, "grad_norm": 1.0036197900772095, "learning_rate": 7.128111890122868e-05, "loss": 0.6713, "step": 112760 }, { "epoch": 0.7204553876033375, "grad_norm": 0.6564218997955322, "learning_rate": 7.127657828652964e-05, "loss": 0.8136, "step": 112770 }, { "epoch": 0.7205192747530762, "grad_norm": 0.7329919338226318, "learning_rate": 7.127203745755705e-05, "loss": 0.9779, "step": 112780 }, { "epoch": 0.7205831619028149, "grad_norm": 0.9217239022254944, "learning_rate": 7.126749641435664e-05, "loss": 1.0803, "step": 112790 }, { "epoch": 0.7206470490525536, "grad_norm": 1.523088812828064, "learning_rate": 7.126295515697414e-05, "loss": 0.8893, "step": 112800 }, { "epoch": 0.7207109362022923, "grad_norm": 1.220182180404663, "learning_rate": 7.125841368545529e-05, "loss": 0.7783, "step": 112810 }, { "epoch": 0.720774823352031, "grad_norm": 0.7349340319633484, "learning_rate": 7.125387199984583e-05, "loss": 0.8759, "step": 112820 }, { "epoch": 0.7208387105017696, "grad_norm": 1.1350910663604736, "learning_rate": 7.124933010019148e-05, "loss": 0.8982, "step": 112830 }, { "epoch": 0.7209025976515083, "grad_norm": 1.2227561473846436, "learning_rate": 7.124478798653801e-05, "loss": 0.6953, "step": 112840 }, { "epoch": 0.720966484801247, "grad_norm": 1.2927758693695068, "learning_rate": 7.124024565893112e-05, "loss": 0.8957, "step": 112850 }, { "epoch": 0.7210303719509857, "grad_norm": 0.8586512804031372, "learning_rate": 7.12357031174166e-05, "loss": 1.1933, "step": 112860 }, { "epoch": 0.7210942591007244, "grad_norm": 1.5274994373321533, "learning_rate": 7.123116036204017e-05, "loss": 0.8434, "step": 112870 }, { "epoch": 0.7211581462504632, "grad_norm": 0.8376038670539856, "learning_rate": 7.122661739284759e-05, "loss": 0.7802, "step": 112880 }, { "epoch": 0.7212220334002019, "grad_norm": 0.9995211362838745, "learning_rate": 7.122207420988462e-05, "loss": 0.7681, "step": 112890 }, { "epoch": 0.7212859205499406, "grad_norm": 1.5679831504821777, "learning_rate": 7.121753081319699e-05, "loss": 0.9598, "step": 112900 }, { "epoch": 0.7213498076996793, "grad_norm": 1.7187330722808838, "learning_rate": 7.121298720283048e-05, "loss": 1.0863, "step": 112910 }, { "epoch": 0.721413694849418, "grad_norm": 0.8918151259422302, "learning_rate": 7.120844337883082e-05, "loss": 1.0098, "step": 112920 }, { "epoch": 0.7214775819991567, "grad_norm": 0.8101955056190491, "learning_rate": 7.120389934124379e-05, "loss": 1.092, "step": 112930 }, { "epoch": 0.7215414691488954, "grad_norm": 0.7652488946914673, "learning_rate": 7.119935509011516e-05, "loss": 0.8215, "step": 112940 }, { "epoch": 0.7216053562986341, "grad_norm": 1.972680687904358, "learning_rate": 7.119481062549067e-05, "loss": 0.8574, "step": 112950 }, { "epoch": 0.7216692434483728, "grad_norm": 0.8050053715705872, "learning_rate": 7.11902659474161e-05, "loss": 1.347, "step": 112960 }, { "epoch": 0.7217331305981115, "grad_norm": 1.1817753314971924, "learning_rate": 7.118572105593725e-05, "loss": 0.7859, "step": 112970 }, { "epoch": 0.7217970177478502, "grad_norm": 0.73277747631073, "learning_rate": 7.118117595109984e-05, "loss": 0.8999, "step": 112980 }, { "epoch": 0.7218609048975889, "grad_norm": 0.7769888639450073, "learning_rate": 7.117663063294965e-05, "loss": 0.9172, "step": 112990 }, { "epoch": 0.7219247920473276, "grad_norm": 1.4832031726837158, "learning_rate": 7.117253966426993e-05, "loss": 1.0567, "step": 113000 }, { "epoch": 0.7219886791970663, "grad_norm": 0.9582386612892151, "learning_rate": 7.11679939409516e-05, "loss": 0.6723, "step": 113010 }, { "epoch": 0.722052566346805, "grad_norm": 0.8009851574897766, "learning_rate": 7.116344800445327e-05, "loss": 0.8508, "step": 113020 }, { "epoch": 0.7221164534965437, "grad_norm": 0.7712252736091614, "learning_rate": 7.115890185482071e-05, "loss": 0.7957, "step": 113030 }, { "epoch": 0.7221803406462824, "grad_norm": 0.7053341865539551, "learning_rate": 7.11543554920997e-05, "loss": 0.8246, "step": 113040 }, { "epoch": 0.7222442277960212, "grad_norm": 0.8619422912597656, "learning_rate": 7.114980891633602e-05, "loss": 1.0908, "step": 113050 }, { "epoch": 0.7223081149457599, "grad_norm": 0.6670997738838196, "learning_rate": 7.114526212757549e-05, "loss": 0.8958, "step": 113060 }, { "epoch": 0.7223720020954986, "grad_norm": 0.7335458397865295, "learning_rate": 7.114071512586385e-05, "loss": 1.0648, "step": 113070 }, { "epoch": 0.7224358892452372, "grad_norm": 1.3357338905334473, "learning_rate": 7.113616791124694e-05, "loss": 0.7619, "step": 113080 }, { "epoch": 0.7224997763949759, "grad_norm": 0.960849404335022, "learning_rate": 7.113162048377053e-05, "loss": 0.9608, "step": 113090 }, { "epoch": 0.7225636635447146, "grad_norm": 0.47955894470214844, "learning_rate": 7.112707284348042e-05, "loss": 0.7415, "step": 113100 }, { "epoch": 0.7226275506944533, "grad_norm": 0.6127536296844482, "learning_rate": 7.11225249904224e-05, "loss": 0.8654, "step": 113110 }, { "epoch": 0.722691437844192, "grad_norm": 0.9961467385292053, "learning_rate": 7.11179769246423e-05, "loss": 0.8188, "step": 113120 }, { "epoch": 0.7227553249939307, "grad_norm": 0.8931620121002197, "learning_rate": 7.11134286461859e-05, "loss": 0.7527, "step": 113130 }, { "epoch": 0.7228192121436694, "grad_norm": 1.2289701700210571, "learning_rate": 7.1108880155099e-05, "loss": 0.7977, "step": 113140 }, { "epoch": 0.7228830992934081, "grad_norm": 0.7578348517417908, "learning_rate": 7.110433145142741e-05, "loss": 1.0557, "step": 113150 }, { "epoch": 0.7229469864431468, "grad_norm": 0.5664851069450378, "learning_rate": 7.109978253521694e-05, "loss": 0.6821, "step": 113160 }, { "epoch": 0.7230108735928855, "grad_norm": 1.7400668859481812, "learning_rate": 7.109523340651342e-05, "loss": 0.993, "step": 113170 }, { "epoch": 0.7230747607426242, "grad_norm": 0.9611324071884155, "learning_rate": 7.109068406536265e-05, "loss": 0.9319, "step": 113180 }, { "epoch": 0.7231386478923629, "grad_norm": 0.6872121691703796, "learning_rate": 7.108613451181043e-05, "loss": 1.1291, "step": 113190 }, { "epoch": 0.7232025350421016, "grad_norm": 0.8210169076919556, "learning_rate": 7.108158474590261e-05, "loss": 0.8069, "step": 113200 }, { "epoch": 0.7232664221918403, "grad_norm": 1.7412443161010742, "learning_rate": 7.107703476768497e-05, "loss": 0.8441, "step": 113210 }, { "epoch": 0.723330309341579, "grad_norm": 0.7331858277320862, "learning_rate": 7.107248457720337e-05, "loss": 0.7081, "step": 113220 }, { "epoch": 0.7233941964913178, "grad_norm": 1.0155048370361328, "learning_rate": 7.106793417450362e-05, "loss": 0.8548, "step": 113230 }, { "epoch": 0.7234580836410565, "grad_norm": 0.6159857511520386, "learning_rate": 7.106338355963155e-05, "loss": 0.7694, "step": 113240 }, { "epoch": 0.7235219707907952, "grad_norm": 0.9287189841270447, "learning_rate": 7.105883273263298e-05, "loss": 0.965, "step": 113250 }, { "epoch": 0.7235858579405339, "grad_norm": 0.7032301425933838, "learning_rate": 7.105428169355375e-05, "loss": 0.7095, "step": 113260 }, { "epoch": 0.7236497450902726, "grad_norm": 0.7181684970855713, "learning_rate": 7.104973044243969e-05, "loss": 1.1339, "step": 113270 }, { "epoch": 0.7237136322400113, "grad_norm": 0.703015148639679, "learning_rate": 7.104517897933662e-05, "loss": 0.8453, "step": 113280 }, { "epoch": 0.72377751938975, "grad_norm": 2.4534735679626465, "learning_rate": 7.10406273042904e-05, "loss": 0.8108, "step": 113290 }, { "epoch": 0.7238414065394887, "grad_norm": 0.7906631231307983, "learning_rate": 7.103607541734688e-05, "loss": 1.0326, "step": 113300 }, { "epoch": 0.7239052936892274, "grad_norm": 0.8502821922302246, "learning_rate": 7.103152331855187e-05, "loss": 0.8985, "step": 113310 }, { "epoch": 0.723969180838966, "grad_norm": 1.1090575456619263, "learning_rate": 7.102697100795122e-05, "loss": 0.8659, "step": 113320 }, { "epoch": 0.7240330679887047, "grad_norm": 1.2656605243682861, "learning_rate": 7.102241848559077e-05, "loss": 0.7826, "step": 113330 }, { "epoch": 0.7240969551384434, "grad_norm": 0.6625283360481262, "learning_rate": 7.101786575151639e-05, "loss": 0.7639, "step": 113340 }, { "epoch": 0.7241608422881821, "grad_norm": 0.988913357257843, "learning_rate": 7.101331280577392e-05, "loss": 1.0447, "step": 113350 }, { "epoch": 0.7242247294379208, "grad_norm": 1.8037471771240234, "learning_rate": 7.100875964840922e-05, "loss": 1.0335, "step": 113360 }, { "epoch": 0.7242886165876595, "grad_norm": 0.8089826107025146, "learning_rate": 7.100420627946812e-05, "loss": 0.8345, "step": 113370 }, { "epoch": 0.7243525037373982, "grad_norm": 0.894549548625946, "learning_rate": 7.099965269899648e-05, "loss": 0.9204, "step": 113380 }, { "epoch": 0.7244163908871369, "grad_norm": 0.7039246559143066, "learning_rate": 7.099509890704019e-05, "loss": 0.6761, "step": 113390 }, { "epoch": 0.7244802780368756, "grad_norm": 0.907172679901123, "learning_rate": 7.09905449036451e-05, "loss": 0.6746, "step": 113400 }, { "epoch": 0.7245441651866144, "grad_norm": 0.6476519107818604, "learning_rate": 7.098599068885704e-05, "loss": 0.9781, "step": 113410 }, { "epoch": 0.7246080523363531, "grad_norm": 0.5977177023887634, "learning_rate": 7.098143626272192e-05, "loss": 0.856, "step": 113420 }, { "epoch": 0.7246719394860918, "grad_norm": 1.3149287700653076, "learning_rate": 7.097688162528556e-05, "loss": 0.7815, "step": 113430 }, { "epoch": 0.7247358266358305, "grad_norm": 0.7405048608779907, "learning_rate": 7.097232677659387e-05, "loss": 0.895, "step": 113440 }, { "epoch": 0.7247997137855692, "grad_norm": 0.8446990847587585, "learning_rate": 7.09677717166927e-05, "loss": 0.6864, "step": 113450 }, { "epoch": 0.7248636009353079, "grad_norm": 0.761848509311676, "learning_rate": 7.096321644562793e-05, "loss": 0.8818, "step": 113460 }, { "epoch": 0.7249274880850466, "grad_norm": 0.8329225778579712, "learning_rate": 7.095866096344544e-05, "loss": 0.8544, "step": 113470 }, { "epoch": 0.7249913752347853, "grad_norm": 1.2276092767715454, "learning_rate": 7.095410527019111e-05, "loss": 0.8486, "step": 113480 }, { "epoch": 0.725055262384524, "grad_norm": 1.3161121606826782, "learning_rate": 7.094954936591081e-05, "loss": 0.7856, "step": 113490 }, { "epoch": 0.7251191495342627, "grad_norm": 1.220802664756775, "learning_rate": 7.09449932506504e-05, "loss": 0.8293, "step": 113500 }, { "epoch": 0.7251830366840014, "grad_norm": 1.623045802116394, "learning_rate": 7.094043692445581e-05, "loss": 1.4337, "step": 113510 }, { "epoch": 0.7252469238337401, "grad_norm": 1.979931116104126, "learning_rate": 7.09358803873729e-05, "loss": 0.7465, "step": 113520 }, { "epoch": 0.7253108109834788, "grad_norm": 1.0702683925628662, "learning_rate": 7.093132363944756e-05, "loss": 0.7137, "step": 113530 }, { "epoch": 0.7253746981332175, "grad_norm": 0.940356969833374, "learning_rate": 7.092676668072569e-05, "loss": 0.9097, "step": 113540 }, { "epoch": 0.7254385852829562, "grad_norm": 0.7708202004432678, "learning_rate": 7.092220951125315e-05, "loss": 1.0073, "step": 113550 }, { "epoch": 0.7255024724326948, "grad_norm": 1.922377347946167, "learning_rate": 7.091765213107589e-05, "loss": 0.77, "step": 113560 }, { "epoch": 0.7255663595824335, "grad_norm": 0.671876847743988, "learning_rate": 7.091309454023976e-05, "loss": 0.6341, "step": 113570 }, { "epoch": 0.7256302467321722, "grad_norm": 0.9415796995162964, "learning_rate": 7.090853673879068e-05, "loss": 0.6898, "step": 113580 }, { "epoch": 0.725694133881911, "grad_norm": 0.5804014205932617, "learning_rate": 7.090397872677455e-05, "loss": 0.8133, "step": 113590 }, { "epoch": 0.7257580210316497, "grad_norm": 1.0379526615142822, "learning_rate": 7.089942050423725e-05, "loss": 0.855, "step": 113600 }, { "epoch": 0.7258219081813884, "grad_norm": 0.6411370635032654, "learning_rate": 7.089486207122474e-05, "loss": 0.7423, "step": 113610 }, { "epoch": 0.7258857953311271, "grad_norm": 2.031083106994629, "learning_rate": 7.089030342778288e-05, "loss": 0.7442, "step": 113620 }, { "epoch": 0.7259496824808658, "grad_norm": 0.9208039045333862, "learning_rate": 7.088574457395758e-05, "loss": 0.8881, "step": 113630 }, { "epoch": 0.7260135696306045, "grad_norm": 1.0437136888504028, "learning_rate": 7.088118550979477e-05, "loss": 1.015, "step": 113640 }, { "epoch": 0.7260774567803432, "grad_norm": 1.4243861436843872, "learning_rate": 7.087662623534036e-05, "loss": 0.7728, "step": 113650 }, { "epoch": 0.7261413439300819, "grad_norm": 1.2670698165893555, "learning_rate": 7.087206675064026e-05, "loss": 0.9942, "step": 113660 }, { "epoch": 0.7262052310798206, "grad_norm": 0.9617191553115845, "learning_rate": 7.086750705574038e-05, "loss": 1.0011, "step": 113670 }, { "epoch": 0.7262691182295593, "grad_norm": 1.161468505859375, "learning_rate": 7.086294715068667e-05, "loss": 1.22, "step": 113680 }, { "epoch": 0.726333005379298, "grad_norm": 1.2413885593414307, "learning_rate": 7.085838703552503e-05, "loss": 0.9355, "step": 113690 }, { "epoch": 0.7263968925290367, "grad_norm": 0.607776403427124, "learning_rate": 7.085382671030138e-05, "loss": 1.0288, "step": 113700 }, { "epoch": 0.7264607796787754, "grad_norm": 1.1839098930358887, "learning_rate": 7.084926617506166e-05, "loss": 1.0052, "step": 113710 }, { "epoch": 0.7265246668285141, "grad_norm": 0.5576828718185425, "learning_rate": 7.084470542985178e-05, "loss": 1.1483, "step": 113720 }, { "epoch": 0.7265885539782528, "grad_norm": 0.6610636711120605, "learning_rate": 7.084014447471769e-05, "loss": 0.777, "step": 113730 }, { "epoch": 0.7266524411279915, "grad_norm": 0.726379930973053, "learning_rate": 7.083558330970532e-05, "loss": 0.7009, "step": 113740 }, { "epoch": 0.7267163282777303, "grad_norm": 0.8821578621864319, "learning_rate": 7.083102193486058e-05, "loss": 1.1292, "step": 113750 }, { "epoch": 0.726780215427469, "grad_norm": 0.8552307486534119, "learning_rate": 7.082646035022946e-05, "loss": 0.936, "step": 113760 }, { "epoch": 0.7268441025772077, "grad_norm": 0.9332167506217957, "learning_rate": 7.082189855585784e-05, "loss": 0.9092, "step": 113770 }, { "epoch": 0.7269079897269464, "grad_norm": 0.7814182043075562, "learning_rate": 7.081733655179171e-05, "loss": 1.0203, "step": 113780 }, { "epoch": 0.7269718768766851, "grad_norm": 1.0449836254119873, "learning_rate": 7.081277433807697e-05, "loss": 0.8905, "step": 113790 }, { "epoch": 0.7270357640264237, "grad_norm": 0.9948442578315735, "learning_rate": 7.080821191475962e-05, "loss": 0.8614, "step": 113800 }, { "epoch": 0.7270996511761624, "grad_norm": 0.6735957860946655, "learning_rate": 7.080364928188555e-05, "loss": 0.7434, "step": 113810 }, { "epoch": 0.7271635383259011, "grad_norm": 1.021897554397583, "learning_rate": 7.079908643950072e-05, "loss": 0.8579, "step": 113820 }, { "epoch": 0.7272274254756398, "grad_norm": 0.7740781903266907, "learning_rate": 7.079452338765112e-05, "loss": 0.9425, "step": 113830 }, { "epoch": 0.7272913126253785, "grad_norm": 0.5648607015609741, "learning_rate": 7.078996012638268e-05, "loss": 0.8408, "step": 113840 }, { "epoch": 0.7273551997751172, "grad_norm": 0.5744165182113647, "learning_rate": 7.078539665574135e-05, "loss": 0.8827, "step": 113850 }, { "epoch": 0.7274190869248559, "grad_norm": 0.5075027346611023, "learning_rate": 7.07808329757731e-05, "loss": 0.741, "step": 113860 }, { "epoch": 0.7274829740745946, "grad_norm": 2.1645233631134033, "learning_rate": 7.077626908652387e-05, "loss": 0.8138, "step": 113870 }, { "epoch": 0.7275468612243333, "grad_norm": 1.198243498802185, "learning_rate": 7.077170498803964e-05, "loss": 1.0056, "step": 113880 }, { "epoch": 0.727610748374072, "grad_norm": 0.8382686376571655, "learning_rate": 7.076714068036639e-05, "loss": 0.8976, "step": 113890 }, { "epoch": 0.7276746355238107, "grad_norm": 0.751120388507843, "learning_rate": 7.076257616355003e-05, "loss": 0.918, "step": 113900 }, { "epoch": 0.7277385226735494, "grad_norm": 0.7622794508934021, "learning_rate": 7.075801143763658e-05, "loss": 1.125, "step": 113910 }, { "epoch": 0.7278024098232881, "grad_norm": 0.7073959112167358, "learning_rate": 7.0753446502672e-05, "loss": 0.8341, "step": 113920 }, { "epoch": 0.7278662969730268, "grad_norm": 0.6458455324172974, "learning_rate": 7.074888135870227e-05, "loss": 1.1074, "step": 113930 }, { "epoch": 0.7279301841227656, "grad_norm": 0.837853729724884, "learning_rate": 7.074431600577335e-05, "loss": 0.8365, "step": 113940 }, { "epoch": 0.7279940712725043, "grad_norm": 0.8658714890480042, "learning_rate": 7.073975044393121e-05, "loss": 0.9752, "step": 113950 }, { "epoch": 0.728057958422243, "grad_norm": 1.0802479982376099, "learning_rate": 7.073518467322186e-05, "loss": 0.7144, "step": 113960 }, { "epoch": 0.7281218455719817, "grad_norm": 0.8560570478439331, "learning_rate": 7.073061869369124e-05, "loss": 0.6664, "step": 113970 }, { "epoch": 0.7281857327217204, "grad_norm": 0.9239840507507324, "learning_rate": 7.072605250538536e-05, "loss": 1.1588, "step": 113980 }, { "epoch": 0.7282496198714591, "grad_norm": 0.7781822085380554, "learning_rate": 7.07214861083502e-05, "loss": 1.1213, "step": 113990 }, { "epoch": 0.7283135070211978, "grad_norm": 0.6489850282669067, "learning_rate": 7.071691950263177e-05, "loss": 0.9027, "step": 114000 }, { "epoch": 0.7283773941709365, "grad_norm": 0.8504793047904968, "learning_rate": 7.071235268827601e-05, "loss": 1.1383, "step": 114010 }, { "epoch": 0.7284412813206752, "grad_norm": 0.9675794243812561, "learning_rate": 7.070778566532896e-05, "loss": 1.1451, "step": 114020 }, { "epoch": 0.7285051684704139, "grad_norm": 0.9038040041923523, "learning_rate": 7.070321843383659e-05, "loss": 0.9998, "step": 114030 }, { "epoch": 0.7285690556201526, "grad_norm": 0.6558981537818909, "learning_rate": 7.06986509938449e-05, "loss": 0.8363, "step": 114040 }, { "epoch": 0.7286329427698912, "grad_norm": 0.9019342660903931, "learning_rate": 7.069408334539987e-05, "loss": 0.992, "step": 114050 }, { "epoch": 0.7286968299196299, "grad_norm": 0.7221403121948242, "learning_rate": 7.068951548854755e-05, "loss": 0.8516, "step": 114060 }, { "epoch": 0.7287607170693686, "grad_norm": 1.507217288017273, "learning_rate": 7.068494742333388e-05, "loss": 0.7527, "step": 114070 }, { "epoch": 0.7288246042191073, "grad_norm": 0.8516684770584106, "learning_rate": 7.06803791498049e-05, "loss": 0.6731, "step": 114080 }, { "epoch": 0.728888491368846, "grad_norm": 1.2646251916885376, "learning_rate": 7.067581066800661e-05, "loss": 0.8718, "step": 114090 }, { "epoch": 0.7289523785185847, "grad_norm": 0.7315905094146729, "learning_rate": 7.067124197798504e-05, "loss": 1.0181, "step": 114100 }, { "epoch": 0.7290162656683234, "grad_norm": 1.3411294221878052, "learning_rate": 7.066667307978617e-05, "loss": 0.8343, "step": 114110 }, { "epoch": 0.7290801528180622, "grad_norm": 1.737226963043213, "learning_rate": 7.0662103973456e-05, "loss": 0.9154, "step": 114120 }, { "epoch": 0.7291440399678009, "grad_norm": 0.9302464127540588, "learning_rate": 7.065753465904059e-05, "loss": 0.7609, "step": 114130 }, { "epoch": 0.7292079271175396, "grad_norm": 0.7784197926521301, "learning_rate": 7.065296513658594e-05, "loss": 1.1499, "step": 114140 }, { "epoch": 0.7292718142672783, "grad_norm": 1.0966049432754517, "learning_rate": 7.064839540613805e-05, "loss": 0.8757, "step": 114150 }, { "epoch": 0.729335701417017, "grad_norm": 0.7327684760093689, "learning_rate": 7.064382546774297e-05, "loss": 0.8503, "step": 114160 }, { "epoch": 0.7293995885667557, "grad_norm": 0.817319393157959, "learning_rate": 7.063925532144668e-05, "loss": 1.0078, "step": 114170 }, { "epoch": 0.7294634757164944, "grad_norm": 0.5275333523750305, "learning_rate": 7.063468496729526e-05, "loss": 1.1289, "step": 114180 }, { "epoch": 0.7295273628662331, "grad_norm": 0.9112656712532043, "learning_rate": 7.06301144053347e-05, "loss": 0.9515, "step": 114190 }, { "epoch": 0.7295912500159718, "grad_norm": 0.7202227711677551, "learning_rate": 7.062554363561105e-05, "loss": 1.0629, "step": 114200 }, { "epoch": 0.7296551371657105, "grad_norm": 0.6695742011070251, "learning_rate": 7.062097265817031e-05, "loss": 0.8514, "step": 114210 }, { "epoch": 0.7297190243154492, "grad_norm": 1.3881930112838745, "learning_rate": 7.061640147305856e-05, "loss": 0.8752, "step": 114220 }, { "epoch": 0.7297829114651879, "grad_norm": 0.9712892770767212, "learning_rate": 7.06118300803218e-05, "loss": 0.8757, "step": 114230 }, { "epoch": 0.7298467986149266, "grad_norm": 0.7274371981620789, "learning_rate": 7.060725848000607e-05, "loss": 0.8209, "step": 114240 }, { "epoch": 0.7299106857646653, "grad_norm": 1.0749263763427734, "learning_rate": 7.060268667215743e-05, "loss": 1.059, "step": 114250 }, { "epoch": 0.729974572914404, "grad_norm": 0.828329861164093, "learning_rate": 7.059811465682192e-05, "loss": 1.3196, "step": 114260 }, { "epoch": 0.7300384600641427, "grad_norm": 1.0756471157073975, "learning_rate": 7.059354243404555e-05, "loss": 0.9988, "step": 114270 }, { "epoch": 0.7301023472138815, "grad_norm": 1.016433835029602, "learning_rate": 7.05889700038744e-05, "loss": 0.6839, "step": 114280 }, { "epoch": 0.73016623436362, "grad_norm": 0.9101114273071289, "learning_rate": 7.058439736635454e-05, "loss": 1.0044, "step": 114290 }, { "epoch": 0.7302301215133588, "grad_norm": 1.0833832025527954, "learning_rate": 7.057982452153196e-05, "loss": 1.0722, "step": 114300 }, { "epoch": 0.7302940086630975, "grad_norm": 1.2768323421478271, "learning_rate": 7.057525146945276e-05, "loss": 1.0266, "step": 114310 }, { "epoch": 0.7303578958128362, "grad_norm": 0.776053786277771, "learning_rate": 7.057067821016297e-05, "loss": 0.9476, "step": 114320 }, { "epoch": 0.7304217829625749, "grad_norm": 0.6913490295410156, "learning_rate": 7.056610474370865e-05, "loss": 0.8089, "step": 114330 }, { "epoch": 0.7304856701123136, "grad_norm": 0.6486290693283081, "learning_rate": 7.056153107013588e-05, "loss": 0.813, "step": 114340 }, { "epoch": 0.7305495572620523, "grad_norm": 0.7323374152183533, "learning_rate": 7.05569571894907e-05, "loss": 0.9397, "step": 114350 }, { "epoch": 0.730613444411791, "grad_norm": 1.247586727142334, "learning_rate": 7.055238310181915e-05, "loss": 0.8101, "step": 114360 }, { "epoch": 0.7306773315615297, "grad_norm": 1.2062445878982544, "learning_rate": 7.054780880716733e-05, "loss": 0.7138, "step": 114370 }, { "epoch": 0.7307412187112684, "grad_norm": 2.156268358230591, "learning_rate": 7.054323430558132e-05, "loss": 0.8173, "step": 114380 }, { "epoch": 0.7308051058610071, "grad_norm": 1.4800580739974976, "learning_rate": 7.053865959710717e-05, "loss": 0.8313, "step": 114390 }, { "epoch": 0.7308689930107458, "grad_norm": 0.7692814469337463, "learning_rate": 7.053408468179093e-05, "loss": 0.7696, "step": 114400 }, { "epoch": 0.7309328801604845, "grad_norm": 1.617647409439087, "learning_rate": 7.052950955967869e-05, "loss": 0.8307, "step": 114410 }, { "epoch": 0.7309967673102232, "grad_norm": 1.1893336772918701, "learning_rate": 7.052493423081655e-05, "loss": 0.8407, "step": 114420 }, { "epoch": 0.7310606544599619, "grad_norm": 1.0456907749176025, "learning_rate": 7.052035869525053e-05, "loss": 1.0971, "step": 114430 }, { "epoch": 0.7311245416097006, "grad_norm": 0.8813536167144775, "learning_rate": 7.051578295302676e-05, "loss": 0.9145, "step": 114440 }, { "epoch": 0.7311884287594393, "grad_norm": 0.9212775826454163, "learning_rate": 7.051120700419131e-05, "loss": 0.9651, "step": 114450 }, { "epoch": 0.731252315909178, "grad_norm": 0.7198718786239624, "learning_rate": 7.050663084879027e-05, "loss": 0.844, "step": 114460 }, { "epoch": 0.7313162030589168, "grad_norm": 0.91295325756073, "learning_rate": 7.050205448686971e-05, "loss": 0.7403, "step": 114470 }, { "epoch": 0.7313800902086555, "grad_norm": 1.0438035726547241, "learning_rate": 7.049747791847574e-05, "loss": 0.893, "step": 114480 }, { "epoch": 0.7314439773583942, "grad_norm": 0.6840182542800903, "learning_rate": 7.049290114365441e-05, "loss": 0.7915, "step": 114490 }, { "epoch": 0.7315078645081329, "grad_norm": 0.7132487893104553, "learning_rate": 7.048832416245185e-05, "loss": 1.0361, "step": 114500 }, { "epoch": 0.7315717516578716, "grad_norm": 1.2934064865112305, "learning_rate": 7.048374697491414e-05, "loss": 0.9494, "step": 114510 }, { "epoch": 0.7316356388076103, "grad_norm": 1.7626994848251343, "learning_rate": 7.047916958108737e-05, "loss": 1.0967, "step": 114520 }, { "epoch": 0.7316995259573489, "grad_norm": 1.077722191810608, "learning_rate": 7.047459198101766e-05, "loss": 0.8701, "step": 114530 }, { "epoch": 0.7317634131070876, "grad_norm": 0.6629199981689453, "learning_rate": 7.047001417475109e-05, "loss": 0.9575, "step": 114540 }, { "epoch": 0.7318273002568263, "grad_norm": 0.9705789089202881, "learning_rate": 7.046543616233376e-05, "loss": 0.6918, "step": 114550 }, { "epoch": 0.731891187406565, "grad_norm": 0.897789478302002, "learning_rate": 7.046085794381179e-05, "loss": 0.9453, "step": 114560 }, { "epoch": 0.7319550745563037, "grad_norm": 0.7040274143218994, "learning_rate": 7.045627951923127e-05, "loss": 0.9701, "step": 114570 }, { "epoch": 0.7320189617060424, "grad_norm": 1.2493051290512085, "learning_rate": 7.045170088863834e-05, "loss": 1.0576, "step": 114580 }, { "epoch": 0.7320828488557811, "grad_norm": 1.6483665704727173, "learning_rate": 7.044712205207907e-05, "loss": 0.9102, "step": 114590 }, { "epoch": 0.7321467360055198, "grad_norm": 0.8068165183067322, "learning_rate": 7.044254300959958e-05, "loss": 0.9693, "step": 114600 }, { "epoch": 0.7322106231552585, "grad_norm": 1.0065369606018066, "learning_rate": 7.043796376124602e-05, "loss": 1.0352, "step": 114610 }, { "epoch": 0.7322745103049972, "grad_norm": 0.5376549959182739, "learning_rate": 7.043338430706448e-05, "loss": 1.0502, "step": 114620 }, { "epoch": 0.732338397454736, "grad_norm": 0.7183248400688171, "learning_rate": 7.042880464710106e-05, "loss": 0.6899, "step": 114630 }, { "epoch": 0.7324022846044747, "grad_norm": 0.9804075956344604, "learning_rate": 7.042422478140194e-05, "loss": 0.9902, "step": 114640 }, { "epoch": 0.7324661717542134, "grad_norm": 1.1558316946029663, "learning_rate": 7.041964471001318e-05, "loss": 0.8413, "step": 114650 }, { "epoch": 0.7325300589039521, "grad_norm": 0.6205730438232422, "learning_rate": 7.041506443298093e-05, "loss": 0.8944, "step": 114660 }, { "epoch": 0.7325939460536908, "grad_norm": 1.1276192665100098, "learning_rate": 7.041048395035135e-05, "loss": 0.7895, "step": 114670 }, { "epoch": 0.7326578332034295, "grad_norm": 0.9537439942359924, "learning_rate": 7.040590326217052e-05, "loss": 0.875, "step": 114680 }, { "epoch": 0.7327217203531682, "grad_norm": 0.7265706062316895, "learning_rate": 7.040132236848457e-05, "loss": 1.1706, "step": 114690 }, { "epoch": 0.7327856075029069, "grad_norm": 0.9442006945610046, "learning_rate": 7.039674126933969e-05, "loss": 0.9093, "step": 114700 }, { "epoch": 0.7328494946526456, "grad_norm": 1.7052466869354248, "learning_rate": 7.039215996478195e-05, "loss": 0.7726, "step": 114710 }, { "epoch": 0.7329133818023843, "grad_norm": 0.691369891166687, "learning_rate": 7.038757845485754e-05, "loss": 0.7376, "step": 114720 }, { "epoch": 0.732977268952123, "grad_norm": 1.4131755828857422, "learning_rate": 7.038299673961258e-05, "loss": 1.1266, "step": 114730 }, { "epoch": 0.7330411561018617, "grad_norm": 1.0800511837005615, "learning_rate": 7.037841481909319e-05, "loss": 0.8857, "step": 114740 }, { "epoch": 0.7331050432516004, "grad_norm": 0.9274218678474426, "learning_rate": 7.037383269334555e-05, "loss": 0.9862, "step": 114750 }, { "epoch": 0.7331689304013391, "grad_norm": 1.05172860622406, "learning_rate": 7.036925036241578e-05, "loss": 1.024, "step": 114760 }, { "epoch": 0.7332328175510778, "grad_norm": 1.1139484643936157, "learning_rate": 7.036466782635003e-05, "loss": 0.7455, "step": 114770 }, { "epoch": 0.7332967047008164, "grad_norm": 0.9190326929092407, "learning_rate": 7.036008508519446e-05, "loss": 0.7979, "step": 114780 }, { "epoch": 0.7333605918505551, "grad_norm": 0.7148388028144836, "learning_rate": 7.03555021389952e-05, "loss": 0.8942, "step": 114790 }, { "epoch": 0.7334244790002938, "grad_norm": 1.5679404735565186, "learning_rate": 7.035091898779846e-05, "loss": 0.7687, "step": 114800 }, { "epoch": 0.7334883661500325, "grad_norm": 1.5291404724121094, "learning_rate": 7.034633563165034e-05, "loss": 0.7391, "step": 114810 }, { "epoch": 0.7335522532997713, "grad_norm": 1.019393801689148, "learning_rate": 7.034175207059704e-05, "loss": 0.7867, "step": 114820 }, { "epoch": 0.73361614044951, "grad_norm": 0.5086638331413269, "learning_rate": 7.033716830468467e-05, "loss": 0.8308, "step": 114830 }, { "epoch": 0.7336800275992487, "grad_norm": 0.7575461268424988, "learning_rate": 7.033258433395944e-05, "loss": 0.6507, "step": 114840 }, { "epoch": 0.7337439147489874, "grad_norm": 1.0149903297424316, "learning_rate": 7.032800015846749e-05, "loss": 0.9033, "step": 114850 }, { "epoch": 0.7338078018987261, "grad_norm": 1.2490330934524536, "learning_rate": 7.032341577825499e-05, "loss": 0.7165, "step": 114860 }, { "epoch": 0.7338716890484648, "grad_norm": 0.9309149384498596, "learning_rate": 7.031883119336811e-05, "loss": 0.7779, "step": 114870 }, { "epoch": 0.7339355761982035, "grad_norm": 1.0880928039550781, "learning_rate": 7.031424640385303e-05, "loss": 0.8678, "step": 114880 }, { "epoch": 0.7339994633479422, "grad_norm": 1.2819446325302124, "learning_rate": 7.03096614097559e-05, "loss": 0.9925, "step": 114890 }, { "epoch": 0.7340633504976809, "grad_norm": 0.588641345500946, "learning_rate": 7.030507621112293e-05, "loss": 0.6344, "step": 114900 }, { "epoch": 0.7341272376474196, "grad_norm": 0.7878732085227966, "learning_rate": 7.030049080800025e-05, "loss": 0.7343, "step": 114910 }, { "epoch": 0.7341911247971583, "grad_norm": 1.4143778085708618, "learning_rate": 7.029590520043409e-05, "loss": 0.7973, "step": 114920 }, { "epoch": 0.734255011946897, "grad_norm": 2.662449598312378, "learning_rate": 7.02913193884706e-05, "loss": 0.8654, "step": 114930 }, { "epoch": 0.7343188990966357, "grad_norm": 0.9117518663406372, "learning_rate": 7.028673337215596e-05, "loss": 0.8811, "step": 114940 }, { "epoch": 0.7343827862463744, "grad_norm": 1.0859434604644775, "learning_rate": 7.028214715153636e-05, "loss": 0.8603, "step": 114950 }, { "epoch": 0.7344466733961131, "grad_norm": 0.9327182173728943, "learning_rate": 7.027756072665798e-05, "loss": 1.0731, "step": 114960 }, { "epoch": 0.7345105605458518, "grad_norm": 0.760019838809967, "learning_rate": 7.027297409756706e-05, "loss": 0.8329, "step": 114970 }, { "epoch": 0.7345744476955905, "grad_norm": 1.3158267736434937, "learning_rate": 7.026838726430972e-05, "loss": 1.0469, "step": 114980 }, { "epoch": 0.7346383348453293, "grad_norm": 0.855215311050415, "learning_rate": 7.026380022693219e-05, "loss": 1.0875, "step": 114990 }, { "epoch": 0.734702221995068, "grad_norm": 0.7679759860038757, "learning_rate": 7.025921298548069e-05, "loss": 0.7983, "step": 115000 }, { "epoch": 0.7347661091448067, "grad_norm": 0.7040578722953796, "learning_rate": 7.025462554000136e-05, "loss": 0.8196, "step": 115010 }, { "epoch": 0.7348299962945453, "grad_norm": 0.7212196588516235, "learning_rate": 7.025003789054044e-05, "loss": 0.9989, "step": 115020 }, { "epoch": 0.734893883444284, "grad_norm": 0.892850935459137, "learning_rate": 7.024545003714411e-05, "loss": 1.2155, "step": 115030 }, { "epoch": 0.7349577705940227, "grad_norm": 0.9270761013031006, "learning_rate": 7.02408619798586e-05, "loss": 0.8714, "step": 115040 }, { "epoch": 0.7350216577437614, "grad_norm": 0.827022910118103, "learning_rate": 7.023627371873008e-05, "loss": 0.7664, "step": 115050 }, { "epoch": 0.7350855448935001, "grad_norm": 0.9258151650428772, "learning_rate": 7.023168525380479e-05, "loss": 0.8016, "step": 115060 }, { "epoch": 0.7351494320432388, "grad_norm": 0.732083797454834, "learning_rate": 7.022709658512892e-05, "loss": 0.7217, "step": 115070 }, { "epoch": 0.7352133191929775, "grad_norm": 1.1232346296310425, "learning_rate": 7.02225077127487e-05, "loss": 0.9083, "step": 115080 }, { "epoch": 0.7352772063427162, "grad_norm": 0.9207686185836792, "learning_rate": 7.021791863671032e-05, "loss": 0.9492, "step": 115090 }, { "epoch": 0.7353410934924549, "grad_norm": 1.1235870122909546, "learning_rate": 7.021332935706e-05, "loss": 1.0061, "step": 115100 }, { "epoch": 0.7354049806421936, "grad_norm": 0.5921577215194702, "learning_rate": 7.020873987384398e-05, "loss": 1.0309, "step": 115110 }, { "epoch": 0.7354688677919323, "grad_norm": 0.9022099375724792, "learning_rate": 7.020415018710846e-05, "loss": 0.956, "step": 115120 }, { "epoch": 0.735532754941671, "grad_norm": 0.7757014036178589, "learning_rate": 7.019956029689968e-05, "loss": 1.2207, "step": 115130 }, { "epoch": 0.7355966420914097, "grad_norm": 0.7293660640716553, "learning_rate": 7.019497020326384e-05, "loss": 0.8932, "step": 115140 }, { "epoch": 0.7356605292411484, "grad_norm": 0.7669858932495117, "learning_rate": 7.019037990624718e-05, "loss": 0.8368, "step": 115150 }, { "epoch": 0.7357244163908871, "grad_norm": 1.5827726125717163, "learning_rate": 7.018578940589592e-05, "loss": 0.7371, "step": 115160 }, { "epoch": 0.7357883035406259, "grad_norm": 1.0692715644836426, "learning_rate": 7.018119870225632e-05, "loss": 0.9331, "step": 115170 }, { "epoch": 0.7358521906903646, "grad_norm": 1.1816247701644897, "learning_rate": 7.017660779537458e-05, "loss": 0.7824, "step": 115180 }, { "epoch": 0.7359160778401033, "grad_norm": 1.0669434070587158, "learning_rate": 7.017201668529695e-05, "loss": 0.8516, "step": 115190 }, { "epoch": 0.735979964989842, "grad_norm": 1.2087671756744385, "learning_rate": 7.016742537206965e-05, "loss": 1.0443, "step": 115200 }, { "epoch": 0.7360438521395807, "grad_norm": 0.8520811200141907, "learning_rate": 7.016283385573893e-05, "loss": 0.7544, "step": 115210 }, { "epoch": 0.7361077392893194, "grad_norm": 0.6800863146781921, "learning_rate": 7.015824213635104e-05, "loss": 1.0469, "step": 115220 }, { "epoch": 0.7361716264390581, "grad_norm": 1.1106778383255005, "learning_rate": 7.01536502139522e-05, "loss": 1.0884, "step": 115230 }, { "epoch": 0.7362355135887968, "grad_norm": 1.0716586112976074, "learning_rate": 7.014905808858868e-05, "loss": 0.8972, "step": 115240 }, { "epoch": 0.7362994007385355, "grad_norm": 0.961650550365448, "learning_rate": 7.01444657603067e-05, "loss": 0.7001, "step": 115250 }, { "epoch": 0.7363632878882741, "grad_norm": 0.7025936245918274, "learning_rate": 7.013987322915252e-05, "loss": 0.9839, "step": 115260 }, { "epoch": 0.7364271750380128, "grad_norm": 0.7885773181915283, "learning_rate": 7.013528049517241e-05, "loss": 0.8556, "step": 115270 }, { "epoch": 0.7364910621877515, "grad_norm": 0.9782485365867615, "learning_rate": 7.013068755841258e-05, "loss": 0.7966, "step": 115280 }, { "epoch": 0.7365549493374902, "grad_norm": 1.2593889236450195, "learning_rate": 7.012609441891934e-05, "loss": 0.7664, "step": 115290 }, { "epoch": 0.7366188364872289, "grad_norm": 0.7124470472335815, "learning_rate": 7.01215010767389e-05, "loss": 1.2127, "step": 115300 }, { "epoch": 0.7366827236369676, "grad_norm": 1.3822424411773682, "learning_rate": 7.011690753191754e-05, "loss": 1.2461, "step": 115310 }, { "epoch": 0.7367466107867063, "grad_norm": 1.3084379434585571, "learning_rate": 7.011231378450152e-05, "loss": 0.7628, "step": 115320 }, { "epoch": 0.736810497936445, "grad_norm": 1.2158416509628296, "learning_rate": 7.01077198345371e-05, "loss": 0.7993, "step": 115330 }, { "epoch": 0.7368743850861837, "grad_norm": 1.6002458333969116, "learning_rate": 7.010312568207055e-05, "loss": 0.9804, "step": 115340 }, { "epoch": 0.7369382722359225, "grad_norm": 1.1068108081817627, "learning_rate": 7.009853132714812e-05, "loss": 0.7037, "step": 115350 }, { "epoch": 0.7370021593856612, "grad_norm": 0.4763220548629761, "learning_rate": 7.00939367698161e-05, "loss": 0.6586, "step": 115360 }, { "epoch": 0.7370660465353999, "grad_norm": 0.9020888805389404, "learning_rate": 7.008934201012076e-05, "loss": 1.0595, "step": 115370 }, { "epoch": 0.7371299336851386, "grad_norm": 1.2904754877090454, "learning_rate": 7.008474704810835e-05, "loss": 0.8614, "step": 115380 }, { "epoch": 0.7371938208348773, "grad_norm": 1.1419029235839844, "learning_rate": 7.008015188382517e-05, "loss": 0.7458, "step": 115390 }, { "epoch": 0.737257707984616, "grad_norm": 0.887784481048584, "learning_rate": 7.00755565173175e-05, "loss": 0.7534, "step": 115400 }, { "epoch": 0.7373215951343547, "grad_norm": 0.8305013179779053, "learning_rate": 7.007096094863159e-05, "loss": 0.9282, "step": 115410 }, { "epoch": 0.7373854822840934, "grad_norm": 0.9823849201202393, "learning_rate": 7.006636517781376e-05, "loss": 1.0846, "step": 115420 }, { "epoch": 0.7374493694338321, "grad_norm": 1.4131437540054321, "learning_rate": 7.006176920491025e-05, "loss": 0.7773, "step": 115430 }, { "epoch": 0.7375132565835708, "grad_norm": 0.7678616642951965, "learning_rate": 7.005717302996739e-05, "loss": 1.084, "step": 115440 }, { "epoch": 0.7375771437333095, "grad_norm": 0.655906617641449, "learning_rate": 7.005257665303142e-05, "loss": 0.9624, "step": 115450 }, { "epoch": 0.7376410308830482, "grad_norm": 0.9372738003730774, "learning_rate": 7.004798007414867e-05, "loss": 0.8082, "step": 115460 }, { "epoch": 0.7377049180327869, "grad_norm": 1.036291480064392, "learning_rate": 7.004338329336541e-05, "loss": 1.0762, "step": 115470 }, { "epoch": 0.7377688051825256, "grad_norm": 0.8642080426216125, "learning_rate": 7.003878631072794e-05, "loss": 0.9366, "step": 115480 }, { "epoch": 0.7378326923322643, "grad_norm": 1.0046378374099731, "learning_rate": 7.003418912628257e-05, "loss": 0.9384, "step": 115490 }, { "epoch": 0.7378965794820029, "grad_norm": 0.8233842849731445, "learning_rate": 7.002959174007558e-05, "loss": 0.6712, "step": 115500 }, { "epoch": 0.7379604666317416, "grad_norm": 1.354340672492981, "learning_rate": 7.002499415215325e-05, "loss": 0.9442, "step": 115510 }, { "epoch": 0.7380243537814803, "grad_norm": 1.4941532611846924, "learning_rate": 7.002039636256192e-05, "loss": 0.7647, "step": 115520 }, { "epoch": 0.738088240931219, "grad_norm": 1.0750895738601685, "learning_rate": 7.001579837134789e-05, "loss": 1.0533, "step": 115530 }, { "epoch": 0.7381521280809578, "grad_norm": 1.1024094820022583, "learning_rate": 7.001120017855745e-05, "loss": 0.959, "step": 115540 }, { "epoch": 0.7382160152306965, "grad_norm": 1.5375107526779175, "learning_rate": 7.000660178423691e-05, "loss": 0.9801, "step": 115550 }, { "epoch": 0.7382799023804352, "grad_norm": 0.8644910454750061, "learning_rate": 7.000200318843258e-05, "loss": 0.9112, "step": 115560 }, { "epoch": 0.7383437895301739, "grad_norm": 0.8466353416442871, "learning_rate": 6.999740439119078e-05, "loss": 1.028, "step": 115570 }, { "epoch": 0.7384076766799126, "grad_norm": 0.8331221342086792, "learning_rate": 6.99928053925578e-05, "loss": 0.7816, "step": 115580 }, { "epoch": 0.7384715638296513, "grad_norm": 1.330575704574585, "learning_rate": 6.998820619257999e-05, "loss": 1.1076, "step": 115590 }, { "epoch": 0.73853545097939, "grad_norm": 1.0668553113937378, "learning_rate": 6.998360679130364e-05, "loss": 0.7225, "step": 115600 }, { "epoch": 0.7385993381291287, "grad_norm": 2.099946975708008, "learning_rate": 6.997900718877509e-05, "loss": 0.8623, "step": 115610 }, { "epoch": 0.7386632252788674, "grad_norm": 1.1908918619155884, "learning_rate": 6.997440738504065e-05, "loss": 0.9463, "step": 115620 }, { "epoch": 0.7387271124286061, "grad_norm": 0.8096728920936584, "learning_rate": 6.996980738014665e-05, "loss": 0.7725, "step": 115630 }, { "epoch": 0.7387909995783448, "grad_norm": 0.7145434617996216, "learning_rate": 6.996520717413939e-05, "loss": 0.9579, "step": 115640 }, { "epoch": 0.7388548867280835, "grad_norm": 2.0472443103790283, "learning_rate": 6.996060676706525e-05, "loss": 1.1305, "step": 115650 }, { "epoch": 0.7389187738778222, "grad_norm": 0.7645730376243591, "learning_rate": 6.995600615897052e-05, "loss": 1.1646, "step": 115660 }, { "epoch": 0.7389826610275609, "grad_norm": 0.8512725234031677, "learning_rate": 6.995140534990155e-05, "loss": 0.9598, "step": 115670 }, { "epoch": 0.7390465481772996, "grad_norm": 1.5269567966461182, "learning_rate": 6.994680433990466e-05, "loss": 0.6954, "step": 115680 }, { "epoch": 0.7391104353270384, "grad_norm": 1.0922000408172607, "learning_rate": 6.99422031290262e-05, "loss": 0.6855, "step": 115690 }, { "epoch": 0.7391743224767771, "grad_norm": 0.9279537796974182, "learning_rate": 6.993760171731251e-05, "loss": 1.0426, "step": 115700 }, { "epoch": 0.7392382096265158, "grad_norm": 1.6063908338546753, "learning_rate": 6.993300010480991e-05, "loss": 0.779, "step": 115710 }, { "epoch": 0.7393020967762545, "grad_norm": 2.612882614135742, "learning_rate": 6.992839829156475e-05, "loss": 0.9478, "step": 115720 }, { "epoch": 0.7393659839259932, "grad_norm": 1.1300225257873535, "learning_rate": 6.992379627762339e-05, "loss": 0.8534, "step": 115730 }, { "epoch": 0.7394298710757319, "grad_norm": 0.7757554650306702, "learning_rate": 6.991919406303216e-05, "loss": 1.0868, "step": 115740 }, { "epoch": 0.7394937582254705, "grad_norm": 0.7098391652107239, "learning_rate": 6.991459164783741e-05, "loss": 0.8308, "step": 115750 }, { "epoch": 0.7395576453752092, "grad_norm": 0.7299323678016663, "learning_rate": 6.99099890320855e-05, "loss": 0.9169, "step": 115760 }, { "epoch": 0.7396215325249479, "grad_norm": 0.8165879249572754, "learning_rate": 6.990538621582278e-05, "loss": 0.8462, "step": 115770 }, { "epoch": 0.7396854196746866, "grad_norm": 0.8184595108032227, "learning_rate": 6.990078319909559e-05, "loss": 1.0554, "step": 115780 }, { "epoch": 0.7397493068244253, "grad_norm": 0.7778357863426208, "learning_rate": 6.989617998195032e-05, "loss": 0.9903, "step": 115790 }, { "epoch": 0.739813193974164, "grad_norm": 0.9583641290664673, "learning_rate": 6.989157656443327e-05, "loss": 0.9248, "step": 115800 }, { "epoch": 0.7398770811239027, "grad_norm": 1.5333011150360107, "learning_rate": 6.988697294659085e-05, "loss": 0.9296, "step": 115810 }, { "epoch": 0.7399409682736414, "grad_norm": 0.9242226481437683, "learning_rate": 6.98823691284694e-05, "loss": 0.9196, "step": 115820 }, { "epoch": 0.7400048554233801, "grad_norm": 1.562110185623169, "learning_rate": 6.98777651101153e-05, "loss": 0.8195, "step": 115830 }, { "epoch": 0.7400687425731188, "grad_norm": 0.8381466865539551, "learning_rate": 6.987316089157492e-05, "loss": 0.757, "step": 115840 }, { "epoch": 0.7401326297228575, "grad_norm": 0.7180655002593994, "learning_rate": 6.986855647289461e-05, "loss": 0.7828, "step": 115850 }, { "epoch": 0.7401965168725962, "grad_norm": 1.6034409999847412, "learning_rate": 6.986395185412073e-05, "loss": 1.4035, "step": 115860 }, { "epoch": 0.740260404022335, "grad_norm": 1.0359718799591064, "learning_rate": 6.985934703529969e-05, "loss": 0.8757, "step": 115870 }, { "epoch": 0.7403242911720737, "grad_norm": 0.6328555345535278, "learning_rate": 6.985474201647784e-05, "loss": 0.6099, "step": 115880 }, { "epoch": 0.7403881783218124, "grad_norm": 0.6946358680725098, "learning_rate": 6.985013679770156e-05, "loss": 0.8574, "step": 115890 }, { "epoch": 0.7404520654715511, "grad_norm": 1.2363545894622803, "learning_rate": 6.984553137901722e-05, "loss": 0.8605, "step": 115900 }, { "epoch": 0.7405159526212898, "grad_norm": 1.1174249649047852, "learning_rate": 6.984092576047123e-05, "loss": 0.7995, "step": 115910 }, { "epoch": 0.7405798397710285, "grad_norm": 0.9834713339805603, "learning_rate": 6.983631994210994e-05, "loss": 1.31, "step": 115920 }, { "epoch": 0.7406437269207672, "grad_norm": 1.397002935409546, "learning_rate": 6.983171392397975e-05, "loss": 0.8187, "step": 115930 }, { "epoch": 0.7407076140705059, "grad_norm": 1.3270031213760376, "learning_rate": 6.982710770612704e-05, "loss": 1.112, "step": 115940 }, { "epoch": 0.7407715012202446, "grad_norm": 0.6648245453834534, "learning_rate": 6.98225012885982e-05, "loss": 0.8693, "step": 115950 }, { "epoch": 0.7408353883699833, "grad_norm": 0.7026130557060242, "learning_rate": 6.981789467143965e-05, "loss": 0.8299, "step": 115960 }, { "epoch": 0.740899275519722, "grad_norm": 0.9298969507217407, "learning_rate": 6.981328785469772e-05, "loss": 0.8988, "step": 115970 }, { "epoch": 0.7409631626694607, "grad_norm": 0.921608567237854, "learning_rate": 6.980868083841887e-05, "loss": 0.7417, "step": 115980 }, { "epoch": 0.7410270498191993, "grad_norm": 0.960408091545105, "learning_rate": 6.980407362264945e-05, "loss": 0.8521, "step": 115990 }, { "epoch": 0.741090936968938, "grad_norm": 0.6170063018798828, "learning_rate": 6.979946620743587e-05, "loss": 0.8537, "step": 116000 }, { "epoch": 0.7411548241186767, "grad_norm": 1.5411709547042847, "learning_rate": 6.979485859282453e-05, "loss": 1.0145, "step": 116010 }, { "epoch": 0.7412187112684154, "grad_norm": 1.529691457748413, "learning_rate": 6.979025077886185e-05, "loss": 0.8084, "step": 116020 }, { "epoch": 0.7412825984181541, "grad_norm": 1.5334669351577759, "learning_rate": 6.978564276559423e-05, "loss": 0.6795, "step": 116030 }, { "epoch": 0.7413464855678928, "grad_norm": 0.7906901240348816, "learning_rate": 6.978103455306808e-05, "loss": 0.8419, "step": 116040 }, { "epoch": 0.7414103727176315, "grad_norm": 1.2242873907089233, "learning_rate": 6.977642614132979e-05, "loss": 0.8119, "step": 116050 }, { "epoch": 0.7414742598673703, "grad_norm": 0.705220639705658, "learning_rate": 6.977181753042577e-05, "loss": 1.2762, "step": 116060 }, { "epoch": 0.741538147017109, "grad_norm": 0.8187665343284607, "learning_rate": 6.976720872040245e-05, "loss": 0.8109, "step": 116070 }, { "epoch": 0.7416020341668477, "grad_norm": 1.5236612558364868, "learning_rate": 6.976259971130624e-05, "loss": 0.9235, "step": 116080 }, { "epoch": 0.7416659213165864, "grad_norm": 3.65586519241333, "learning_rate": 6.975799050318355e-05, "loss": 0.8398, "step": 116090 }, { "epoch": 0.7417298084663251, "grad_norm": 0.6140323877334595, "learning_rate": 6.97533810960808e-05, "loss": 0.9414, "step": 116100 }, { "epoch": 0.7417936956160638, "grad_norm": 0.6180686950683594, "learning_rate": 6.974877149004441e-05, "loss": 0.8651, "step": 116110 }, { "epoch": 0.7418575827658025, "grad_norm": 0.8425277471542358, "learning_rate": 6.97441616851208e-05, "loss": 1.0168, "step": 116120 }, { "epoch": 0.7419214699155412, "grad_norm": 0.7344639897346497, "learning_rate": 6.973955168135642e-05, "loss": 0.6021, "step": 116130 }, { "epoch": 0.7419853570652799, "grad_norm": 0.8152681589126587, "learning_rate": 6.973494147879767e-05, "loss": 0.9441, "step": 116140 }, { "epoch": 0.7420492442150186, "grad_norm": 0.810077428817749, "learning_rate": 6.973033107749098e-05, "loss": 0.7308, "step": 116150 }, { "epoch": 0.7421131313647573, "grad_norm": 0.8788096308708191, "learning_rate": 6.972572047748281e-05, "loss": 0.7761, "step": 116160 }, { "epoch": 0.742177018514496, "grad_norm": 1.0663613080978394, "learning_rate": 6.972110967881953e-05, "loss": 1.0319, "step": 116170 }, { "epoch": 0.7422409056642347, "grad_norm": 1.0700383186340332, "learning_rate": 6.971649868154764e-05, "loss": 0.7867, "step": 116180 }, { "epoch": 0.7423047928139734, "grad_norm": 1.0407042503356934, "learning_rate": 6.971188748571355e-05, "loss": 1.0002, "step": 116190 }, { "epoch": 0.7423686799637121, "grad_norm": 0.9793998599052429, "learning_rate": 6.97072760913637e-05, "loss": 1.1765, "step": 116200 }, { "epoch": 0.7424325671134508, "grad_norm": 1.643677830696106, "learning_rate": 6.970266449854452e-05, "loss": 0.9574, "step": 116210 }, { "epoch": 0.7424964542631896, "grad_norm": 0.7675092220306396, "learning_rate": 6.969805270730248e-05, "loss": 0.7953, "step": 116220 }, { "epoch": 0.7425603414129281, "grad_norm": 1.0452982187271118, "learning_rate": 6.969344071768398e-05, "loss": 0.9938, "step": 116230 }, { "epoch": 0.7426242285626669, "grad_norm": 1.2801587581634521, "learning_rate": 6.968882852973553e-05, "loss": 0.8495, "step": 116240 }, { "epoch": 0.7426881157124056, "grad_norm": 0.7818521857261658, "learning_rate": 6.968421614350352e-05, "loss": 0.9984, "step": 116250 }, { "epoch": 0.7427520028621443, "grad_norm": 1.2182539701461792, "learning_rate": 6.967960355903442e-05, "loss": 0.8587, "step": 116260 }, { "epoch": 0.742815890011883, "grad_norm": 0.7654426097869873, "learning_rate": 6.96749907763747e-05, "loss": 0.8998, "step": 116270 }, { "epoch": 0.7428797771616217, "grad_norm": 4.754692554473877, "learning_rate": 6.967037779557082e-05, "loss": 0.8934, "step": 116280 }, { "epoch": 0.7429436643113604, "grad_norm": 0.9145668745040894, "learning_rate": 6.966576461666919e-05, "loss": 1.082, "step": 116290 }, { "epoch": 0.7430075514610991, "grad_norm": 0.6794173717498779, "learning_rate": 6.96611512397163e-05, "loss": 0.82, "step": 116300 }, { "epoch": 0.7430714386108378, "grad_norm": 1.012447714805603, "learning_rate": 6.965653766475862e-05, "loss": 0.9966, "step": 116310 }, { "epoch": 0.7431353257605765, "grad_norm": 0.8482702374458313, "learning_rate": 6.96519238918426e-05, "loss": 0.8475, "step": 116320 }, { "epoch": 0.7431992129103152, "grad_norm": 0.8447071313858032, "learning_rate": 6.964730992101468e-05, "loss": 0.9462, "step": 116330 }, { "epoch": 0.7432631000600539, "grad_norm": 0.6218491792678833, "learning_rate": 6.964269575232138e-05, "loss": 0.8291, "step": 116340 }, { "epoch": 0.7433269872097926, "grad_norm": 1.0346623659133911, "learning_rate": 6.963808138580912e-05, "loss": 0.9003, "step": 116350 }, { "epoch": 0.7433908743595313, "grad_norm": 0.7092266082763672, "learning_rate": 6.96334668215244e-05, "loss": 0.906, "step": 116360 }, { "epoch": 0.74345476150927, "grad_norm": 2.0959596633911133, "learning_rate": 6.962885205951369e-05, "loss": 1.2469, "step": 116370 }, { "epoch": 0.7435186486590087, "grad_norm": 0.5212934613227844, "learning_rate": 6.962423709982345e-05, "loss": 0.6491, "step": 116380 }, { "epoch": 0.7435825358087474, "grad_norm": 0.7486282587051392, "learning_rate": 6.961962194250017e-05, "loss": 0.8276, "step": 116390 }, { "epoch": 0.7436464229584862, "grad_norm": 0.8907299041748047, "learning_rate": 6.961500658759033e-05, "loss": 0.8261, "step": 116400 }, { "epoch": 0.7437103101082249, "grad_norm": 1.0088621377944946, "learning_rate": 6.961039103514039e-05, "loss": 0.8501, "step": 116410 }, { "epoch": 0.7437741972579636, "grad_norm": 0.852279007434845, "learning_rate": 6.960577528519685e-05, "loss": 1.0004, "step": 116420 }, { "epoch": 0.7438380844077023, "grad_norm": 0.6919979453086853, "learning_rate": 6.96011593378062e-05, "loss": 0.9841, "step": 116430 }, { "epoch": 0.743901971557441, "grad_norm": 1.0231778621673584, "learning_rate": 6.959654319301492e-05, "loss": 0.9452, "step": 116440 }, { "epoch": 0.7439658587071797, "grad_norm": 1.6192152500152588, "learning_rate": 6.959238849396364e-05, "loss": 0.8734, "step": 116450 }, { "epoch": 0.7440297458569184, "grad_norm": 1.1661548614501953, "learning_rate": 6.958777197423922e-05, "loss": 0.8547, "step": 116460 }, { "epoch": 0.7440936330066571, "grad_norm": 1.9804883003234863, "learning_rate": 6.958315525724901e-05, "loss": 0.8883, "step": 116470 }, { "epoch": 0.7441575201563957, "grad_norm": 0.9184843897819519, "learning_rate": 6.957853834303946e-05, "loss": 1.0803, "step": 116480 }, { "epoch": 0.7442214073061344, "grad_norm": 0.9524401426315308, "learning_rate": 6.957392123165711e-05, "loss": 0.823, "step": 116490 }, { "epoch": 0.7442852944558731, "grad_norm": 0.7346342206001282, "learning_rate": 6.956930392314845e-05, "loss": 0.842, "step": 116500 }, { "epoch": 0.7443491816056118, "grad_norm": 0.6176126003265381, "learning_rate": 6.956468641755994e-05, "loss": 0.824, "step": 116510 }, { "epoch": 0.7444130687553505, "grad_norm": 1.0610926151275635, "learning_rate": 6.956006871493814e-05, "loss": 0.8781, "step": 116520 }, { "epoch": 0.7444769559050892, "grad_norm": 1.190373420715332, "learning_rate": 6.95554508153295e-05, "loss": 0.9477, "step": 116530 }, { "epoch": 0.7445408430548279, "grad_norm": 1.2164260149002075, "learning_rate": 6.955083271878056e-05, "loss": 0.7542, "step": 116540 }, { "epoch": 0.7446047302045666, "grad_norm": 0.9188566207885742, "learning_rate": 6.954621442533784e-05, "loss": 0.9441, "step": 116550 }, { "epoch": 0.7446686173543053, "grad_norm": 1.2796574831008911, "learning_rate": 6.954159593504781e-05, "loss": 0.738, "step": 116560 }, { "epoch": 0.744732504504044, "grad_norm": 0.8466264605522156, "learning_rate": 6.953697724795702e-05, "loss": 1.0705, "step": 116570 }, { "epoch": 0.7447963916537828, "grad_norm": 0.9667829275131226, "learning_rate": 6.953235836411194e-05, "loss": 0.9622, "step": 116580 }, { "epoch": 0.7448602788035215, "grad_norm": 3.007852792739868, "learning_rate": 6.952773928355913e-05, "loss": 0.8599, "step": 116590 }, { "epoch": 0.7449241659532602, "grad_norm": 0.6320720314979553, "learning_rate": 6.95231200063451e-05, "loss": 0.9257, "step": 116600 }, { "epoch": 0.7449880531029989, "grad_norm": 0.8152862787246704, "learning_rate": 6.951850053251636e-05, "loss": 0.7299, "step": 116610 }, { "epoch": 0.7450519402527376, "grad_norm": 0.709783673286438, "learning_rate": 6.951388086211943e-05, "loss": 0.8396, "step": 116620 }, { "epoch": 0.7451158274024763, "grad_norm": 0.844637393951416, "learning_rate": 6.950926099520084e-05, "loss": 0.5487, "step": 116630 }, { "epoch": 0.745179714552215, "grad_norm": 1.2991611957550049, "learning_rate": 6.95046409318071e-05, "loss": 0.8171, "step": 116640 }, { "epoch": 0.7452436017019537, "grad_norm": 1.085801124572754, "learning_rate": 6.950002067198475e-05, "loss": 0.9065, "step": 116650 }, { "epoch": 0.7453074888516924, "grad_norm": 0.8042502999305725, "learning_rate": 6.949540021578034e-05, "loss": 0.9323, "step": 116660 }, { "epoch": 0.7453713760014311, "grad_norm": 0.6853629946708679, "learning_rate": 6.949077956324038e-05, "loss": 0.8285, "step": 116670 }, { "epoch": 0.7454352631511698, "grad_norm": 0.7794731259346008, "learning_rate": 6.94861587144114e-05, "loss": 0.9293, "step": 116680 }, { "epoch": 0.7454991503009085, "grad_norm": 0.9132157564163208, "learning_rate": 6.948153766933995e-05, "loss": 1.0174, "step": 116690 }, { "epoch": 0.7455630374506472, "grad_norm": 1.2286196947097778, "learning_rate": 6.947691642807256e-05, "loss": 1.0193, "step": 116700 }, { "epoch": 0.7456269246003859, "grad_norm": 0.5221213102340698, "learning_rate": 6.947229499065578e-05, "loss": 0.7766, "step": 116710 }, { "epoch": 0.7456908117501245, "grad_norm": 1.4980177879333496, "learning_rate": 6.946767335713613e-05, "loss": 0.8005, "step": 116720 }, { "epoch": 0.7457546988998632, "grad_norm": 0.8484123349189758, "learning_rate": 6.946305152756017e-05, "loss": 0.6168, "step": 116730 }, { "epoch": 0.7458185860496019, "grad_norm": 0.9975723028182983, "learning_rate": 6.945842950197446e-05, "loss": 0.9931, "step": 116740 }, { "epoch": 0.7458824731993406, "grad_norm": 1.0813270807266235, "learning_rate": 6.945380728042549e-05, "loss": 0.8407, "step": 116750 }, { "epoch": 0.7459463603490794, "grad_norm": 1.2126818895339966, "learning_rate": 6.944918486295989e-05, "loss": 0.8995, "step": 116760 }, { "epoch": 0.7460102474988181, "grad_norm": 0.9279122948646545, "learning_rate": 6.944456224962417e-05, "loss": 0.819, "step": 116770 }, { "epoch": 0.7460741346485568, "grad_norm": 0.9613460302352905, "learning_rate": 6.943993944046487e-05, "loss": 1.41, "step": 116780 }, { "epoch": 0.7461380217982955, "grad_norm": 0.6390883922576904, "learning_rate": 6.943531643552857e-05, "loss": 0.7246, "step": 116790 }, { "epoch": 0.7462019089480342, "grad_norm": 1.0150036811828613, "learning_rate": 6.943069323486183e-05, "loss": 0.7269, "step": 116800 }, { "epoch": 0.7462657960977729, "grad_norm": 0.7378376722335815, "learning_rate": 6.942606983851116e-05, "loss": 0.9508, "step": 116810 }, { "epoch": 0.7463296832475116, "grad_norm": 0.9831222295761108, "learning_rate": 6.94214462465232e-05, "loss": 0.9079, "step": 116820 }, { "epoch": 0.7463935703972503, "grad_norm": 1.4024206399917603, "learning_rate": 6.941682245894446e-05, "loss": 0.8302, "step": 116830 }, { "epoch": 0.746457457546989, "grad_norm": 2.6313674449920654, "learning_rate": 6.94121984758215e-05, "loss": 0.9034, "step": 116840 }, { "epoch": 0.7465213446967277, "grad_norm": 0.7993502616882324, "learning_rate": 6.940757429720094e-05, "loss": 0.681, "step": 116850 }, { "epoch": 0.7465852318464664, "grad_norm": 1.0172010660171509, "learning_rate": 6.940294992312932e-05, "loss": 1.0379, "step": 116860 }, { "epoch": 0.7466491189962051, "grad_norm": 0.764042854309082, "learning_rate": 6.939832535365319e-05, "loss": 0.8982, "step": 116870 }, { "epoch": 0.7467130061459438, "grad_norm": 1.4996678829193115, "learning_rate": 6.939370058881914e-05, "loss": 1.0452, "step": 116880 }, { "epoch": 0.7467768932956825, "grad_norm": 0.6007105708122253, "learning_rate": 6.938907562867374e-05, "loss": 0.8955, "step": 116890 }, { "epoch": 0.7468407804454212, "grad_norm": 0.753452718257904, "learning_rate": 6.93844504732636e-05, "loss": 0.8096, "step": 116900 }, { "epoch": 0.7469046675951599, "grad_norm": 0.8642897009849548, "learning_rate": 6.937982512263528e-05, "loss": 0.7916, "step": 116910 }, { "epoch": 0.7469685547448986, "grad_norm": 0.8521572947502136, "learning_rate": 6.937519957683534e-05, "loss": 0.7633, "step": 116920 }, { "epoch": 0.7470324418946374, "grad_norm": 1.336063027381897, "learning_rate": 6.937057383591037e-05, "loss": 0.8625, "step": 116930 }, { "epoch": 0.7470963290443761, "grad_norm": 2.280611276626587, "learning_rate": 6.936594789990696e-05, "loss": 1.225, "step": 116940 }, { "epoch": 0.7471602161941148, "grad_norm": 0.5119796991348267, "learning_rate": 6.936132176887171e-05, "loss": 0.7675, "step": 116950 }, { "epoch": 0.7472241033438534, "grad_norm": 1.0470057725906372, "learning_rate": 6.93566954428512e-05, "loss": 0.9467, "step": 116960 }, { "epoch": 0.7472879904935921, "grad_norm": 0.823755145072937, "learning_rate": 6.935206892189202e-05, "loss": 0.7573, "step": 116970 }, { "epoch": 0.7473518776433308, "grad_norm": 0.7342879176139832, "learning_rate": 6.934744220604076e-05, "loss": 0.926, "step": 116980 }, { "epoch": 0.7474157647930695, "grad_norm": 0.5635343790054321, "learning_rate": 6.934281529534403e-05, "loss": 0.8136, "step": 116990 }, { "epoch": 0.7474796519428082, "grad_norm": 0.515261173248291, "learning_rate": 6.93381881898484e-05, "loss": 0.6738, "step": 117000 }, { "epoch": 0.7475435390925469, "grad_norm": 1.1176193952560425, "learning_rate": 6.93335608896005e-05, "loss": 0.7295, "step": 117010 }, { "epoch": 0.7476074262422856, "grad_norm": 0.7528888583183289, "learning_rate": 6.93289333946469e-05, "loss": 0.9808, "step": 117020 }, { "epoch": 0.7476713133920243, "grad_norm": 0.6792475581169128, "learning_rate": 6.932430570503423e-05, "loss": 0.7732, "step": 117030 }, { "epoch": 0.747735200541763, "grad_norm": 0.49816229939460754, "learning_rate": 6.931967782080908e-05, "loss": 1.0765, "step": 117040 }, { "epoch": 0.7477990876915017, "grad_norm": 0.6919913291931152, "learning_rate": 6.931504974201806e-05, "loss": 0.9868, "step": 117050 }, { "epoch": 0.7478629748412404, "grad_norm": 1.421985387802124, "learning_rate": 6.931042146870779e-05, "loss": 1.0408, "step": 117060 }, { "epoch": 0.7479268619909791, "grad_norm": 0.6791272163391113, "learning_rate": 6.930579300092487e-05, "loss": 1.0113, "step": 117070 }, { "epoch": 0.7479907491407178, "grad_norm": 0.6858437657356262, "learning_rate": 6.93011643387159e-05, "loss": 0.7507, "step": 117080 }, { "epoch": 0.7480546362904565, "grad_norm": 0.5481752753257751, "learning_rate": 6.92965354821275e-05, "loss": 0.7424, "step": 117090 }, { "epoch": 0.7481185234401952, "grad_norm": 0.6619887948036194, "learning_rate": 6.929190643120632e-05, "loss": 0.8167, "step": 117100 }, { "epoch": 0.748182410589934, "grad_norm": 1.6470102071762085, "learning_rate": 6.928727718599893e-05, "loss": 0.9335, "step": 117110 }, { "epoch": 0.7482462977396727, "grad_norm": 0.7294175028800964, "learning_rate": 6.928264774655198e-05, "loss": 0.8117, "step": 117120 }, { "epoch": 0.7483101848894114, "grad_norm": 1.0997979640960693, "learning_rate": 6.927801811291209e-05, "loss": 1.0709, "step": 117130 }, { "epoch": 0.7483740720391501, "grad_norm": 0.9277620911598206, "learning_rate": 6.927338828512588e-05, "loss": 0.8636, "step": 117140 }, { "epoch": 0.7484379591888888, "grad_norm": 1.3911194801330566, "learning_rate": 6.926875826323997e-05, "loss": 0.9657, "step": 117150 }, { "epoch": 0.7485018463386275, "grad_norm": 0.7593978047370911, "learning_rate": 6.9264128047301e-05, "loss": 0.8314, "step": 117160 }, { "epoch": 0.7485657334883662, "grad_norm": 0.8902554512023926, "learning_rate": 6.92594976373556e-05, "loss": 1.0123, "step": 117170 }, { "epoch": 0.7486296206381049, "grad_norm": 0.8889968991279602, "learning_rate": 6.925486703345038e-05, "loss": 0.9378, "step": 117180 }, { "epoch": 0.7486935077878436, "grad_norm": 0.7080551981925964, "learning_rate": 6.925023623563201e-05, "loss": 1.1536, "step": 117190 }, { "epoch": 0.7487573949375823, "grad_norm": 0.7877635955810547, "learning_rate": 6.924560524394709e-05, "loss": 0.9396, "step": 117200 }, { "epoch": 0.7488212820873209, "grad_norm": 3.540712594985962, "learning_rate": 6.924097405844227e-05, "loss": 0.9817, "step": 117210 }, { "epoch": 0.7488851692370596, "grad_norm": 0.6759431958198547, "learning_rate": 6.923634267916422e-05, "loss": 0.9781, "step": 117220 }, { "epoch": 0.7489490563867983, "grad_norm": 0.7441065311431885, "learning_rate": 6.923171110615954e-05, "loss": 0.6876, "step": 117230 }, { "epoch": 0.749012943536537, "grad_norm": 0.90581214427948, "learning_rate": 6.92270793394749e-05, "loss": 0.9416, "step": 117240 }, { "epoch": 0.7490768306862757, "grad_norm": 0.991303563117981, "learning_rate": 6.922244737915692e-05, "loss": 0.9986, "step": 117250 }, { "epoch": 0.7491407178360144, "grad_norm": 0.9509017467498779, "learning_rate": 6.921781522525229e-05, "loss": 0.9976, "step": 117260 }, { "epoch": 0.7492046049857531, "grad_norm": 1.060160756111145, "learning_rate": 6.921318287780763e-05, "loss": 0.9884, "step": 117270 }, { "epoch": 0.7492684921354918, "grad_norm": 0.8051804900169373, "learning_rate": 6.920855033686959e-05, "loss": 1.0251, "step": 117280 }, { "epoch": 0.7493323792852306, "grad_norm": 0.6516896486282349, "learning_rate": 6.920391760248482e-05, "loss": 1.0851, "step": 117290 }, { "epoch": 0.7493962664349693, "grad_norm": 1.0302486419677734, "learning_rate": 6.91992846747e-05, "loss": 0.9056, "step": 117300 }, { "epoch": 0.749460153584708, "grad_norm": 0.5640853047370911, "learning_rate": 6.919465155356177e-05, "loss": 0.8081, "step": 117310 }, { "epoch": 0.7495240407344467, "grad_norm": 0.6302241086959839, "learning_rate": 6.91900182391168e-05, "loss": 0.8306, "step": 117320 }, { "epoch": 0.7495879278841854, "grad_norm": 1.006842851638794, "learning_rate": 6.918538473141174e-05, "loss": 0.7887, "step": 117330 }, { "epoch": 0.7496518150339241, "grad_norm": 0.8143184781074524, "learning_rate": 6.918075103049325e-05, "loss": 1.0579, "step": 117340 }, { "epoch": 0.7497157021836628, "grad_norm": 0.6692333817481995, "learning_rate": 6.9176117136408e-05, "loss": 0.7448, "step": 117350 }, { "epoch": 0.7497795893334015, "grad_norm": 1.042280912399292, "learning_rate": 6.917148304920267e-05, "loss": 0.8737, "step": 117360 }, { "epoch": 0.7498434764831402, "grad_norm": 1.1402003765106201, "learning_rate": 6.916684876892391e-05, "loss": 0.8044, "step": 117370 }, { "epoch": 0.7499073636328789, "grad_norm": 1.2697054147720337, "learning_rate": 6.916221429561843e-05, "loss": 0.7297, "step": 117380 }, { "epoch": 0.7499712507826176, "grad_norm": 0.7075098156929016, "learning_rate": 6.915757962933284e-05, "loss": 0.696, "step": 117390 }, { "epoch": 0.7500351379323563, "grad_norm": 1.1511503458023071, "learning_rate": 6.915294477011389e-05, "loss": 0.8506, "step": 117400 }, { "epoch": 0.750099025082095, "grad_norm": 0.8749009370803833, "learning_rate": 6.914830971800818e-05, "loss": 1.041, "step": 117410 }, { "epoch": 0.7501629122318337, "grad_norm": 0.7011004686355591, "learning_rate": 6.914367447306244e-05, "loss": 0.8774, "step": 117420 }, { "epoch": 0.7502267993815724, "grad_norm": 1.408872365951538, "learning_rate": 6.913903903532334e-05, "loss": 0.8093, "step": 117430 }, { "epoch": 0.7502906865313111, "grad_norm": 0.79103022813797, "learning_rate": 6.913440340483755e-05, "loss": 0.6807, "step": 117440 }, { "epoch": 0.7503545736810497, "grad_norm": 1.1001317501068115, "learning_rate": 6.912976758165177e-05, "loss": 0.8683, "step": 117450 }, { "epoch": 0.7504184608307884, "grad_norm": 0.7966405749320984, "learning_rate": 6.912513156581267e-05, "loss": 0.8673, "step": 117460 }, { "epoch": 0.7504823479805272, "grad_norm": 0.5426264405250549, "learning_rate": 6.912049535736697e-05, "loss": 0.6471, "step": 117470 }, { "epoch": 0.7505462351302659, "grad_norm": 0.8461551070213318, "learning_rate": 6.911585895636132e-05, "loss": 0.9877, "step": 117480 }, { "epoch": 0.7506101222800046, "grad_norm": 0.7813534736633301, "learning_rate": 6.911122236284244e-05, "loss": 0.8591, "step": 117490 }, { "epoch": 0.7506740094297433, "grad_norm": 2.3332297801971436, "learning_rate": 6.910658557685701e-05, "loss": 1.2769, "step": 117500 }, { "epoch": 0.750737896579482, "grad_norm": 0.9277547597885132, "learning_rate": 6.910194859845174e-05, "loss": 0.7398, "step": 117510 }, { "epoch": 0.7508017837292207, "grad_norm": 0.5283322930335999, "learning_rate": 6.909731142767333e-05, "loss": 0.8047, "step": 117520 }, { "epoch": 0.7508656708789594, "grad_norm": 0.9669355154037476, "learning_rate": 6.909267406456847e-05, "loss": 1.0771, "step": 117530 }, { "epoch": 0.7509295580286981, "grad_norm": 0.8970743417739868, "learning_rate": 6.908803650918385e-05, "loss": 0.9949, "step": 117540 }, { "epoch": 0.7509934451784368, "grad_norm": 0.9849328398704529, "learning_rate": 6.90833987615662e-05, "loss": 0.6818, "step": 117550 }, { "epoch": 0.7510573323281755, "grad_norm": 0.4582323431968689, "learning_rate": 6.907876082176222e-05, "loss": 0.8551, "step": 117560 }, { "epoch": 0.7511212194779142, "grad_norm": 0.8646737933158875, "learning_rate": 6.90741226898186e-05, "loss": 0.8972, "step": 117570 }, { "epoch": 0.7511851066276529, "grad_norm": 1.1300363540649414, "learning_rate": 6.906948436578206e-05, "loss": 0.8249, "step": 117580 }, { "epoch": 0.7512489937773916, "grad_norm": 0.7908068895339966, "learning_rate": 6.906484584969934e-05, "loss": 0.8863, "step": 117590 }, { "epoch": 0.7513128809271303, "grad_norm": 1.099071741104126, "learning_rate": 6.906020714161711e-05, "loss": 0.9949, "step": 117600 }, { "epoch": 0.751376768076869, "grad_norm": 0.874218761920929, "learning_rate": 6.905556824158212e-05, "loss": 0.8182, "step": 117610 }, { "epoch": 0.7514406552266077, "grad_norm": 1.2314951419830322, "learning_rate": 6.905092914964105e-05, "loss": 1.1335, "step": 117620 }, { "epoch": 0.7515045423763465, "grad_norm": 0.8381962180137634, "learning_rate": 6.904628986584066e-05, "loss": 0.8751, "step": 117630 }, { "epoch": 0.7515684295260852, "grad_norm": 1.2576111555099487, "learning_rate": 6.904165039022766e-05, "loss": 0.8728, "step": 117640 }, { "epoch": 0.7516323166758239, "grad_norm": 0.8034125566482544, "learning_rate": 6.903701072284875e-05, "loss": 0.8502, "step": 117650 }, { "epoch": 0.7516962038255626, "grad_norm": 0.6873534917831421, "learning_rate": 6.903237086375068e-05, "loss": 0.8783, "step": 117660 }, { "epoch": 0.7517600909753013, "grad_norm": 0.8254218101501465, "learning_rate": 6.902773081298015e-05, "loss": 0.8267, "step": 117670 }, { "epoch": 0.75182397812504, "grad_norm": 2.0020554065704346, "learning_rate": 6.902309057058393e-05, "loss": 0.8673, "step": 117680 }, { "epoch": 0.7518878652747786, "grad_norm": 1.3021756410598755, "learning_rate": 6.901845013660873e-05, "loss": 0.9579, "step": 117690 }, { "epoch": 0.7519517524245173, "grad_norm": 0.6825690865516663, "learning_rate": 6.901380951110128e-05, "loss": 0.6835, "step": 117700 }, { "epoch": 0.752015639574256, "grad_norm": 1.2867560386657715, "learning_rate": 6.900916869410831e-05, "loss": 0.9329, "step": 117710 }, { "epoch": 0.7520795267239947, "grad_norm": 0.7000182271003723, "learning_rate": 6.900452768567657e-05, "loss": 0.7614, "step": 117720 }, { "epoch": 0.7521434138737334, "grad_norm": 0.7229273319244385, "learning_rate": 6.89998864858528e-05, "loss": 1.082, "step": 117730 }, { "epoch": 0.7522073010234721, "grad_norm": 0.7700644135475159, "learning_rate": 6.899524509468375e-05, "loss": 1.1019, "step": 117740 }, { "epoch": 0.7522711881732108, "grad_norm": 1.3923498392105103, "learning_rate": 6.899060351221613e-05, "loss": 0.8848, "step": 117750 }, { "epoch": 0.7523350753229495, "grad_norm": 0.6365454792976379, "learning_rate": 6.898596173849672e-05, "loss": 1.1148, "step": 117760 }, { "epoch": 0.7523989624726882, "grad_norm": 1.0675365924835205, "learning_rate": 6.898131977357223e-05, "loss": 0.9215, "step": 117770 }, { "epoch": 0.7524628496224269, "grad_norm": 1.0569766759872437, "learning_rate": 6.897667761748943e-05, "loss": 0.7153, "step": 117780 }, { "epoch": 0.7525267367721656, "grad_norm": 1.0126324892044067, "learning_rate": 6.897203527029508e-05, "loss": 0.925, "step": 117790 }, { "epoch": 0.7525906239219043, "grad_norm": 0.9633859992027283, "learning_rate": 6.896739273203592e-05, "loss": 1.0008, "step": 117800 }, { "epoch": 0.752654511071643, "grad_norm": 1.014870047569275, "learning_rate": 6.896275000275872e-05, "loss": 0.8294, "step": 117810 }, { "epoch": 0.7527183982213818, "grad_norm": 1.1315689086914062, "learning_rate": 6.895810708251019e-05, "loss": 0.8227, "step": 117820 }, { "epoch": 0.7527822853711205, "grad_norm": 2.6392271518707275, "learning_rate": 6.895346397133714e-05, "loss": 0.8076, "step": 117830 }, { "epoch": 0.7528461725208592, "grad_norm": 0.7529024481773376, "learning_rate": 6.89488206692863e-05, "loss": 0.8401, "step": 117840 }, { "epoch": 0.7529100596705979, "grad_norm": 0.9664776921272278, "learning_rate": 6.894417717640447e-05, "loss": 1.1103, "step": 117850 }, { "epoch": 0.7529739468203366, "grad_norm": 0.732601523399353, "learning_rate": 6.893953349273836e-05, "loss": 0.7659, "step": 117860 }, { "epoch": 0.7530378339700753, "grad_norm": 0.8885082006454468, "learning_rate": 6.893488961833477e-05, "loss": 0.7957, "step": 117870 }, { "epoch": 0.753101721119814, "grad_norm": 1.1969749927520752, "learning_rate": 6.893024555324045e-05, "loss": 1.0971, "step": 117880 }, { "epoch": 0.7531656082695527, "grad_norm": 1.1240543127059937, "learning_rate": 6.892560129750221e-05, "loss": 0.7384, "step": 117890 }, { "epoch": 0.7532294954192914, "grad_norm": 1.059037208557129, "learning_rate": 6.892095685116677e-05, "loss": 0.7629, "step": 117900 }, { "epoch": 0.7532933825690301, "grad_norm": 0.8897120356559753, "learning_rate": 6.891631221428092e-05, "loss": 1.11, "step": 117910 }, { "epoch": 0.7533572697187688, "grad_norm": 1.0134267807006836, "learning_rate": 6.891166738689146e-05, "loss": 0.8046, "step": 117920 }, { "epoch": 0.7534211568685074, "grad_norm": 1.6655761003494263, "learning_rate": 6.890702236904514e-05, "loss": 0.9587, "step": 117930 }, { "epoch": 0.7534850440182461, "grad_norm": 1.1611841917037964, "learning_rate": 6.890237716078874e-05, "loss": 1.0593, "step": 117940 }, { "epoch": 0.7535489311679848, "grad_norm": 0.8009079098701477, "learning_rate": 6.889773176216905e-05, "loss": 0.901, "step": 117950 }, { "epoch": 0.7536128183177235, "grad_norm": 1.4743709564208984, "learning_rate": 6.889308617323286e-05, "loss": 1.0526, "step": 117960 }, { "epoch": 0.7536767054674622, "grad_norm": 0.7194849252700806, "learning_rate": 6.888844039402695e-05, "loss": 0.7397, "step": 117970 }, { "epoch": 0.7537405926172009, "grad_norm": 1.0453287363052368, "learning_rate": 6.88837944245981e-05, "loss": 1.104, "step": 117980 }, { "epoch": 0.7538044797669397, "grad_norm": 0.8043333292007446, "learning_rate": 6.88791482649931e-05, "loss": 0.798, "step": 117990 }, { "epoch": 0.7538683669166784, "grad_norm": 0.8196787238121033, "learning_rate": 6.887450191525875e-05, "loss": 0.8353, "step": 118000 }, { "epoch": 0.7539322540664171, "grad_norm": 0.6180843114852905, "learning_rate": 6.886985537544183e-05, "loss": 1.0143, "step": 118010 }, { "epoch": 0.7539961412161558, "grad_norm": 1.4987810850143433, "learning_rate": 6.886520864558914e-05, "loss": 0.7404, "step": 118020 }, { "epoch": 0.7540600283658945, "grad_norm": 0.7595736384391785, "learning_rate": 6.886056172574747e-05, "loss": 1.0103, "step": 118030 }, { "epoch": 0.7541239155156332, "grad_norm": 0.7522437572479248, "learning_rate": 6.885591461596364e-05, "loss": 1.1119, "step": 118040 }, { "epoch": 0.7541878026653719, "grad_norm": 0.7806057333946228, "learning_rate": 6.885126731628445e-05, "loss": 0.921, "step": 118050 }, { "epoch": 0.7542516898151106, "grad_norm": 0.722546398639679, "learning_rate": 6.884661982675666e-05, "loss": 0.9464, "step": 118060 }, { "epoch": 0.7543155769648493, "grad_norm": 1.262364149093628, "learning_rate": 6.884197214742713e-05, "loss": 0.9972, "step": 118070 }, { "epoch": 0.754379464114588, "grad_norm": 0.8939563035964966, "learning_rate": 6.883732427834263e-05, "loss": 1.019, "step": 118080 }, { "epoch": 0.7544433512643267, "grad_norm": 1.2244893312454224, "learning_rate": 6.883267621954998e-05, "loss": 0.965, "step": 118090 }, { "epoch": 0.7545072384140654, "grad_norm": 0.7305797338485718, "learning_rate": 6.882802797109599e-05, "loss": 0.976, "step": 118100 }, { "epoch": 0.7545711255638041, "grad_norm": 0.7178799510002136, "learning_rate": 6.882337953302747e-05, "loss": 0.781, "step": 118110 }, { "epoch": 0.7546350127135428, "grad_norm": 1.3888294696807861, "learning_rate": 6.881873090539121e-05, "loss": 1.1481, "step": 118120 }, { "epoch": 0.7546988998632815, "grad_norm": 1.024906873703003, "learning_rate": 6.881408208823409e-05, "loss": 0.8872, "step": 118130 }, { "epoch": 0.7547627870130202, "grad_norm": 1.0983256101608276, "learning_rate": 6.880943308160287e-05, "loss": 0.7869, "step": 118140 }, { "epoch": 0.754826674162759, "grad_norm": 0.6162832975387573, "learning_rate": 6.880478388554438e-05, "loss": 0.5942, "step": 118150 }, { "epoch": 0.7548905613124977, "grad_norm": 0.8574840426445007, "learning_rate": 6.880013450010545e-05, "loss": 0.8025, "step": 118160 }, { "epoch": 0.7549544484622364, "grad_norm": 0.7543234825134277, "learning_rate": 6.87954849253329e-05, "loss": 0.6696, "step": 118170 }, { "epoch": 0.755018335611975, "grad_norm": 0.6971087455749512, "learning_rate": 6.879083516127356e-05, "loss": 1.0234, "step": 118180 }, { "epoch": 0.7550822227617137, "grad_norm": 1.7899150848388672, "learning_rate": 6.878618520797424e-05, "loss": 0.791, "step": 118190 }, { "epoch": 0.7551461099114524, "grad_norm": 0.8531982898712158, "learning_rate": 6.87815350654818e-05, "loss": 0.9281, "step": 118200 }, { "epoch": 0.7552099970611911, "grad_norm": 1.196276068687439, "learning_rate": 6.877688473384304e-05, "loss": 0.8512, "step": 118210 }, { "epoch": 0.7552738842109298, "grad_norm": 0.9306949377059937, "learning_rate": 6.877223421310481e-05, "loss": 1.0111, "step": 118220 }, { "epoch": 0.7553377713606685, "grad_norm": 0.6089597940444946, "learning_rate": 6.876758350331395e-05, "loss": 1.1216, "step": 118230 }, { "epoch": 0.7554016585104072, "grad_norm": 0.9803066253662109, "learning_rate": 6.876293260451728e-05, "loss": 0.8555, "step": 118240 }, { "epoch": 0.7554655456601459, "grad_norm": 1.0859441757202148, "learning_rate": 6.875828151676165e-05, "loss": 0.9888, "step": 118250 }, { "epoch": 0.7555294328098846, "grad_norm": 2.5297441482543945, "learning_rate": 6.875363024009389e-05, "loss": 1.0641, "step": 118260 }, { "epoch": 0.7555933199596233, "grad_norm": 0.7911348938941956, "learning_rate": 6.874897877456086e-05, "loss": 0.9526, "step": 118270 }, { "epoch": 0.755657207109362, "grad_norm": 0.6605247855186462, "learning_rate": 6.874432712020938e-05, "loss": 0.7285, "step": 118280 }, { "epoch": 0.7557210942591007, "grad_norm": 1.0286058187484741, "learning_rate": 6.873967527708633e-05, "loss": 0.8865, "step": 118290 }, { "epoch": 0.7557849814088394, "grad_norm": 0.8798809051513672, "learning_rate": 6.873502324523852e-05, "loss": 0.8385, "step": 118300 }, { "epoch": 0.7558488685585781, "grad_norm": 0.9660366177558899, "learning_rate": 6.873037102471283e-05, "loss": 0.8482, "step": 118310 }, { "epoch": 0.7559127557083168, "grad_norm": 0.5325950980186462, "learning_rate": 6.872571861555609e-05, "loss": 0.903, "step": 118320 }, { "epoch": 0.7559766428580555, "grad_norm": 0.715363085269928, "learning_rate": 6.872106601781518e-05, "loss": 1.1637, "step": 118330 }, { "epoch": 0.7560405300077943, "grad_norm": 0.5783165097236633, "learning_rate": 6.871641323153692e-05, "loss": 1.0579, "step": 118340 }, { "epoch": 0.756104417157533, "grad_norm": 0.7666789889335632, "learning_rate": 6.871176025676818e-05, "loss": 0.673, "step": 118350 }, { "epoch": 0.7561683043072717, "grad_norm": 0.9602919816970825, "learning_rate": 6.870710709355584e-05, "loss": 0.795, "step": 118360 }, { "epoch": 0.7562321914570104, "grad_norm": 0.9613474011421204, "learning_rate": 6.870245374194675e-05, "loss": 0.8152, "step": 118370 }, { "epoch": 0.7562960786067491, "grad_norm": 0.9112039804458618, "learning_rate": 6.869780020198777e-05, "loss": 0.797, "step": 118380 }, { "epoch": 0.7563599657564878, "grad_norm": 1.0254278182983398, "learning_rate": 6.869314647372577e-05, "loss": 1.0978, "step": 118390 }, { "epoch": 0.7564238529062265, "grad_norm": 1.1823453903198242, "learning_rate": 6.86884925572076e-05, "loss": 1.1565, "step": 118400 }, { "epoch": 0.7564877400559652, "grad_norm": 0.7849447727203369, "learning_rate": 6.868383845248015e-05, "loss": 0.949, "step": 118410 }, { "epoch": 0.7565516272057038, "grad_norm": 1.0870212316513062, "learning_rate": 6.867918415959028e-05, "loss": 1.1851, "step": 118420 }, { "epoch": 0.7566155143554425, "grad_norm": 1.099289894104004, "learning_rate": 6.867452967858487e-05, "loss": 0.6001, "step": 118430 }, { "epoch": 0.7566794015051812, "grad_norm": 0.7557351589202881, "learning_rate": 6.866987500951079e-05, "loss": 1.1415, "step": 118440 }, { "epoch": 0.7567432886549199, "grad_norm": 1.02070152759552, "learning_rate": 6.866522015241493e-05, "loss": 0.8612, "step": 118450 }, { "epoch": 0.7568071758046586, "grad_norm": 1.3030376434326172, "learning_rate": 6.866056510734414e-05, "loss": 1.0833, "step": 118460 }, { "epoch": 0.7568710629543973, "grad_norm": 1.1571980714797974, "learning_rate": 6.86559098743453e-05, "loss": 1.0281, "step": 118470 }, { "epoch": 0.756934950104136, "grad_norm": 0.5520379543304443, "learning_rate": 6.865125445346533e-05, "loss": 0.7683, "step": 118480 }, { "epoch": 0.7569988372538747, "grad_norm": 1.1610947847366333, "learning_rate": 6.864659884475108e-05, "loss": 0.8432, "step": 118490 }, { "epoch": 0.7570627244036134, "grad_norm": 0.5098273754119873, "learning_rate": 6.864194304824946e-05, "loss": 0.9143, "step": 118500 }, { "epoch": 0.7571266115533521, "grad_norm": 0.9760499596595764, "learning_rate": 6.863728706400734e-05, "loss": 0.8953, "step": 118510 }, { "epoch": 0.7571904987030909, "grad_norm": 0.7442782521247864, "learning_rate": 6.863263089207162e-05, "loss": 1.1744, "step": 118520 }, { "epoch": 0.7572543858528296, "grad_norm": 1.004907488822937, "learning_rate": 6.862797453248918e-05, "loss": 0.8577, "step": 118530 }, { "epoch": 0.7573182730025683, "grad_norm": 1.782226800918579, "learning_rate": 6.862331798530692e-05, "loss": 0.8857, "step": 118540 }, { "epoch": 0.757382160152307, "grad_norm": 0.9573549032211304, "learning_rate": 6.861866125057175e-05, "loss": 0.8601, "step": 118550 }, { "epoch": 0.7574460473020457, "grad_norm": 1.518573522567749, "learning_rate": 6.861400432833053e-05, "loss": 1.1323, "step": 118560 }, { "epoch": 0.7575099344517844, "grad_norm": 0.9608773589134216, "learning_rate": 6.86093472186302e-05, "loss": 0.9014, "step": 118570 }, { "epoch": 0.7575738216015231, "grad_norm": 1.326219081878662, "learning_rate": 6.860468992151764e-05, "loss": 0.8637, "step": 118580 }, { "epoch": 0.7576377087512618, "grad_norm": 0.6975284218788147, "learning_rate": 6.860003243703976e-05, "loss": 0.8435, "step": 118590 }, { "epoch": 0.7577015959010005, "grad_norm": 0.8688485026359558, "learning_rate": 6.859537476524346e-05, "loss": 0.7777, "step": 118600 }, { "epoch": 0.7577654830507392, "grad_norm": 1.3222057819366455, "learning_rate": 6.859071690617565e-05, "loss": 0.8211, "step": 118610 }, { "epoch": 0.7578293702004779, "grad_norm": 0.7221174836158752, "learning_rate": 6.858605885988325e-05, "loss": 0.9684, "step": 118620 }, { "epoch": 0.7578932573502166, "grad_norm": 1.1914112567901611, "learning_rate": 6.858140062641313e-05, "loss": 0.7354, "step": 118630 }, { "epoch": 0.7579571444999553, "grad_norm": 0.7366169095039368, "learning_rate": 6.857674220581225e-05, "loss": 0.8568, "step": 118640 }, { "epoch": 0.758021031649694, "grad_norm": 0.91144859790802, "learning_rate": 6.85720835981275e-05, "loss": 0.8024, "step": 118650 }, { "epoch": 0.7580849187994326, "grad_norm": 1.1600289344787598, "learning_rate": 6.856742480340581e-05, "loss": 0.7997, "step": 118660 }, { "epoch": 0.7581488059491713, "grad_norm": 1.0003736019134521, "learning_rate": 6.856276582169408e-05, "loss": 0.8877, "step": 118670 }, { "epoch": 0.75821269309891, "grad_norm": 1.289528489112854, "learning_rate": 6.855810665303923e-05, "loss": 0.6617, "step": 118680 }, { "epoch": 0.7582765802486487, "grad_norm": 0.9788122773170471, "learning_rate": 6.85534472974882e-05, "loss": 0.7778, "step": 118690 }, { "epoch": 0.7583404673983875, "grad_norm": 0.6570330858230591, "learning_rate": 6.854878775508792e-05, "loss": 0.9725, "step": 118700 }, { "epoch": 0.7584043545481262, "grad_norm": 0.911495566368103, "learning_rate": 6.854412802588528e-05, "loss": 0.83, "step": 118710 }, { "epoch": 0.7584682416978649, "grad_norm": 0.729153037071228, "learning_rate": 6.853946810992722e-05, "loss": 1.1369, "step": 118720 }, { "epoch": 0.7585321288476036, "grad_norm": 1.3419857025146484, "learning_rate": 6.853480800726069e-05, "loss": 0.7661, "step": 118730 }, { "epoch": 0.7585960159973423, "grad_norm": 0.9699906706809998, "learning_rate": 6.853014771793261e-05, "loss": 0.9556, "step": 118740 }, { "epoch": 0.758659903147081, "grad_norm": 0.9490513205528259, "learning_rate": 6.852548724198992e-05, "loss": 0.8333, "step": 118750 }, { "epoch": 0.7587237902968197, "grad_norm": 0.9197043776512146, "learning_rate": 6.852082657947953e-05, "loss": 0.888, "step": 118760 }, { "epoch": 0.7587876774465584, "grad_norm": 0.6110111474990845, "learning_rate": 6.85161657304484e-05, "loss": 0.8897, "step": 118770 }, { "epoch": 0.7588515645962971, "grad_norm": 0.7827733159065247, "learning_rate": 6.851150469494347e-05, "loss": 0.6686, "step": 118780 }, { "epoch": 0.7589154517460358, "grad_norm": 0.9810250997543335, "learning_rate": 6.850684347301166e-05, "loss": 0.9115, "step": 118790 }, { "epoch": 0.7589793388957745, "grad_norm": 0.816536545753479, "learning_rate": 6.850218206469993e-05, "loss": 1.0305, "step": 118800 }, { "epoch": 0.7590432260455132, "grad_norm": 0.9732635617256165, "learning_rate": 6.849752047005522e-05, "loss": 0.8531, "step": 118810 }, { "epoch": 0.7591071131952519, "grad_norm": 1.23931086063385, "learning_rate": 6.849285868912448e-05, "loss": 0.9552, "step": 118820 }, { "epoch": 0.7591710003449906, "grad_norm": 1.2935327291488647, "learning_rate": 6.848819672195466e-05, "loss": 0.8719, "step": 118830 }, { "epoch": 0.7592348874947293, "grad_norm": 1.2548699378967285, "learning_rate": 6.84835345685927e-05, "loss": 0.9801, "step": 118840 }, { "epoch": 0.759298774644468, "grad_norm": 1.043545126914978, "learning_rate": 6.847887222908555e-05, "loss": 1.1205, "step": 118850 }, { "epoch": 0.7593626617942068, "grad_norm": 0.5109646320343018, "learning_rate": 6.847420970348018e-05, "loss": 0.8246, "step": 118860 }, { "epoch": 0.7594265489439455, "grad_norm": 0.5622779130935669, "learning_rate": 6.846954699182352e-05, "loss": 0.7426, "step": 118870 }, { "epoch": 0.7594904360936842, "grad_norm": 0.672226071357727, "learning_rate": 6.846488409416256e-05, "loss": 0.7645, "step": 118880 }, { "epoch": 0.7595543232434229, "grad_norm": 0.6986059546470642, "learning_rate": 6.846022101054422e-05, "loss": 1.0862, "step": 118890 }, { "epoch": 0.7596182103931616, "grad_norm": 1.0945719480514526, "learning_rate": 6.84555577410155e-05, "loss": 1.0323, "step": 118900 }, { "epoch": 0.7596820975429002, "grad_norm": 0.9224714040756226, "learning_rate": 6.845089428562336e-05, "loss": 0.8719, "step": 118910 }, { "epoch": 0.7597459846926389, "grad_norm": 0.9247092008590698, "learning_rate": 6.844623064441473e-05, "loss": 0.8067, "step": 118920 }, { "epoch": 0.7598098718423776, "grad_norm": 0.732523500919342, "learning_rate": 6.84415668174366e-05, "loss": 1.2519, "step": 118930 }, { "epoch": 0.7598737589921163, "grad_norm": 0.9560425281524658, "learning_rate": 6.843690280473596e-05, "loss": 0.903, "step": 118940 }, { "epoch": 0.759937646141855, "grad_norm": 0.913837194442749, "learning_rate": 6.843223860635974e-05, "loss": 0.9495, "step": 118950 }, { "epoch": 0.7600015332915937, "grad_norm": 1.002140998840332, "learning_rate": 6.842757422235494e-05, "loss": 0.8853, "step": 118960 }, { "epoch": 0.7600654204413324, "grad_norm": 0.9161799550056458, "learning_rate": 6.842290965276852e-05, "loss": 0.8618, "step": 118970 }, { "epoch": 0.7601293075910711, "grad_norm": 0.6475778222084045, "learning_rate": 6.841824489764746e-05, "loss": 0.809, "step": 118980 }, { "epoch": 0.7601931947408098, "grad_norm": 1.090684413909912, "learning_rate": 6.841357995703874e-05, "loss": 1.0392, "step": 118990 }, { "epoch": 0.7602570818905485, "grad_norm": 0.6572669148445129, "learning_rate": 6.840891483098935e-05, "loss": 1.0299, "step": 119000 }, { "epoch": 0.7603209690402872, "grad_norm": 0.9290599226951599, "learning_rate": 6.840424951954625e-05, "loss": 0.9194, "step": 119010 }, { "epoch": 0.7603848561900259, "grad_norm": 0.7174366116523743, "learning_rate": 6.839958402275643e-05, "loss": 0.8349, "step": 119020 }, { "epoch": 0.7604487433397646, "grad_norm": 0.9106315970420837, "learning_rate": 6.839491834066691e-05, "loss": 0.8991, "step": 119030 }, { "epoch": 0.7605126304895033, "grad_norm": 1.6887893676757812, "learning_rate": 6.839025247332462e-05, "loss": 0.8116, "step": 119040 }, { "epoch": 0.7605765176392421, "grad_norm": 0.7932513356208801, "learning_rate": 6.838558642077658e-05, "loss": 0.952, "step": 119050 }, { "epoch": 0.7606404047889808, "grad_norm": 0.733961284160614, "learning_rate": 6.838092018306979e-05, "loss": 0.9746, "step": 119060 }, { "epoch": 0.7607042919387195, "grad_norm": 0.9534251093864441, "learning_rate": 6.837625376025123e-05, "loss": 0.9129, "step": 119070 }, { "epoch": 0.7607681790884582, "grad_norm": 0.867732048034668, "learning_rate": 6.837158715236789e-05, "loss": 1.0368, "step": 119080 }, { "epoch": 0.7608320662381969, "grad_norm": 0.9323291778564453, "learning_rate": 6.836692035946677e-05, "loss": 0.7597, "step": 119090 }, { "epoch": 0.7608959533879356, "grad_norm": 1.1437997817993164, "learning_rate": 6.83622533815949e-05, "loss": 0.9955, "step": 119100 }, { "epoch": 0.7609598405376743, "grad_norm": 0.6462964415550232, "learning_rate": 6.835758621879922e-05, "loss": 1.2323, "step": 119110 }, { "epoch": 0.761023727687413, "grad_norm": 0.7670947313308716, "learning_rate": 6.835291887112678e-05, "loss": 0.762, "step": 119120 }, { "epoch": 0.7610876148371517, "grad_norm": 1.1615947484970093, "learning_rate": 6.834825133862457e-05, "loss": 0.8781, "step": 119130 }, { "epoch": 0.7611515019868904, "grad_norm": 0.6855329871177673, "learning_rate": 6.834358362133959e-05, "loss": 0.9, "step": 119140 }, { "epoch": 0.761215389136629, "grad_norm": 1.5815876722335815, "learning_rate": 6.833891571931886e-05, "loss": 0.6895, "step": 119150 }, { "epoch": 0.7612792762863677, "grad_norm": 0.804578423500061, "learning_rate": 6.833424763260938e-05, "loss": 0.8916, "step": 119160 }, { "epoch": 0.7613431634361064, "grad_norm": 0.6342503428459167, "learning_rate": 6.832957936125816e-05, "loss": 1.0142, "step": 119170 }, { "epoch": 0.7614070505858451, "grad_norm": 1.194042682647705, "learning_rate": 6.832491090531223e-05, "loss": 0.7734, "step": 119180 }, { "epoch": 0.7614709377355838, "grad_norm": 0.8138452172279358, "learning_rate": 6.83202422648186e-05, "loss": 0.833, "step": 119190 }, { "epoch": 0.7615348248853225, "grad_norm": 0.6419638395309448, "learning_rate": 6.831557343982427e-05, "loss": 0.8826, "step": 119200 }, { "epoch": 0.7615987120350612, "grad_norm": 0.9119747281074524, "learning_rate": 6.831090443037626e-05, "loss": 0.8635, "step": 119210 }, { "epoch": 0.7616625991848, "grad_norm": 1.2391308546066284, "learning_rate": 6.83062352365216e-05, "loss": 1.0166, "step": 119220 }, { "epoch": 0.7617264863345387, "grad_norm": 1.1494985818862915, "learning_rate": 6.830156585830734e-05, "loss": 1.0373, "step": 119230 }, { "epoch": 0.7617903734842774, "grad_norm": 0.8222819566726685, "learning_rate": 6.829689629578046e-05, "loss": 0.7228, "step": 119240 }, { "epoch": 0.7618542606340161, "grad_norm": 0.60460364818573, "learning_rate": 6.829222654898799e-05, "loss": 0.8322, "step": 119250 }, { "epoch": 0.7619181477837548, "grad_norm": 1.7040772438049316, "learning_rate": 6.828755661797699e-05, "loss": 1.1171, "step": 119260 }, { "epoch": 0.7619820349334935, "grad_norm": 0.7591485977172852, "learning_rate": 6.828288650279448e-05, "loss": 0.8535, "step": 119270 }, { "epoch": 0.7620459220832322, "grad_norm": 1.0769449472427368, "learning_rate": 6.827821620348749e-05, "loss": 1.0974, "step": 119280 }, { "epoch": 0.7621098092329709, "grad_norm": 0.7819190621376038, "learning_rate": 6.827354572010303e-05, "loss": 0.9247, "step": 119290 }, { "epoch": 0.7621736963827096, "grad_norm": 0.7512619495391846, "learning_rate": 6.826887505268818e-05, "loss": 1.4029, "step": 119300 }, { "epoch": 0.7622375835324483, "grad_norm": 0.7051581740379333, "learning_rate": 6.826420420128993e-05, "loss": 0.7934, "step": 119310 }, { "epoch": 0.762301470682187, "grad_norm": 2.504819393157959, "learning_rate": 6.825953316595535e-05, "loss": 0.9571, "step": 119320 }, { "epoch": 0.7623653578319257, "grad_norm": 1.1130508184432983, "learning_rate": 6.825486194673148e-05, "loss": 0.8393, "step": 119330 }, { "epoch": 0.7624292449816644, "grad_norm": 0.8944107890129089, "learning_rate": 6.825019054366536e-05, "loss": 0.7698, "step": 119340 }, { "epoch": 0.7624931321314031, "grad_norm": 1.020553469657898, "learning_rate": 6.824551895680404e-05, "loss": 0.8631, "step": 119350 }, { "epoch": 0.7625570192811418, "grad_norm": 0.9540114402770996, "learning_rate": 6.824084718619454e-05, "loss": 0.8874, "step": 119360 }, { "epoch": 0.7626209064308805, "grad_norm": 0.6075379252433777, "learning_rate": 6.823617523188394e-05, "loss": 0.8942, "step": 119370 }, { "epoch": 0.7626847935806192, "grad_norm": 1.7551565170288086, "learning_rate": 6.823150309391928e-05, "loss": 1.041, "step": 119380 }, { "epoch": 0.7627486807303578, "grad_norm": 0.7924169898033142, "learning_rate": 6.82268307723476e-05, "loss": 0.8415, "step": 119390 }, { "epoch": 0.7628125678800965, "grad_norm": 0.9569699168205261, "learning_rate": 6.822215826721597e-05, "loss": 0.8566, "step": 119400 }, { "epoch": 0.7628764550298353, "grad_norm": 0.8898611068725586, "learning_rate": 6.821748557857144e-05, "loss": 0.6289, "step": 119410 }, { "epoch": 0.762940342179574, "grad_norm": 1.3441417217254639, "learning_rate": 6.821281270646106e-05, "loss": 0.7926, "step": 119420 }, { "epoch": 0.7630042293293127, "grad_norm": 0.8085065484046936, "learning_rate": 6.820813965093193e-05, "loss": 1.0383, "step": 119430 }, { "epoch": 0.7630681164790514, "grad_norm": 1.1543138027191162, "learning_rate": 6.820346641203106e-05, "loss": 0.769, "step": 119440 }, { "epoch": 0.7631320036287901, "grad_norm": 1.0326635837554932, "learning_rate": 6.819879298980553e-05, "loss": 0.6679, "step": 119450 }, { "epoch": 0.7631958907785288, "grad_norm": 0.7548609972000122, "learning_rate": 6.819411938430243e-05, "loss": 0.8651, "step": 119460 }, { "epoch": 0.7632597779282675, "grad_norm": 0.6115458607673645, "learning_rate": 6.818944559556879e-05, "loss": 0.9125, "step": 119470 }, { "epoch": 0.7633236650780062, "grad_norm": 0.8484747409820557, "learning_rate": 6.818477162365172e-05, "loss": 0.94, "step": 119480 }, { "epoch": 0.7633875522277449, "grad_norm": 0.9800739288330078, "learning_rate": 6.818009746859823e-05, "loss": 0.6768, "step": 119490 }, { "epoch": 0.7634514393774836, "grad_norm": 1.5265213251113892, "learning_rate": 6.817542313045547e-05, "loss": 1.0567, "step": 119500 }, { "epoch": 0.7635153265272223, "grad_norm": 1.5931601524353027, "learning_rate": 6.817074860927045e-05, "loss": 1.4033, "step": 119510 }, { "epoch": 0.763579213676961, "grad_norm": 0.976694643497467, "learning_rate": 6.816607390509028e-05, "loss": 0.8279, "step": 119520 }, { "epoch": 0.7636431008266997, "grad_norm": 0.9799617528915405, "learning_rate": 6.816139901796202e-05, "loss": 1.0672, "step": 119530 }, { "epoch": 0.7637069879764384, "grad_norm": 1.1323072910308838, "learning_rate": 6.815672394793277e-05, "loss": 0.9828, "step": 119540 }, { "epoch": 0.7637708751261771, "grad_norm": 1.2014492750167847, "learning_rate": 6.815204869504961e-05, "loss": 1.0682, "step": 119550 }, { "epoch": 0.7638347622759158, "grad_norm": 3.5408363342285156, "learning_rate": 6.81473732593596e-05, "loss": 1.0801, "step": 119560 }, { "epoch": 0.7638986494256546, "grad_norm": 0.9492976069450378, "learning_rate": 6.814269764090986e-05, "loss": 0.9406, "step": 119570 }, { "epoch": 0.7639625365753933, "grad_norm": 0.7474743723869324, "learning_rate": 6.813802183974745e-05, "loss": 0.8298, "step": 119580 }, { "epoch": 0.764026423725132, "grad_norm": 1.4195810556411743, "learning_rate": 6.813334585591946e-05, "loss": 1.1686, "step": 119590 }, { "epoch": 0.7640903108748707, "grad_norm": 1.6396797895431519, "learning_rate": 6.8128669689473e-05, "loss": 0.7154, "step": 119600 }, { "epoch": 0.7641541980246094, "grad_norm": 1.0308012962341309, "learning_rate": 6.812399334045514e-05, "loss": 0.6851, "step": 119610 }, { "epoch": 0.7642180851743481, "grad_norm": 0.7701680064201355, "learning_rate": 6.8119316808913e-05, "loss": 0.987, "step": 119620 }, { "epoch": 0.7642819723240867, "grad_norm": 0.8354985117912292, "learning_rate": 6.811464009489365e-05, "loss": 0.8276, "step": 119630 }, { "epoch": 0.7643458594738254, "grad_norm": 1.0913121700286865, "learning_rate": 6.810996319844422e-05, "loss": 0.8687, "step": 119640 }, { "epoch": 0.7644097466235641, "grad_norm": 0.845710039138794, "learning_rate": 6.81052861196118e-05, "loss": 0.9144, "step": 119650 }, { "epoch": 0.7644736337733028, "grad_norm": 0.9504249095916748, "learning_rate": 6.810060885844346e-05, "loss": 0.9089, "step": 119660 }, { "epoch": 0.7645375209230415, "grad_norm": 0.846555769443512, "learning_rate": 6.809593141498633e-05, "loss": 0.9722, "step": 119670 }, { "epoch": 0.7646014080727802, "grad_norm": 1.736290693283081, "learning_rate": 6.809125378928754e-05, "loss": 0.8593, "step": 119680 }, { "epoch": 0.7646652952225189, "grad_norm": 0.732244610786438, "learning_rate": 6.808657598139416e-05, "loss": 0.8176, "step": 119690 }, { "epoch": 0.7647291823722576, "grad_norm": 0.5221996307373047, "learning_rate": 6.80818979913533e-05, "loss": 0.7639, "step": 119700 }, { "epoch": 0.7647930695219963, "grad_norm": 0.8514750003814697, "learning_rate": 6.80772198192121e-05, "loss": 0.9472, "step": 119710 }, { "epoch": 0.764856956671735, "grad_norm": 0.9706042408943176, "learning_rate": 6.807254146501766e-05, "loss": 0.8994, "step": 119720 }, { "epoch": 0.7649208438214737, "grad_norm": 0.792775571346283, "learning_rate": 6.806786292881708e-05, "loss": 0.9944, "step": 119730 }, { "epoch": 0.7649847309712124, "grad_norm": 0.786178469657898, "learning_rate": 6.80631842106575e-05, "loss": 0.8212, "step": 119740 }, { "epoch": 0.7650486181209512, "grad_norm": 0.7634421586990356, "learning_rate": 6.805850531058604e-05, "loss": 0.896, "step": 119750 }, { "epoch": 0.7651125052706899, "grad_norm": 1.2600396871566772, "learning_rate": 6.805382622864978e-05, "loss": 0.8976, "step": 119760 }, { "epoch": 0.7651763924204286, "grad_norm": 0.9852913618087769, "learning_rate": 6.804914696489587e-05, "loss": 0.807, "step": 119770 }, { "epoch": 0.7652402795701673, "grad_norm": 0.7352543473243713, "learning_rate": 6.804446751937146e-05, "loss": 0.9483, "step": 119780 }, { "epoch": 0.765304166719906, "grad_norm": 0.6477217674255371, "learning_rate": 6.803978789212363e-05, "loss": 0.7509, "step": 119790 }, { "epoch": 0.7653680538696447, "grad_norm": 0.7691764831542969, "learning_rate": 6.803510808319954e-05, "loss": 0.9045, "step": 119800 }, { "epoch": 0.7654319410193834, "grad_norm": 1.1947227716445923, "learning_rate": 6.803042809264632e-05, "loss": 1.0757, "step": 119810 }, { "epoch": 0.7654958281691221, "grad_norm": 0.9047258496284485, "learning_rate": 6.802574792051107e-05, "loss": 0.8635, "step": 119820 }, { "epoch": 0.7655597153188608, "grad_norm": 0.8005874752998352, "learning_rate": 6.802106756684096e-05, "loss": 0.7446, "step": 119830 }, { "epoch": 0.7656236024685995, "grad_norm": 0.7462660670280457, "learning_rate": 6.80163870316831e-05, "loss": 0.6892, "step": 119840 }, { "epoch": 0.7656874896183382, "grad_norm": 0.7342929244041443, "learning_rate": 6.801170631508465e-05, "loss": 0.8575, "step": 119850 }, { "epoch": 0.7657513767680769, "grad_norm": 0.6299241781234741, "learning_rate": 6.800702541709272e-05, "loss": 1.0322, "step": 119860 }, { "epoch": 0.7658152639178156, "grad_norm": 2.7845346927642822, "learning_rate": 6.800234433775448e-05, "loss": 0.9482, "step": 119870 }, { "epoch": 0.7658791510675542, "grad_norm": 0.7888658046722412, "learning_rate": 6.799766307711704e-05, "loss": 0.9034, "step": 119880 }, { "epoch": 0.7659430382172929, "grad_norm": 1.2494713068008423, "learning_rate": 6.799298163522757e-05, "loss": 0.7792, "step": 119890 }, { "epoch": 0.7660069253670316, "grad_norm": 0.8245709538459778, "learning_rate": 6.79883000121332e-05, "loss": 0.7697, "step": 119900 }, { "epoch": 0.7660708125167703, "grad_norm": 0.6942414045333862, "learning_rate": 6.79836182078811e-05, "loss": 0.9865, "step": 119910 }, { "epoch": 0.766134699666509, "grad_norm": 0.8170196413993835, "learning_rate": 6.797893622251841e-05, "loss": 0.8344, "step": 119920 }, { "epoch": 0.7661985868162478, "grad_norm": 1.3100725412368774, "learning_rate": 6.797425405609225e-05, "loss": 0.8997, "step": 119930 }, { "epoch": 0.7662624739659865, "grad_norm": 0.8601463437080383, "learning_rate": 6.796957170864984e-05, "loss": 0.7987, "step": 119940 }, { "epoch": 0.7663263611157252, "grad_norm": 0.9720836877822876, "learning_rate": 6.796488918023827e-05, "loss": 0.725, "step": 119950 }, { "epoch": 0.7663902482654639, "grad_norm": 0.8214823007583618, "learning_rate": 6.796020647090472e-05, "loss": 0.6698, "step": 119960 }, { "epoch": 0.7664541354152026, "grad_norm": 1.443429946899414, "learning_rate": 6.795552358069637e-05, "loss": 0.9442, "step": 119970 }, { "epoch": 0.7665180225649413, "grad_norm": 0.8205900192260742, "learning_rate": 6.795084050966038e-05, "loss": 0.9229, "step": 119980 }, { "epoch": 0.76658190971468, "grad_norm": 0.7707697153091431, "learning_rate": 6.794615725784386e-05, "loss": 1.2311, "step": 119990 }, { "epoch": 0.7666457968644187, "grad_norm": 0.9643944501876831, "learning_rate": 6.794147382529403e-05, "loss": 0.8979, "step": 120000 }, { "epoch": 0.7667096840141574, "grad_norm": 0.6266613006591797, "learning_rate": 6.793679021205804e-05, "loss": 0.8486, "step": 120010 }, { "epoch": 0.7667735711638961, "grad_norm": 0.7396105527877808, "learning_rate": 6.793210641818305e-05, "loss": 0.6949, "step": 120020 }, { "epoch": 0.7668374583136348, "grad_norm": 1.156002402305603, "learning_rate": 6.792742244371624e-05, "loss": 0.8869, "step": 120030 }, { "epoch": 0.7669013454633735, "grad_norm": 0.8425427079200745, "learning_rate": 6.792273828870477e-05, "loss": 0.6861, "step": 120040 }, { "epoch": 0.7669652326131122, "grad_norm": 0.8769024610519409, "learning_rate": 6.791805395319582e-05, "loss": 0.7712, "step": 120050 }, { "epoch": 0.7670291197628509, "grad_norm": 0.7793144583702087, "learning_rate": 6.791336943723657e-05, "loss": 0.8332, "step": 120060 }, { "epoch": 0.7670930069125896, "grad_norm": 0.6223095655441284, "learning_rate": 6.790868474087419e-05, "loss": 0.8934, "step": 120070 }, { "epoch": 0.7671568940623283, "grad_norm": 1.2079180479049683, "learning_rate": 6.790399986415587e-05, "loss": 1.0223, "step": 120080 }, { "epoch": 0.767220781212067, "grad_norm": 3.4866483211517334, "learning_rate": 6.789931480712876e-05, "loss": 0.9603, "step": 120090 }, { "epoch": 0.7672846683618058, "grad_norm": 0.7265200614929199, "learning_rate": 6.789462956984008e-05, "loss": 0.8821, "step": 120100 }, { "epoch": 0.7673485555115445, "grad_norm": 0.47647228837013245, "learning_rate": 6.788994415233699e-05, "loss": 0.8709, "step": 120110 }, { "epoch": 0.7674124426612831, "grad_norm": 0.7587248682975769, "learning_rate": 6.78852585546667e-05, "loss": 0.9765, "step": 120120 }, { "epoch": 0.7674763298110218, "grad_norm": 1.0275150537490845, "learning_rate": 6.788057277687638e-05, "loss": 0.9257, "step": 120130 }, { "epoch": 0.7675402169607605, "grad_norm": 2.4082839488983154, "learning_rate": 6.787588681901321e-05, "loss": 0.7645, "step": 120140 }, { "epoch": 0.7676041041104992, "grad_norm": 0.8140583634376526, "learning_rate": 6.78712006811244e-05, "loss": 0.9093, "step": 120150 }, { "epoch": 0.7676679912602379, "grad_norm": 0.5556838512420654, "learning_rate": 6.786651436325715e-05, "loss": 0.8601, "step": 120160 }, { "epoch": 0.7677318784099766, "grad_norm": 0.8413486480712891, "learning_rate": 6.786182786545863e-05, "loss": 0.7334, "step": 120170 }, { "epoch": 0.7677957655597153, "grad_norm": 0.7864370346069336, "learning_rate": 6.785714118777607e-05, "loss": 1.0277, "step": 120180 }, { "epoch": 0.767859652709454, "grad_norm": 0.9981165528297424, "learning_rate": 6.785245433025662e-05, "loss": 0.7542, "step": 120190 }, { "epoch": 0.7679235398591927, "grad_norm": 2.119781255722046, "learning_rate": 6.784776729294752e-05, "loss": 0.6569, "step": 120200 }, { "epoch": 0.7679874270089314, "grad_norm": 1.0580250024795532, "learning_rate": 6.784308007589598e-05, "loss": 0.7881, "step": 120210 }, { "epoch": 0.7680513141586701, "grad_norm": 0.9876987338066101, "learning_rate": 6.783839267914918e-05, "loss": 0.7479, "step": 120220 }, { "epoch": 0.7681152013084088, "grad_norm": 0.8951814770698547, "learning_rate": 6.783370510275433e-05, "loss": 0.8872, "step": 120230 }, { "epoch": 0.7681790884581475, "grad_norm": 2.5809152126312256, "learning_rate": 6.782901734675864e-05, "loss": 0.8542, "step": 120240 }, { "epoch": 0.7682429756078862, "grad_norm": 0.884162962436676, "learning_rate": 6.782432941120932e-05, "loss": 0.7915, "step": 120250 }, { "epoch": 0.7683068627576249, "grad_norm": 0.7958625555038452, "learning_rate": 6.781964129615359e-05, "loss": 0.9709, "step": 120260 }, { "epoch": 0.7683707499073636, "grad_norm": 0.8712575435638428, "learning_rate": 6.781495300163865e-05, "loss": 0.8752, "step": 120270 }, { "epoch": 0.7684346370571024, "grad_norm": 0.8485830426216125, "learning_rate": 6.781026452771172e-05, "loss": 1.0295, "step": 120280 }, { "epoch": 0.7684985242068411, "grad_norm": 0.7899221777915955, "learning_rate": 6.780557587442001e-05, "loss": 0.7579, "step": 120290 }, { "epoch": 0.7685624113565798, "grad_norm": 0.5380288362503052, "learning_rate": 6.780088704181075e-05, "loss": 1.2273, "step": 120300 }, { "epoch": 0.7686262985063185, "grad_norm": 0.8067999482154846, "learning_rate": 6.779619802993118e-05, "loss": 0.9209, "step": 120310 }, { "epoch": 0.7686901856560572, "grad_norm": 0.7237452268600464, "learning_rate": 6.779150883882848e-05, "loss": 1.0752, "step": 120320 }, { "epoch": 0.7687540728057959, "grad_norm": 0.6322763562202454, "learning_rate": 6.77868194685499e-05, "loss": 0.7969, "step": 120330 }, { "epoch": 0.7688179599555346, "grad_norm": 1.1552351713180542, "learning_rate": 6.778212991914266e-05, "loss": 0.9154, "step": 120340 }, { "epoch": 0.7688818471052733, "grad_norm": 1.1436083316802979, "learning_rate": 6.777744019065399e-05, "loss": 0.9167, "step": 120350 }, { "epoch": 0.7689457342550119, "grad_norm": 1.0631415843963623, "learning_rate": 6.77727502831311e-05, "loss": 0.8223, "step": 120360 }, { "epoch": 0.7690096214047506, "grad_norm": 0.9322156310081482, "learning_rate": 6.776806019662127e-05, "loss": 0.9355, "step": 120370 }, { "epoch": 0.7690735085544893, "grad_norm": 0.9718419909477234, "learning_rate": 6.776336993117168e-05, "loss": 0.8536, "step": 120380 }, { "epoch": 0.769137395704228, "grad_norm": 1.3241702318191528, "learning_rate": 6.775867948682959e-05, "loss": 0.9899, "step": 120390 }, { "epoch": 0.7692012828539667, "grad_norm": 1.2391200065612793, "learning_rate": 6.775398886364224e-05, "loss": 0.7317, "step": 120400 }, { "epoch": 0.7692651700037054, "grad_norm": 0.8078621029853821, "learning_rate": 6.774929806165686e-05, "loss": 1.015, "step": 120410 }, { "epoch": 0.7693290571534441, "grad_norm": 0.7837865948677063, "learning_rate": 6.77446070809207e-05, "loss": 0.8237, "step": 120420 }, { "epoch": 0.7693929443031828, "grad_norm": 0.9741398096084595, "learning_rate": 6.773991592148098e-05, "loss": 0.8702, "step": 120430 }, { "epoch": 0.7694568314529215, "grad_norm": 1.1501209735870361, "learning_rate": 6.773522458338497e-05, "loss": 0.9291, "step": 120440 }, { "epoch": 0.7695207186026602, "grad_norm": 0.5776450634002686, "learning_rate": 6.77305330666799e-05, "loss": 1.0505, "step": 120450 }, { "epoch": 0.769584605752399, "grad_norm": 0.7684696316719055, "learning_rate": 6.772584137141302e-05, "loss": 0.7328, "step": 120460 }, { "epoch": 0.7696484929021377, "grad_norm": 0.680523157119751, "learning_rate": 6.772114949763158e-05, "loss": 0.9261, "step": 120470 }, { "epoch": 0.7697123800518764, "grad_norm": 0.8536334037780762, "learning_rate": 6.771645744538284e-05, "loss": 1.0571, "step": 120480 }, { "epoch": 0.7697762672016151, "grad_norm": 0.7580819129943848, "learning_rate": 6.771176521471405e-05, "loss": 0.8517, "step": 120490 }, { "epoch": 0.7698401543513538, "grad_norm": 0.9832444190979004, "learning_rate": 6.770707280567247e-05, "loss": 0.9181, "step": 120500 }, { "epoch": 0.7699040415010925, "grad_norm": 0.7702086567878723, "learning_rate": 6.770238021830532e-05, "loss": 0.9504, "step": 120510 }, { "epoch": 0.7699679286508312, "grad_norm": 1.5449334383010864, "learning_rate": 6.769768745265991e-05, "loss": 0.7662, "step": 120520 }, { "epoch": 0.7700318158005699, "grad_norm": 1.5101966857910156, "learning_rate": 6.769299450878349e-05, "loss": 0.8513, "step": 120530 }, { "epoch": 0.7700957029503086, "grad_norm": 0.6847185492515564, "learning_rate": 6.768830138672327e-05, "loss": 0.8803, "step": 120540 }, { "epoch": 0.7701595901000473, "grad_norm": 0.6780611276626587, "learning_rate": 6.768360808652659e-05, "loss": 0.7456, "step": 120550 }, { "epoch": 0.770223477249786, "grad_norm": 1.0173395872116089, "learning_rate": 6.767891460824066e-05, "loss": 0.7139, "step": 120560 }, { "epoch": 0.7702873643995247, "grad_norm": 0.6809027791023254, "learning_rate": 6.767422095191277e-05, "loss": 1.1505, "step": 120570 }, { "epoch": 0.7703512515492634, "grad_norm": 0.9474708437919617, "learning_rate": 6.766952711759018e-05, "loss": 0.8336, "step": 120580 }, { "epoch": 0.7704151386990021, "grad_norm": 0.5041736960411072, "learning_rate": 6.766483310532017e-05, "loss": 0.7056, "step": 120590 }, { "epoch": 0.7704790258487408, "grad_norm": 1.975868582725525, "learning_rate": 6.766013891515e-05, "loss": 0.9774, "step": 120600 }, { "epoch": 0.7705429129984794, "grad_norm": 0.8999959826469421, "learning_rate": 6.765544454712696e-05, "loss": 0.8933, "step": 120610 }, { "epoch": 0.7706068001482181, "grad_norm": 0.839335560798645, "learning_rate": 6.765075000129831e-05, "loss": 0.8328, "step": 120620 }, { "epoch": 0.7706706872979568, "grad_norm": 0.8537786602973938, "learning_rate": 6.764605527771133e-05, "loss": 0.8143, "step": 120630 }, { "epoch": 0.7707345744476956, "grad_norm": 0.7219642996788025, "learning_rate": 6.764136037641333e-05, "loss": 0.7989, "step": 120640 }, { "epoch": 0.7707984615974343, "grad_norm": 0.6712138056755066, "learning_rate": 6.763666529745156e-05, "loss": 1.1548, "step": 120650 }, { "epoch": 0.770862348747173, "grad_norm": 0.8392811417579651, "learning_rate": 6.763197004087331e-05, "loss": 0.7134, "step": 120660 }, { "epoch": 0.7709262358969117, "grad_norm": 0.8870442509651184, "learning_rate": 6.762727460672586e-05, "loss": 0.7751, "step": 120670 }, { "epoch": 0.7709901230466504, "grad_norm": 1.1646859645843506, "learning_rate": 6.762257899505653e-05, "loss": 1.0547, "step": 120680 }, { "epoch": 0.7710540101963891, "grad_norm": 1.5529083013534546, "learning_rate": 6.761788320591257e-05, "loss": 0.8419, "step": 120690 }, { "epoch": 0.7711178973461278, "grad_norm": 2.7997336387634277, "learning_rate": 6.761318723934128e-05, "loss": 0.9536, "step": 120700 }, { "epoch": 0.7711817844958665, "grad_norm": 1.0143194198608398, "learning_rate": 6.760849109538996e-05, "loss": 0.9442, "step": 120710 }, { "epoch": 0.7712456716456052, "grad_norm": 1.0322544574737549, "learning_rate": 6.76037947741059e-05, "loss": 0.951, "step": 120720 }, { "epoch": 0.7713095587953439, "grad_norm": 0.9332642555236816, "learning_rate": 6.759956793336986e-05, "loss": 0.7949, "step": 120730 }, { "epoch": 0.7713734459450826, "grad_norm": 0.8280695080757141, "learning_rate": 6.759487127528388e-05, "loss": 0.7483, "step": 120740 }, { "epoch": 0.7714373330948213, "grad_norm": 0.6056891679763794, "learning_rate": 6.759017444000235e-05, "loss": 0.8854, "step": 120750 }, { "epoch": 0.77150122024456, "grad_norm": 1.0320556163787842, "learning_rate": 6.758547742757254e-05, "loss": 0.9241, "step": 120760 }, { "epoch": 0.7715651073942987, "grad_norm": 1.820793867111206, "learning_rate": 6.758078023804176e-05, "loss": 1.0678, "step": 120770 }, { "epoch": 0.7716289945440374, "grad_norm": 2.8395168781280518, "learning_rate": 6.757608287145731e-05, "loss": 0.7885, "step": 120780 }, { "epoch": 0.7716928816937761, "grad_norm": 0.7798264026641846, "learning_rate": 6.75713853278665e-05, "loss": 0.9502, "step": 120790 }, { "epoch": 0.7717567688435149, "grad_norm": 0.8113306164741516, "learning_rate": 6.756668760731665e-05, "loss": 0.7035, "step": 120800 }, { "epoch": 0.7718206559932536, "grad_norm": 0.7610470056533813, "learning_rate": 6.756198970985506e-05, "loss": 0.9429, "step": 120810 }, { "epoch": 0.7718845431429923, "grad_norm": 0.8831712603569031, "learning_rate": 6.755729163552902e-05, "loss": 0.9622, "step": 120820 }, { "epoch": 0.771948430292731, "grad_norm": 0.9428032040596008, "learning_rate": 6.755259338438588e-05, "loss": 0.9375, "step": 120830 }, { "epoch": 0.7720123174424697, "grad_norm": 1.5266450643539429, "learning_rate": 6.754789495647293e-05, "loss": 0.9392, "step": 120840 }, { "epoch": 0.7720762045922083, "grad_norm": 1.08087956905365, "learning_rate": 6.75431963518375e-05, "loss": 1.0333, "step": 120850 }, { "epoch": 0.772140091741947, "grad_norm": 1.0593822002410889, "learning_rate": 6.75384975705269e-05, "loss": 0.9502, "step": 120860 }, { "epoch": 0.7722039788916857, "grad_norm": 0.902668297290802, "learning_rate": 6.753379861258846e-05, "loss": 0.5924, "step": 120870 }, { "epoch": 0.7722678660414244, "grad_norm": 1.1227551698684692, "learning_rate": 6.752909947806951e-05, "loss": 0.7154, "step": 120880 }, { "epoch": 0.7723317531911631, "grad_norm": 0.7121851444244385, "learning_rate": 6.752440016701736e-05, "loss": 0.8883, "step": 120890 }, { "epoch": 0.7723956403409018, "grad_norm": 0.878093421459198, "learning_rate": 6.751970067947932e-05, "loss": 1.0066, "step": 120900 }, { "epoch": 0.7724595274906405, "grad_norm": 2.005844831466675, "learning_rate": 6.751500101550275e-05, "loss": 0.8736, "step": 120910 }, { "epoch": 0.7725234146403792, "grad_norm": 1.369321346282959, "learning_rate": 6.751030117513497e-05, "loss": 1.1788, "step": 120920 }, { "epoch": 0.7725873017901179, "grad_norm": 0.6035107374191284, "learning_rate": 6.750560115842332e-05, "loss": 1.1607, "step": 120930 }, { "epoch": 0.7726511889398566, "grad_norm": 1.0282695293426514, "learning_rate": 6.750090096541511e-05, "loss": 0.7348, "step": 120940 }, { "epoch": 0.7727150760895953, "grad_norm": 0.5575137734413147, "learning_rate": 6.749620059615768e-05, "loss": 0.8886, "step": 120950 }, { "epoch": 0.772778963239334, "grad_norm": 0.8261436223983765, "learning_rate": 6.749150005069838e-05, "loss": 0.928, "step": 120960 }, { "epoch": 0.7728428503890727, "grad_norm": 0.8338256478309631, "learning_rate": 6.748679932908454e-05, "loss": 0.646, "step": 120970 }, { "epoch": 0.7729067375388115, "grad_norm": 0.7634387612342834, "learning_rate": 6.74820984313635e-05, "loss": 0.9501, "step": 120980 }, { "epoch": 0.7729706246885502, "grad_norm": 0.8158954977989197, "learning_rate": 6.747739735758262e-05, "loss": 0.937, "step": 120990 }, { "epoch": 0.7730345118382889, "grad_norm": 0.8353099226951599, "learning_rate": 6.747269610778922e-05, "loss": 0.9787, "step": 121000 }, { "epoch": 0.7730983989880276, "grad_norm": 1.1733039617538452, "learning_rate": 6.746799468203064e-05, "loss": 0.924, "step": 121010 }, { "epoch": 0.7731622861377663, "grad_norm": 0.6754859685897827, "learning_rate": 6.746329308035426e-05, "loss": 1.1289, "step": 121020 }, { "epoch": 0.773226173287505, "grad_norm": 0.7313271164894104, "learning_rate": 6.745859130280741e-05, "loss": 0.8438, "step": 121030 }, { "epoch": 0.7732900604372437, "grad_norm": 1.6041016578674316, "learning_rate": 6.745388934943743e-05, "loss": 0.7458, "step": 121040 }, { "epoch": 0.7733539475869824, "grad_norm": 0.7553384900093079, "learning_rate": 6.744918722029169e-05, "loss": 0.8966, "step": 121050 }, { "epoch": 0.7734178347367211, "grad_norm": 0.4830940365791321, "learning_rate": 6.744448491541754e-05, "loss": 0.6584, "step": 121060 }, { "epoch": 0.7734817218864598, "grad_norm": 0.8653696179389954, "learning_rate": 6.743978243486233e-05, "loss": 1.2337, "step": 121070 }, { "epoch": 0.7735456090361985, "grad_norm": 0.8902184963226318, "learning_rate": 6.743507977867342e-05, "loss": 0.8364, "step": 121080 }, { "epoch": 0.7736094961859371, "grad_norm": 0.9658520817756653, "learning_rate": 6.74303769468982e-05, "loss": 0.9397, "step": 121090 }, { "epoch": 0.7736733833356758, "grad_norm": 0.7507711052894592, "learning_rate": 6.742567393958398e-05, "loss": 0.723, "step": 121100 }, { "epoch": 0.7737372704854145, "grad_norm": 0.6307206153869629, "learning_rate": 6.742097075677815e-05, "loss": 0.7924, "step": 121110 }, { "epoch": 0.7738011576351532, "grad_norm": 1.159859538078308, "learning_rate": 6.741626739852806e-05, "loss": 0.8277, "step": 121120 }, { "epoch": 0.7738650447848919, "grad_norm": 0.7750802636146545, "learning_rate": 6.741156386488112e-05, "loss": 1.0919, "step": 121130 }, { "epoch": 0.7739289319346306, "grad_norm": 0.9529350399971008, "learning_rate": 6.740686015588465e-05, "loss": 1.0912, "step": 121140 }, { "epoch": 0.7739928190843693, "grad_norm": 0.8599395751953125, "learning_rate": 6.740215627158605e-05, "loss": 1.0332, "step": 121150 }, { "epoch": 0.774056706234108, "grad_norm": 0.9793898463249207, "learning_rate": 6.739745221203268e-05, "loss": 0.7607, "step": 121160 }, { "epoch": 0.7741205933838468, "grad_norm": 0.8916863799095154, "learning_rate": 6.739274797727191e-05, "loss": 1.0146, "step": 121170 }, { "epoch": 0.7741844805335855, "grad_norm": 1.1108578443527222, "learning_rate": 6.738804356735113e-05, "loss": 0.9828, "step": 121180 }, { "epoch": 0.7742483676833242, "grad_norm": 1.299629807472229, "learning_rate": 6.73833389823177e-05, "loss": 1.1273, "step": 121190 }, { "epoch": 0.7743122548330629, "grad_norm": 0.9776250123977661, "learning_rate": 6.737863422221902e-05, "loss": 0.7339, "step": 121200 }, { "epoch": 0.7743761419828016, "grad_norm": 0.8308240175247192, "learning_rate": 6.737392928710245e-05, "loss": 0.9728, "step": 121210 }, { "epoch": 0.7744400291325403, "grad_norm": 1.0700846910476685, "learning_rate": 6.736922417701537e-05, "loss": 0.8842, "step": 121220 }, { "epoch": 0.774503916282279, "grad_norm": 0.8977962732315063, "learning_rate": 6.736451889200518e-05, "loss": 0.8738, "step": 121230 }, { "epoch": 0.7745678034320177, "grad_norm": 0.8981806039810181, "learning_rate": 6.735981343211927e-05, "loss": 0.82, "step": 121240 }, { "epoch": 0.7746316905817564, "grad_norm": 1.0636060237884521, "learning_rate": 6.735510779740502e-05, "loss": 0.8949, "step": 121250 }, { "epoch": 0.7746955777314951, "grad_norm": 0.8184270858764648, "learning_rate": 6.735040198790982e-05, "loss": 0.8559, "step": 121260 }, { "epoch": 0.7747594648812338, "grad_norm": 1.0047454833984375, "learning_rate": 6.734569600368105e-05, "loss": 1.2097, "step": 121270 }, { "epoch": 0.7748233520309725, "grad_norm": 1.6020781993865967, "learning_rate": 6.734098984476612e-05, "loss": 0.757, "step": 121280 }, { "epoch": 0.7748872391807112, "grad_norm": 0.7962193489074707, "learning_rate": 6.733628351121243e-05, "loss": 0.8267, "step": 121290 }, { "epoch": 0.7749511263304499, "grad_norm": 1.1019634008407593, "learning_rate": 6.733157700306737e-05, "loss": 0.869, "step": 121300 }, { "epoch": 0.7750150134801886, "grad_norm": 0.6633391976356506, "learning_rate": 6.732687032037832e-05, "loss": 0.843, "step": 121310 }, { "epoch": 0.7750789006299273, "grad_norm": 1.0275635719299316, "learning_rate": 6.73221634631927e-05, "loss": 0.9104, "step": 121320 }, { "epoch": 0.7751427877796659, "grad_norm": 0.7791745662689209, "learning_rate": 6.73174564315579e-05, "loss": 0.9885, "step": 121330 }, { "epoch": 0.7752066749294046, "grad_norm": 1.2673611640930176, "learning_rate": 6.731274922552135e-05, "loss": 0.6765, "step": 121340 }, { "epoch": 0.7752705620791434, "grad_norm": 0.8525099754333496, "learning_rate": 6.730804184513044e-05, "loss": 0.8447, "step": 121350 }, { "epoch": 0.7753344492288821, "grad_norm": 0.8787998557090759, "learning_rate": 6.730333429043256e-05, "loss": 0.8673, "step": 121360 }, { "epoch": 0.7753983363786208, "grad_norm": 0.7278786897659302, "learning_rate": 6.729862656147514e-05, "loss": 0.7846, "step": 121370 }, { "epoch": 0.7754622235283595, "grad_norm": 1.0714443922042847, "learning_rate": 6.729391865830559e-05, "loss": 0.809, "step": 121380 }, { "epoch": 0.7755261106780982, "grad_norm": 0.820010244846344, "learning_rate": 6.72892105809713e-05, "loss": 0.8847, "step": 121390 }, { "epoch": 0.7755899978278369, "grad_norm": 1.3069791793823242, "learning_rate": 6.728450232951972e-05, "loss": 0.8478, "step": 121400 }, { "epoch": 0.7756538849775756, "grad_norm": 0.992739737033844, "learning_rate": 6.727979390399825e-05, "loss": 1.0541, "step": 121410 }, { "epoch": 0.7757177721273143, "grad_norm": 0.8804332613945007, "learning_rate": 6.72750853044543e-05, "loss": 0.9293, "step": 121420 }, { "epoch": 0.775781659277053, "grad_norm": 0.9958817958831787, "learning_rate": 6.72703765309353e-05, "loss": 0.959, "step": 121430 }, { "epoch": 0.7758455464267917, "grad_norm": 0.8248307704925537, "learning_rate": 6.726566758348867e-05, "loss": 0.8786, "step": 121440 }, { "epoch": 0.7759094335765304, "grad_norm": 0.9788550138473511, "learning_rate": 6.726095846216181e-05, "loss": 0.6713, "step": 121450 }, { "epoch": 0.7759733207262691, "grad_norm": 1.0201257467269897, "learning_rate": 6.725624916700218e-05, "loss": 1.1351, "step": 121460 }, { "epoch": 0.7760372078760078, "grad_norm": 0.550969123840332, "learning_rate": 6.72515396980572e-05, "loss": 0.9425, "step": 121470 }, { "epoch": 0.7761010950257465, "grad_norm": 0.7929662466049194, "learning_rate": 6.724683005537427e-05, "loss": 1.0115, "step": 121480 }, { "epoch": 0.7761649821754852, "grad_norm": 1.1260933876037598, "learning_rate": 6.724212023900086e-05, "loss": 0.8828, "step": 121490 }, { "epoch": 0.776228869325224, "grad_norm": 1.8724021911621094, "learning_rate": 6.723741024898438e-05, "loss": 1.1478, "step": 121500 }, { "epoch": 0.7762927564749627, "grad_norm": 0.7166033387184143, "learning_rate": 6.723270008537225e-05, "loss": 0.9861, "step": 121510 }, { "epoch": 0.7763566436247014, "grad_norm": 0.6689345240592957, "learning_rate": 6.722798974821193e-05, "loss": 1.0694, "step": 121520 }, { "epoch": 0.7764205307744401, "grad_norm": 0.7042884230613708, "learning_rate": 6.722327923755086e-05, "loss": 1.0029, "step": 121530 }, { "epoch": 0.7764844179241788, "grad_norm": 0.6996636390686035, "learning_rate": 6.721856855343647e-05, "loss": 0.7168, "step": 121540 }, { "epoch": 0.7765483050739175, "grad_norm": 0.7664635181427002, "learning_rate": 6.721385769591618e-05, "loss": 1.13, "step": 121550 }, { "epoch": 0.7766121922236562, "grad_norm": 1.2732270956039429, "learning_rate": 6.720914666503746e-05, "loss": 0.7654, "step": 121560 }, { "epoch": 0.7766760793733949, "grad_norm": 1.799688458442688, "learning_rate": 6.720443546084775e-05, "loss": 0.9057, "step": 121570 }, { "epoch": 0.7767399665231335, "grad_norm": 0.6729174852371216, "learning_rate": 6.719972408339447e-05, "loss": 0.8531, "step": 121580 }, { "epoch": 0.7768038536728722, "grad_norm": 0.8907155394554138, "learning_rate": 6.719501253272513e-05, "loss": 0.6519, "step": 121590 }, { "epoch": 0.7768677408226109, "grad_norm": 0.7867339253425598, "learning_rate": 6.71903008088871e-05, "loss": 0.7657, "step": 121600 }, { "epoch": 0.7769316279723496, "grad_norm": 0.8111919164657593, "learning_rate": 6.718558891192788e-05, "loss": 0.89, "step": 121610 }, { "epoch": 0.7769955151220883, "grad_norm": 1.4637147188186646, "learning_rate": 6.718087684189491e-05, "loss": 0.7084, "step": 121620 }, { "epoch": 0.777059402271827, "grad_norm": 0.9606701731681824, "learning_rate": 6.717616459883564e-05, "loss": 0.8442, "step": 121630 }, { "epoch": 0.7771232894215657, "grad_norm": 0.5550901293754578, "learning_rate": 6.717145218279755e-05, "loss": 0.785, "step": 121640 }, { "epoch": 0.7771871765713044, "grad_norm": 0.8698827028274536, "learning_rate": 6.716673959382806e-05, "loss": 0.9919, "step": 121650 }, { "epoch": 0.7772510637210431, "grad_norm": 1.0764882564544678, "learning_rate": 6.716202683197468e-05, "loss": 0.8303, "step": 121660 }, { "epoch": 0.7773149508707818, "grad_norm": 0.6857743263244629, "learning_rate": 6.715731389728484e-05, "loss": 0.8815, "step": 121670 }, { "epoch": 0.7773788380205205, "grad_norm": 1.6733158826828003, "learning_rate": 6.715260078980599e-05, "loss": 0.7927, "step": 121680 }, { "epoch": 0.7774427251702593, "grad_norm": 0.7211989164352417, "learning_rate": 6.714788750958561e-05, "loss": 0.9503, "step": 121690 }, { "epoch": 0.777506612319998, "grad_norm": 1.002265453338623, "learning_rate": 6.714317405667118e-05, "loss": 1.0882, "step": 121700 }, { "epoch": 0.7775704994697367, "grad_norm": 0.9164408445358276, "learning_rate": 6.713846043111014e-05, "loss": 0.886, "step": 121710 }, { "epoch": 0.7776343866194754, "grad_norm": 0.5526295304298401, "learning_rate": 6.713374663294999e-05, "loss": 0.7163, "step": 121720 }, { "epoch": 0.7776982737692141, "grad_norm": 1.0541777610778809, "learning_rate": 6.712903266223818e-05, "loss": 1.219, "step": 121730 }, { "epoch": 0.7777621609189528, "grad_norm": 1.3423256874084473, "learning_rate": 6.71243185190222e-05, "loss": 1.2439, "step": 121740 }, { "epoch": 0.7778260480686915, "grad_norm": 0.900256335735321, "learning_rate": 6.711960420334951e-05, "loss": 0.9215, "step": 121750 }, { "epoch": 0.7778899352184302, "grad_norm": 0.7287362813949585, "learning_rate": 6.71148897152676e-05, "loss": 0.7935, "step": 121760 }, { "epoch": 0.7779538223681689, "grad_norm": 0.6165835857391357, "learning_rate": 6.711017505482395e-05, "loss": 0.8651, "step": 121770 }, { "epoch": 0.7780177095179076, "grad_norm": 1.222276210784912, "learning_rate": 6.710546022206603e-05, "loss": 0.7607, "step": 121780 }, { "epoch": 0.7780815966676463, "grad_norm": 0.9571607112884521, "learning_rate": 6.71007452170413e-05, "loss": 0.9691, "step": 121790 }, { "epoch": 0.778145483817385, "grad_norm": 0.7661402821540833, "learning_rate": 6.709603003979729e-05, "loss": 0.8724, "step": 121800 }, { "epoch": 0.7782093709671237, "grad_norm": 0.9721023440361023, "learning_rate": 6.709131469038149e-05, "loss": 0.6902, "step": 121810 }, { "epoch": 0.7782732581168623, "grad_norm": 0.9388052821159363, "learning_rate": 6.708659916884135e-05, "loss": 0.8722, "step": 121820 }, { "epoch": 0.778337145266601, "grad_norm": 1.3385436534881592, "learning_rate": 6.708188347522438e-05, "loss": 0.9258, "step": 121830 }, { "epoch": 0.7784010324163397, "grad_norm": 1.1021685600280762, "learning_rate": 6.707716760957805e-05, "loss": 1.1272, "step": 121840 }, { "epoch": 0.7784649195660784, "grad_norm": 0.7849000096321106, "learning_rate": 6.707245157194987e-05, "loss": 0.8731, "step": 121850 }, { "epoch": 0.7785288067158171, "grad_norm": 0.7973129153251648, "learning_rate": 6.706773536238734e-05, "loss": 1.0259, "step": 121860 }, { "epoch": 0.7785926938655559, "grad_norm": 0.8367007970809937, "learning_rate": 6.706301898093795e-05, "loss": 0.8157, "step": 121870 }, { "epoch": 0.7786565810152946, "grad_norm": 0.8635137677192688, "learning_rate": 6.70583024276492e-05, "loss": 0.907, "step": 121880 }, { "epoch": 0.7787204681650333, "grad_norm": 0.8511916399002075, "learning_rate": 6.705358570256858e-05, "loss": 0.9413, "step": 121890 }, { "epoch": 0.778784355314772, "grad_norm": 0.8517649173736572, "learning_rate": 6.70488688057436e-05, "loss": 0.9937, "step": 121900 }, { "epoch": 0.7788482424645107, "grad_norm": 0.8689191341400146, "learning_rate": 6.704415173722176e-05, "loss": 0.9242, "step": 121910 }, { "epoch": 0.7789121296142494, "grad_norm": 0.7940566539764404, "learning_rate": 6.70394344970506e-05, "loss": 0.7244, "step": 121920 }, { "epoch": 0.7789760167639881, "grad_norm": 1.276955008506775, "learning_rate": 6.703471708527756e-05, "loss": 1.1358, "step": 121930 }, { "epoch": 0.7790399039137268, "grad_norm": 1.2477837800979614, "learning_rate": 6.702999950195017e-05, "loss": 1.0286, "step": 121940 }, { "epoch": 0.7791037910634655, "grad_norm": 0.9349541664123535, "learning_rate": 6.702528174711597e-05, "loss": 0.9723, "step": 121950 }, { "epoch": 0.7791676782132042, "grad_norm": 0.8674134612083435, "learning_rate": 6.702056382082245e-05, "loss": 1.0462, "step": 121960 }, { "epoch": 0.7792315653629429, "grad_norm": 1.3984166383743286, "learning_rate": 6.701584572311712e-05, "loss": 0.9276, "step": 121970 }, { "epoch": 0.7792954525126816, "grad_norm": 1.5692760944366455, "learning_rate": 6.701112745404752e-05, "loss": 0.7578, "step": 121980 }, { "epoch": 0.7793593396624203, "grad_norm": 1.1319806575775146, "learning_rate": 6.700640901366113e-05, "loss": 0.927, "step": 121990 }, { "epoch": 0.779423226812159, "grad_norm": 1.1491388082504272, "learning_rate": 6.700169040200551e-05, "loss": 0.8779, "step": 122000 }, { "epoch": 0.7794871139618977, "grad_norm": 0.9306029081344604, "learning_rate": 6.699697161912815e-05, "loss": 0.7728, "step": 122010 }, { "epoch": 0.7795510011116364, "grad_norm": 2.3695061206817627, "learning_rate": 6.699225266507658e-05, "loss": 1.0062, "step": 122020 }, { "epoch": 0.7796148882613751, "grad_norm": 0.8340387344360352, "learning_rate": 6.698753353989831e-05, "loss": 0.7666, "step": 122030 }, { "epoch": 0.7796787754111139, "grad_norm": 1.7601200342178345, "learning_rate": 6.69828142436409e-05, "loss": 0.9149, "step": 122040 }, { "epoch": 0.7797426625608526, "grad_norm": 0.7318232655525208, "learning_rate": 6.697809477635187e-05, "loss": 0.8447, "step": 122050 }, { "epoch": 0.7798065497105912, "grad_norm": 1.044084906578064, "learning_rate": 6.697384710959896e-05, "loss": 1.3071, "step": 122060 }, { "epoch": 0.7798704368603299, "grad_norm": 0.5054813623428345, "learning_rate": 6.696912731748075e-05, "loss": 0.8569, "step": 122070 }, { "epoch": 0.7799343240100686, "grad_norm": 0.8287333846092224, "learning_rate": 6.696440735446876e-05, "loss": 1.0033, "step": 122080 }, { "epoch": 0.7799982111598073, "grad_norm": 0.8262792825698853, "learning_rate": 6.695968722061052e-05, "loss": 0.8806, "step": 122090 }, { "epoch": 0.780062098309546, "grad_norm": 0.9939790964126587, "learning_rate": 6.695496691595354e-05, "loss": 1.0262, "step": 122100 }, { "epoch": 0.7801259854592847, "grad_norm": 0.7839484810829163, "learning_rate": 6.695024644054537e-05, "loss": 1.0321, "step": 122110 }, { "epoch": 0.7801898726090234, "grad_norm": 0.7679548859596252, "learning_rate": 6.694552579443358e-05, "loss": 0.8247, "step": 122120 }, { "epoch": 0.7802537597587621, "grad_norm": 1.0078667402267456, "learning_rate": 6.694080497766567e-05, "loss": 0.8459, "step": 122130 }, { "epoch": 0.7803176469085008, "grad_norm": 0.964644730091095, "learning_rate": 6.69360839902892e-05, "loss": 0.9135, "step": 122140 }, { "epoch": 0.7803815340582395, "grad_norm": 1.4966347217559814, "learning_rate": 6.69313628323517e-05, "loss": 0.756, "step": 122150 }, { "epoch": 0.7804454212079782, "grad_norm": 0.8424573540687561, "learning_rate": 6.692664150390073e-05, "loss": 0.8184, "step": 122160 }, { "epoch": 0.7805093083577169, "grad_norm": 0.6761122345924377, "learning_rate": 6.692192000498385e-05, "loss": 1.1223, "step": 122170 }, { "epoch": 0.7805731955074556, "grad_norm": 1.4543062448501587, "learning_rate": 6.69171983356486e-05, "loss": 0.7813, "step": 122180 }, { "epoch": 0.7806370826571943, "grad_norm": 0.830903172492981, "learning_rate": 6.691247649594251e-05, "loss": 0.9245, "step": 122190 }, { "epoch": 0.780700969806933, "grad_norm": 0.994420051574707, "learning_rate": 6.690775448591316e-05, "loss": 0.8715, "step": 122200 }, { "epoch": 0.7807648569566717, "grad_norm": 0.7622717618942261, "learning_rate": 6.69030323056081e-05, "loss": 0.9868, "step": 122210 }, { "epoch": 0.7808287441064105, "grad_norm": 0.8121097683906555, "learning_rate": 6.689830995507487e-05, "loss": 1.1996, "step": 122220 }, { "epoch": 0.7808926312561492, "grad_norm": 0.6650993227958679, "learning_rate": 6.689358743436105e-05, "loss": 0.6315, "step": 122230 }, { "epoch": 0.7809565184058879, "grad_norm": 1.005804419517517, "learning_rate": 6.68888647435142e-05, "loss": 0.9496, "step": 122240 }, { "epoch": 0.7810204055556266, "grad_norm": 0.9154719114303589, "learning_rate": 6.688414188258185e-05, "loss": 0.7727, "step": 122250 }, { "epoch": 0.7810842927053653, "grad_norm": 0.7355092167854309, "learning_rate": 6.687941885161158e-05, "loss": 0.84, "step": 122260 }, { "epoch": 0.781148179855104, "grad_norm": 0.5671817064285278, "learning_rate": 6.687469565065096e-05, "loss": 0.9975, "step": 122270 }, { "epoch": 0.7812120670048427, "grad_norm": 1.7897629737854004, "learning_rate": 6.686997227974756e-05, "loss": 0.9906, "step": 122280 }, { "epoch": 0.7812759541545814, "grad_norm": 0.9264022707939148, "learning_rate": 6.686524873894894e-05, "loss": 0.9234, "step": 122290 }, { "epoch": 0.7813398413043201, "grad_norm": 1.2895312309265137, "learning_rate": 6.68605250283027e-05, "loss": 0.8688, "step": 122300 }, { "epoch": 0.7814037284540587, "grad_norm": 1.5715874433517456, "learning_rate": 6.685580114785638e-05, "loss": 0.951, "step": 122310 }, { "epoch": 0.7814676156037974, "grad_norm": 0.6482036709785461, "learning_rate": 6.685107709765755e-05, "loss": 0.9561, "step": 122320 }, { "epoch": 0.7815315027535361, "grad_norm": 0.810217559337616, "learning_rate": 6.684635287775381e-05, "loss": 1.0825, "step": 122330 }, { "epoch": 0.7815953899032748, "grad_norm": 0.8282271027565002, "learning_rate": 6.68416284881927e-05, "loss": 0.8896, "step": 122340 }, { "epoch": 0.7816592770530135, "grad_norm": 0.6604433059692383, "learning_rate": 6.683690392902184e-05, "loss": 0.8456, "step": 122350 }, { "epoch": 0.7817231642027522, "grad_norm": 0.8533942699432373, "learning_rate": 6.683217920028876e-05, "loss": 0.8331, "step": 122360 }, { "epoch": 0.7817870513524909, "grad_norm": 1.0768920183181763, "learning_rate": 6.68274543020411e-05, "loss": 0.9044, "step": 122370 }, { "epoch": 0.7818509385022296, "grad_norm": 1.2052894830703735, "learning_rate": 6.682272923432643e-05, "loss": 0.9181, "step": 122380 }, { "epoch": 0.7819148256519683, "grad_norm": 1.163033127784729, "learning_rate": 6.681800399719229e-05, "loss": 0.9498, "step": 122390 }, { "epoch": 0.781978712801707, "grad_norm": 1.0256508588790894, "learning_rate": 6.681327859068633e-05, "loss": 0.9087, "step": 122400 }, { "epoch": 0.7820425999514458, "grad_norm": 1.141444444656372, "learning_rate": 6.680855301485609e-05, "loss": 0.8819, "step": 122410 }, { "epoch": 0.7821064871011845, "grad_norm": 1.1306743621826172, "learning_rate": 6.680382726974918e-05, "loss": 0.9268, "step": 122420 }, { "epoch": 0.7821703742509232, "grad_norm": 0.9543069005012512, "learning_rate": 6.67991013554132e-05, "loss": 0.8443, "step": 122430 }, { "epoch": 0.7822342614006619, "grad_norm": 0.49687138199806213, "learning_rate": 6.679437527189571e-05, "loss": 0.9003, "step": 122440 }, { "epoch": 0.7822981485504006, "grad_norm": 1.3681707382202148, "learning_rate": 6.678964901924435e-05, "loss": 1.0405, "step": 122450 }, { "epoch": 0.7823620357001393, "grad_norm": 2.7566139698028564, "learning_rate": 6.678492259750672e-05, "loss": 1.1397, "step": 122460 }, { "epoch": 0.782425922849878, "grad_norm": 1.4756008386611938, "learning_rate": 6.678019600673037e-05, "loss": 0.8729, "step": 122470 }, { "epoch": 0.7824898099996167, "grad_norm": 1.502285122871399, "learning_rate": 6.677546924696295e-05, "loss": 0.7695, "step": 122480 }, { "epoch": 0.7825536971493554, "grad_norm": 0.9394874572753906, "learning_rate": 6.677074231825203e-05, "loss": 0.8499, "step": 122490 }, { "epoch": 0.7826175842990941, "grad_norm": 0.6078043580055237, "learning_rate": 6.676601522064522e-05, "loss": 0.7862, "step": 122500 }, { "epoch": 0.7826814714488328, "grad_norm": 0.6710939407348633, "learning_rate": 6.676128795419015e-05, "loss": 1.0033, "step": 122510 }, { "epoch": 0.7827453585985715, "grad_norm": 1.7409946918487549, "learning_rate": 6.67565605189344e-05, "loss": 0.7387, "step": 122520 }, { "epoch": 0.7828092457483102, "grad_norm": 1.1592122316360474, "learning_rate": 6.67518329149256e-05, "loss": 0.8663, "step": 122530 }, { "epoch": 0.7828731328980489, "grad_norm": 0.7959754467010498, "learning_rate": 6.674710514221133e-05, "loss": 0.9003, "step": 122540 }, { "epoch": 0.7829370200477875, "grad_norm": 1.363761305809021, "learning_rate": 6.674237720083924e-05, "loss": 0.7244, "step": 122550 }, { "epoch": 0.7830009071975262, "grad_norm": 0.603500247001648, "learning_rate": 6.673764909085692e-05, "loss": 0.7391, "step": 122560 }, { "epoch": 0.783064794347265, "grad_norm": 1.3545539379119873, "learning_rate": 6.6732920812312e-05, "loss": 0.8183, "step": 122570 }, { "epoch": 0.7831286814970037, "grad_norm": 0.8553158044815063, "learning_rate": 6.672819236525208e-05, "loss": 0.8083, "step": 122580 }, { "epoch": 0.7831925686467424, "grad_norm": 1.2060186862945557, "learning_rate": 6.67234637497248e-05, "loss": 1.0407, "step": 122590 }, { "epoch": 0.7832564557964811, "grad_norm": 0.7966336607933044, "learning_rate": 6.671873496577777e-05, "loss": 1.004, "step": 122600 }, { "epoch": 0.7833203429462198, "grad_norm": 1.0027638673782349, "learning_rate": 6.671400601345861e-05, "loss": 1.013, "step": 122610 }, { "epoch": 0.7833842300959585, "grad_norm": 0.8682675957679749, "learning_rate": 6.670927689281494e-05, "loss": 1.0311, "step": 122620 }, { "epoch": 0.7834481172456972, "grad_norm": 1.4984140396118164, "learning_rate": 6.670454760389442e-05, "loss": 0.6875, "step": 122630 }, { "epoch": 0.7835120043954359, "grad_norm": 0.7457048892974854, "learning_rate": 6.669981814674464e-05, "loss": 0.6236, "step": 122640 }, { "epoch": 0.7835758915451746, "grad_norm": 0.7693182826042175, "learning_rate": 6.669508852141325e-05, "loss": 0.7395, "step": 122650 }, { "epoch": 0.7836397786949133, "grad_norm": 2.3714489936828613, "learning_rate": 6.669035872794786e-05, "loss": 1.1034, "step": 122660 }, { "epoch": 0.783703665844652, "grad_norm": 0.9617331027984619, "learning_rate": 6.668562876639614e-05, "loss": 0.9657, "step": 122670 }, { "epoch": 0.7837675529943907, "grad_norm": 1.0759632587432861, "learning_rate": 6.66808986368057e-05, "loss": 0.6798, "step": 122680 }, { "epoch": 0.7838314401441294, "grad_norm": 0.8608376383781433, "learning_rate": 6.667616833922416e-05, "loss": 0.8573, "step": 122690 }, { "epoch": 0.7838953272938681, "grad_norm": 0.802783727645874, "learning_rate": 6.66714378736992e-05, "loss": 0.8653, "step": 122700 }, { "epoch": 0.7839592144436068, "grad_norm": 1.2049453258514404, "learning_rate": 6.666670724027844e-05, "loss": 0.8514, "step": 122710 }, { "epoch": 0.7840231015933455, "grad_norm": 1.9265187978744507, "learning_rate": 6.66619764390095e-05, "loss": 0.8429, "step": 122720 }, { "epoch": 0.7840869887430842, "grad_norm": 1.0026494264602661, "learning_rate": 6.665724546994005e-05, "loss": 1.0991, "step": 122730 }, { "epoch": 0.784150875892823, "grad_norm": 1.0881091356277466, "learning_rate": 6.665251433311773e-05, "loss": 0.8073, "step": 122740 }, { "epoch": 0.7842147630425617, "grad_norm": 0.8942010998725891, "learning_rate": 6.664778302859018e-05, "loss": 0.9164, "step": 122750 }, { "epoch": 0.7842786501923004, "grad_norm": 1.0254307985305786, "learning_rate": 6.664305155640507e-05, "loss": 0.8306, "step": 122760 }, { "epoch": 0.7843425373420391, "grad_norm": 1.6653152704238892, "learning_rate": 6.663831991661002e-05, "loss": 1.1331, "step": 122770 }, { "epoch": 0.7844064244917778, "grad_norm": 0.8588860034942627, "learning_rate": 6.663358810925269e-05, "loss": 0.9881, "step": 122780 }, { "epoch": 0.7844703116415164, "grad_norm": 0.5761985778808594, "learning_rate": 6.662885613438074e-05, "loss": 0.6635, "step": 122790 }, { "epoch": 0.7845341987912551, "grad_norm": 0.762401282787323, "learning_rate": 6.662412399204182e-05, "loss": 0.9458, "step": 122800 }, { "epoch": 0.7845980859409938, "grad_norm": 1.0655889511108398, "learning_rate": 6.661939168228359e-05, "loss": 0.9319, "step": 122810 }, { "epoch": 0.7846619730907325, "grad_norm": 0.7457488775253296, "learning_rate": 6.66146592051537e-05, "loss": 0.6604, "step": 122820 }, { "epoch": 0.7847258602404712, "grad_norm": 0.5781500935554504, "learning_rate": 6.660992656069984e-05, "loss": 0.9065, "step": 122830 }, { "epoch": 0.7847897473902099, "grad_norm": 1.0758085250854492, "learning_rate": 6.660519374896964e-05, "loss": 0.8102, "step": 122840 }, { "epoch": 0.7848536345399486, "grad_norm": 3.073899745941162, "learning_rate": 6.660046077001076e-05, "loss": 0.961, "step": 122850 }, { "epoch": 0.7849175216896873, "grad_norm": 1.818791389465332, "learning_rate": 6.65957276238709e-05, "loss": 0.7407, "step": 122860 }, { "epoch": 0.784981408839426, "grad_norm": 1.0063798427581787, "learning_rate": 6.65909943105977e-05, "loss": 0.8685, "step": 122870 }, { "epoch": 0.7850452959891647, "grad_norm": 0.8023037314414978, "learning_rate": 6.658626083023883e-05, "loss": 0.6783, "step": 122880 }, { "epoch": 0.7851091831389034, "grad_norm": 0.8545927405357361, "learning_rate": 6.658152718284197e-05, "loss": 0.8662, "step": 122890 }, { "epoch": 0.7851730702886421, "grad_norm": 0.9020628929138184, "learning_rate": 6.657679336845478e-05, "loss": 0.8088, "step": 122900 }, { "epoch": 0.7852369574383808, "grad_norm": 1.1107025146484375, "learning_rate": 6.657205938712492e-05, "loss": 0.9401, "step": 122910 }, { "epoch": 0.7853008445881196, "grad_norm": 0.7026088833808899, "learning_rate": 6.656732523890012e-05, "loss": 0.7771, "step": 122920 }, { "epoch": 0.7853647317378583, "grad_norm": 1.3951656818389893, "learning_rate": 6.656259092382801e-05, "loss": 1.0259, "step": 122930 }, { "epoch": 0.785428618887597, "grad_norm": 1.104836106300354, "learning_rate": 6.655785644195627e-05, "loss": 0.9918, "step": 122940 }, { "epoch": 0.7854925060373357, "grad_norm": 0.6789342761039734, "learning_rate": 6.655312179333259e-05, "loss": 0.8964, "step": 122950 }, { "epoch": 0.7855563931870744, "grad_norm": 0.7267434597015381, "learning_rate": 6.654838697800467e-05, "loss": 1.1021, "step": 122960 }, { "epoch": 0.7856202803368131, "grad_norm": 0.9028590321540833, "learning_rate": 6.654365199602016e-05, "loss": 0.9037, "step": 122970 }, { "epoch": 0.7856841674865518, "grad_norm": 1.3182995319366455, "learning_rate": 6.653891684742677e-05, "loss": 1.2179, "step": 122980 }, { "epoch": 0.7857480546362905, "grad_norm": 0.6001270413398743, "learning_rate": 6.653418153227218e-05, "loss": 0.7268, "step": 122990 }, { "epoch": 0.7858119417860292, "grad_norm": 0.8544936776161194, "learning_rate": 6.652944605060409e-05, "loss": 0.8634, "step": 123000 }, { "epoch": 0.7858758289357679, "grad_norm": 0.7163207530975342, "learning_rate": 6.652471040247016e-05, "loss": 0.7325, "step": 123010 }, { "epoch": 0.7859397160855066, "grad_norm": 1.0763355493545532, "learning_rate": 6.65199745879181e-05, "loss": 1.0254, "step": 123020 }, { "epoch": 0.7860036032352452, "grad_norm": 0.8155850172042847, "learning_rate": 6.651523860699562e-05, "loss": 0.9767, "step": 123030 }, { "epoch": 0.7860674903849839, "grad_norm": 0.845534086227417, "learning_rate": 6.651050245975039e-05, "loss": 0.9393, "step": 123040 }, { "epoch": 0.7861313775347226, "grad_norm": 1.392137885093689, "learning_rate": 6.650576614623012e-05, "loss": 0.7875, "step": 123050 }, { "epoch": 0.7861952646844613, "grad_norm": 0.9796945452690125, "learning_rate": 6.65010296664825e-05, "loss": 0.7574, "step": 123060 }, { "epoch": 0.7862591518342, "grad_norm": 0.8178929686546326, "learning_rate": 6.649629302055524e-05, "loss": 0.746, "step": 123070 }, { "epoch": 0.7863230389839387, "grad_norm": 0.6431455612182617, "learning_rate": 6.649155620849605e-05, "loss": 0.8506, "step": 123080 }, { "epoch": 0.7863869261336774, "grad_norm": 0.8767764568328857, "learning_rate": 6.648681923035261e-05, "loss": 0.7546, "step": 123090 }, { "epoch": 0.7864508132834162, "grad_norm": 2.9324655532836914, "learning_rate": 6.648208208617262e-05, "loss": 0.7744, "step": 123100 }, { "epoch": 0.7865147004331549, "grad_norm": 1.0613309144973755, "learning_rate": 6.647734477600383e-05, "loss": 0.7883, "step": 123110 }, { "epoch": 0.7865785875828936, "grad_norm": 1.001646637916565, "learning_rate": 6.647260729989391e-05, "loss": 1.2337, "step": 123120 }, { "epoch": 0.7866424747326323, "grad_norm": 1.7810345888137817, "learning_rate": 6.646786965789057e-05, "loss": 0.8567, "step": 123130 }, { "epoch": 0.786706361882371, "grad_norm": 0.8368769288063049, "learning_rate": 6.646313185004155e-05, "loss": 0.935, "step": 123140 }, { "epoch": 0.7867702490321097, "grad_norm": 0.6469975709915161, "learning_rate": 6.645839387639456e-05, "loss": 0.7664, "step": 123150 }, { "epoch": 0.7868341361818484, "grad_norm": 0.7839746475219727, "learning_rate": 6.645365573699729e-05, "loss": 1.0381, "step": 123160 }, { "epoch": 0.7868980233315871, "grad_norm": 1.2214899063110352, "learning_rate": 6.644891743189749e-05, "loss": 0.94, "step": 123170 }, { "epoch": 0.7869619104813258, "grad_norm": 1.0104191303253174, "learning_rate": 6.644417896114285e-05, "loss": 0.9163, "step": 123180 }, { "epoch": 0.7870257976310645, "grad_norm": 0.9507836699485779, "learning_rate": 6.643944032478109e-05, "loss": 0.9046, "step": 123190 }, { "epoch": 0.7870896847808032, "grad_norm": 1.0643188953399658, "learning_rate": 6.643470152285995e-05, "loss": 0.9449, "step": 123200 }, { "epoch": 0.7871535719305419, "grad_norm": 0.8711426258087158, "learning_rate": 6.642996255542717e-05, "loss": 0.7908, "step": 123210 }, { "epoch": 0.7872174590802806, "grad_norm": 1.0922789573669434, "learning_rate": 6.642522342253042e-05, "loss": 0.8448, "step": 123220 }, { "epoch": 0.7872813462300193, "grad_norm": 0.9862490892410278, "learning_rate": 6.642048412421749e-05, "loss": 0.758, "step": 123230 }, { "epoch": 0.787345233379758, "grad_norm": 0.6439590454101562, "learning_rate": 6.641574466053607e-05, "loss": 0.6805, "step": 123240 }, { "epoch": 0.7874091205294967, "grad_norm": 0.9654756784439087, "learning_rate": 6.641100503153388e-05, "loss": 0.7238, "step": 123250 }, { "epoch": 0.7874730076792354, "grad_norm": 0.7384721636772156, "learning_rate": 6.64062652372587e-05, "loss": 0.9328, "step": 123260 }, { "epoch": 0.7875368948289742, "grad_norm": 1.083748698234558, "learning_rate": 6.640152527775821e-05, "loss": 0.9526, "step": 123270 }, { "epoch": 0.7876007819787127, "grad_norm": 1.286015510559082, "learning_rate": 6.63967851530802e-05, "loss": 0.9245, "step": 123280 }, { "epoch": 0.7876646691284515, "grad_norm": 0.5310730338096619, "learning_rate": 6.639204486327236e-05, "loss": 0.8381, "step": 123290 }, { "epoch": 0.7877285562781902, "grad_norm": 1.1618332862854004, "learning_rate": 6.638730440838244e-05, "loss": 1.1095, "step": 123300 }, { "epoch": 0.7877924434279289, "grad_norm": 0.8446438908576965, "learning_rate": 6.63825637884582e-05, "loss": 0.9877, "step": 123310 }, { "epoch": 0.7878563305776676, "grad_norm": 0.6772144436836243, "learning_rate": 6.637782300354737e-05, "loss": 0.6984, "step": 123320 }, { "epoch": 0.7879202177274063, "grad_norm": 0.6928181052207947, "learning_rate": 6.63730820536977e-05, "loss": 1.007, "step": 123330 }, { "epoch": 0.787984104877145, "grad_norm": 1.1651209592819214, "learning_rate": 6.63683409389569e-05, "loss": 0.889, "step": 123340 }, { "epoch": 0.7880479920268837, "grad_norm": 1.0764139890670776, "learning_rate": 6.636359965937278e-05, "loss": 0.8781, "step": 123350 }, { "epoch": 0.7881118791766224, "grad_norm": 0.5415597558021545, "learning_rate": 6.635885821499304e-05, "loss": 0.9395, "step": 123360 }, { "epoch": 0.7881757663263611, "grad_norm": 1.080687165260315, "learning_rate": 6.635411660586543e-05, "loss": 0.9444, "step": 123370 }, { "epoch": 0.7882396534760998, "grad_norm": 0.8479616045951843, "learning_rate": 6.634937483203773e-05, "loss": 0.914, "step": 123380 }, { "epoch": 0.7883035406258385, "grad_norm": 0.7916926145553589, "learning_rate": 6.634463289355768e-05, "loss": 1.037, "step": 123390 }, { "epoch": 0.7883674277755772, "grad_norm": 0.9710598587989807, "learning_rate": 6.633989079047306e-05, "loss": 0.8785, "step": 123400 }, { "epoch": 0.7884313149253159, "grad_norm": 1.142552137374878, "learning_rate": 6.633514852283159e-05, "loss": 0.8654, "step": 123410 }, { "epoch": 0.7884952020750546, "grad_norm": 0.7896512746810913, "learning_rate": 6.633040609068103e-05, "loss": 0.755, "step": 123420 }, { "epoch": 0.7885590892247933, "grad_norm": 1.4209386110305786, "learning_rate": 6.632566349406916e-05, "loss": 0.797, "step": 123430 }, { "epoch": 0.788622976374532, "grad_norm": 0.843025267124176, "learning_rate": 6.632092073304374e-05, "loss": 1.0926, "step": 123440 }, { "epoch": 0.7886868635242708, "grad_norm": 0.7612596750259399, "learning_rate": 6.631617780765252e-05, "loss": 0.979, "step": 123450 }, { "epoch": 0.7887507506740095, "grad_norm": 1.0904258489608765, "learning_rate": 6.631143471794328e-05, "loss": 0.9336, "step": 123460 }, { "epoch": 0.7888146378237482, "grad_norm": 1.2260910272598267, "learning_rate": 6.630669146396376e-05, "loss": 1.0369, "step": 123470 }, { "epoch": 0.7888785249734869, "grad_norm": 1.343691349029541, "learning_rate": 6.630194804576177e-05, "loss": 0.7612, "step": 123480 }, { "epoch": 0.7889424121232256, "grad_norm": 0.8414347171783447, "learning_rate": 6.629720446338506e-05, "loss": 0.788, "step": 123490 }, { "epoch": 0.7890062992729643, "grad_norm": 0.814517080783844, "learning_rate": 6.62924607168814e-05, "loss": 0.931, "step": 123500 }, { "epoch": 0.789070186422703, "grad_norm": 1.7236671447753906, "learning_rate": 6.628771680629856e-05, "loss": 0.8326, "step": 123510 }, { "epoch": 0.7891340735724416, "grad_norm": 0.738429605960846, "learning_rate": 6.628297273168433e-05, "loss": 0.8309, "step": 123520 }, { "epoch": 0.7891979607221803, "grad_norm": 0.8377928137779236, "learning_rate": 6.627822849308648e-05, "loss": 0.8578, "step": 123530 }, { "epoch": 0.789261847871919, "grad_norm": 3.6376450061798096, "learning_rate": 6.627348409055278e-05, "loss": 0.8637, "step": 123540 }, { "epoch": 0.7893257350216577, "grad_norm": 1.038316011428833, "learning_rate": 6.626873952413102e-05, "loss": 1.0122, "step": 123550 }, { "epoch": 0.7893896221713964, "grad_norm": 0.8108795881271362, "learning_rate": 6.626399479386898e-05, "loss": 1.0062, "step": 123560 }, { "epoch": 0.7894535093211351, "grad_norm": 0.9439957737922668, "learning_rate": 6.625924989981444e-05, "loss": 0.8456, "step": 123570 }, { "epoch": 0.7895173964708738, "grad_norm": 0.6934586763381958, "learning_rate": 6.625450484201519e-05, "loss": 0.7537, "step": 123580 }, { "epoch": 0.7895812836206125, "grad_norm": 0.6399053335189819, "learning_rate": 6.6249759620519e-05, "loss": 0.6623, "step": 123590 }, { "epoch": 0.7896451707703512, "grad_norm": 0.7249269485473633, "learning_rate": 6.624501423537368e-05, "loss": 0.8404, "step": 123600 }, { "epoch": 0.7897090579200899, "grad_norm": 0.655685305595398, "learning_rate": 6.624026868662701e-05, "loss": 0.8336, "step": 123610 }, { "epoch": 0.7897729450698286, "grad_norm": 0.9134299159049988, "learning_rate": 6.623552297432679e-05, "loss": 0.6717, "step": 123620 }, { "epoch": 0.7898368322195674, "grad_norm": 0.7788447737693787, "learning_rate": 6.623077709852081e-05, "loss": 0.8576, "step": 123630 }, { "epoch": 0.7899007193693061, "grad_norm": 0.6765179634094238, "learning_rate": 6.622603105925686e-05, "loss": 1.0578, "step": 123640 }, { "epoch": 0.7899646065190448, "grad_norm": 0.7470600605010986, "learning_rate": 6.622128485658273e-05, "loss": 0.7213, "step": 123650 }, { "epoch": 0.7900284936687835, "grad_norm": 0.9108386635780334, "learning_rate": 6.621653849054623e-05, "loss": 1.0726, "step": 123660 }, { "epoch": 0.7900923808185222, "grad_norm": 1.1410408020019531, "learning_rate": 6.621179196119518e-05, "loss": 0.8654, "step": 123670 }, { "epoch": 0.7901562679682609, "grad_norm": 1.901923656463623, "learning_rate": 6.620704526857734e-05, "loss": 1.0014, "step": 123680 }, { "epoch": 0.7902201551179996, "grad_norm": 1.0020592212677002, "learning_rate": 6.620229841274054e-05, "loss": 0.7709, "step": 123690 }, { "epoch": 0.7902840422677383, "grad_norm": 1.4192286729812622, "learning_rate": 6.619755139373257e-05, "loss": 0.8418, "step": 123700 }, { "epoch": 0.790347929417477, "grad_norm": 0.7686444520950317, "learning_rate": 6.619280421160125e-05, "loss": 0.8845, "step": 123710 }, { "epoch": 0.7904118165672157, "grad_norm": 1.1990658044815063, "learning_rate": 6.618805686639439e-05, "loss": 0.9405, "step": 123720 }, { "epoch": 0.7904757037169544, "grad_norm": 1.1820399761199951, "learning_rate": 6.618330935815979e-05, "loss": 0.9327, "step": 123730 }, { "epoch": 0.7905395908666931, "grad_norm": 1.1175780296325684, "learning_rate": 6.617856168694526e-05, "loss": 0.756, "step": 123740 }, { "epoch": 0.7906034780164318, "grad_norm": 1.217108130455017, "learning_rate": 6.617381385279862e-05, "loss": 0.9991, "step": 123750 }, { "epoch": 0.7906673651661704, "grad_norm": 0.42774638533592224, "learning_rate": 6.616906585576768e-05, "loss": 0.7166, "step": 123760 }, { "epoch": 0.7907312523159091, "grad_norm": 0.831774890422821, "learning_rate": 6.616431769590027e-05, "loss": 0.9865, "step": 123770 }, { "epoch": 0.7907951394656478, "grad_norm": 0.9391918778419495, "learning_rate": 6.615956937324418e-05, "loss": 0.8336, "step": 123780 }, { "epoch": 0.7908590266153865, "grad_norm": 1.3029203414916992, "learning_rate": 6.615482088784726e-05, "loss": 1.0872, "step": 123790 }, { "epoch": 0.7909229137651252, "grad_norm": 0.6850435733795166, "learning_rate": 6.615007223975732e-05, "loss": 0.6906, "step": 123800 }, { "epoch": 0.790986800914864, "grad_norm": 1.2866231203079224, "learning_rate": 6.614532342902216e-05, "loss": 0.9472, "step": 123810 }, { "epoch": 0.7910506880646027, "grad_norm": 2.2075138092041016, "learning_rate": 6.614057445568961e-05, "loss": 0.9311, "step": 123820 }, { "epoch": 0.7911145752143414, "grad_norm": 0.7719504833221436, "learning_rate": 6.613582531980755e-05, "loss": 0.8272, "step": 123830 }, { "epoch": 0.7911784623640801, "grad_norm": 0.922818660736084, "learning_rate": 6.613107602142376e-05, "loss": 0.9421, "step": 123840 }, { "epoch": 0.7912423495138188, "grad_norm": 0.7339285612106323, "learning_rate": 6.612632656058608e-05, "loss": 0.8638, "step": 123850 }, { "epoch": 0.7913062366635575, "grad_norm": 0.9554028511047363, "learning_rate": 6.612157693734233e-05, "loss": 0.7373, "step": 123860 }, { "epoch": 0.7913701238132962, "grad_norm": 1.1785390377044678, "learning_rate": 6.611682715174036e-05, "loss": 0.6176, "step": 123870 }, { "epoch": 0.7914340109630349, "grad_norm": 0.7205845713615417, "learning_rate": 6.6112077203828e-05, "loss": 0.844, "step": 123880 }, { "epoch": 0.7914978981127736, "grad_norm": 0.773068368434906, "learning_rate": 6.610732709365308e-05, "loss": 0.7221, "step": 123890 }, { "epoch": 0.7915617852625123, "grad_norm": 0.8639894127845764, "learning_rate": 6.610257682126344e-05, "loss": 0.8949, "step": 123900 }, { "epoch": 0.791625672412251, "grad_norm": 0.8907663822174072, "learning_rate": 6.609782638670692e-05, "loss": 1.0061, "step": 123910 }, { "epoch": 0.7916895595619897, "grad_norm": 0.8259413242340088, "learning_rate": 6.609307579003136e-05, "loss": 1.1113, "step": 123920 }, { "epoch": 0.7917534467117284, "grad_norm": 0.9798697233200073, "learning_rate": 6.608832503128461e-05, "loss": 0.607, "step": 123930 }, { "epoch": 0.7918173338614671, "grad_norm": 0.7070125937461853, "learning_rate": 6.608357411051451e-05, "loss": 0.7719, "step": 123940 }, { "epoch": 0.7918812210112058, "grad_norm": 1.0658955574035645, "learning_rate": 6.607882302776892e-05, "loss": 0.9488, "step": 123950 }, { "epoch": 0.7919451081609445, "grad_norm": 0.6668451428413391, "learning_rate": 6.607407178309564e-05, "loss": 0.8527, "step": 123960 }, { "epoch": 0.7920089953106833, "grad_norm": 0.6440159678459167, "learning_rate": 6.606932037654256e-05, "loss": 0.9539, "step": 123970 }, { "epoch": 0.792072882460422, "grad_norm": 0.7459390759468079, "learning_rate": 6.606456880815754e-05, "loss": 0.8549, "step": 123980 }, { "epoch": 0.7921367696101607, "grad_norm": 0.9598776698112488, "learning_rate": 6.60598170779884e-05, "loss": 1.0577, "step": 123990 }, { "epoch": 0.7922006567598994, "grad_norm": 1.091723084449768, "learning_rate": 6.6055065186083e-05, "loss": 0.8536, "step": 124000 }, { "epoch": 0.792264543909638, "grad_norm": 1.7928060293197632, "learning_rate": 6.605031313248922e-05, "loss": 0.8274, "step": 124010 }, { "epoch": 0.7923284310593767, "grad_norm": 1.0446691513061523, "learning_rate": 6.604556091725489e-05, "loss": 1.192, "step": 124020 }, { "epoch": 0.7923923182091154, "grad_norm": 0.7123937010765076, "learning_rate": 6.604080854042789e-05, "loss": 0.9224, "step": 124030 }, { "epoch": 0.7924562053588541, "grad_norm": 0.9317911863327026, "learning_rate": 6.603605600205606e-05, "loss": 0.8837, "step": 124040 }, { "epoch": 0.7925200925085928, "grad_norm": 0.8845491409301758, "learning_rate": 6.603130330218727e-05, "loss": 0.9611, "step": 124050 }, { "epoch": 0.7925839796583315, "grad_norm": 1.66157865524292, "learning_rate": 6.60265504408694e-05, "loss": 0.6554, "step": 124060 }, { "epoch": 0.7926478668080702, "grad_norm": 0.9395459294319153, "learning_rate": 6.60217974181503e-05, "loss": 0.8756, "step": 124070 }, { "epoch": 0.7927117539578089, "grad_norm": 1.385488748550415, "learning_rate": 6.601704423407784e-05, "loss": 0.9138, "step": 124080 }, { "epoch": 0.7927756411075476, "grad_norm": 0.8289713263511658, "learning_rate": 6.601229088869988e-05, "loss": 0.8807, "step": 124090 }, { "epoch": 0.7928395282572863, "grad_norm": 0.7335384488105774, "learning_rate": 6.60075373820643e-05, "loss": 0.8432, "step": 124100 }, { "epoch": 0.792903415407025, "grad_norm": 0.8279372453689575, "learning_rate": 6.600278371421898e-05, "loss": 0.9776, "step": 124110 }, { "epoch": 0.7929673025567637, "grad_norm": 0.7263229489326477, "learning_rate": 6.599802988521178e-05, "loss": 0.7568, "step": 124120 }, { "epoch": 0.7930311897065024, "grad_norm": 0.8773966431617737, "learning_rate": 6.599327589509056e-05, "loss": 0.7536, "step": 124130 }, { "epoch": 0.7930950768562411, "grad_norm": 0.7500774264335632, "learning_rate": 6.598852174390324e-05, "loss": 0.8422, "step": 124140 }, { "epoch": 0.7931589640059798, "grad_norm": 0.6933243274688721, "learning_rate": 6.598376743169767e-05, "loss": 0.8359, "step": 124150 }, { "epoch": 0.7932228511557186, "grad_norm": 0.848579466342926, "learning_rate": 6.597901295852172e-05, "loss": 0.8249, "step": 124160 }, { "epoch": 0.7932867383054573, "grad_norm": 1.0843689441680908, "learning_rate": 6.59742583244233e-05, "loss": 0.8095, "step": 124170 }, { "epoch": 0.793350625455196, "grad_norm": 0.6765615940093994, "learning_rate": 6.596950352945026e-05, "loss": 0.8884, "step": 124180 }, { "epoch": 0.7934145126049347, "grad_norm": 0.7143245935440063, "learning_rate": 6.596474857365052e-05, "loss": 0.8451, "step": 124190 }, { "epoch": 0.7934783997546734, "grad_norm": 0.9047801494598389, "learning_rate": 6.595999345707195e-05, "loss": 1.2198, "step": 124200 }, { "epoch": 0.7935422869044121, "grad_norm": 0.6656885147094727, "learning_rate": 6.595523817976243e-05, "loss": 0.8917, "step": 124210 }, { "epoch": 0.7936061740541508, "grad_norm": 0.7534079551696777, "learning_rate": 6.595048274176986e-05, "loss": 1.0604, "step": 124220 }, { "epoch": 0.7936700612038895, "grad_norm": 1.1482276916503906, "learning_rate": 6.594572714314213e-05, "loss": 0.8904, "step": 124230 }, { "epoch": 0.7937339483536282, "grad_norm": 0.8140796422958374, "learning_rate": 6.594097138392715e-05, "loss": 0.7813, "step": 124240 }, { "epoch": 0.7937978355033668, "grad_norm": 1.066030502319336, "learning_rate": 6.593621546417279e-05, "loss": 0.6392, "step": 124250 }, { "epoch": 0.7938617226531055, "grad_norm": 0.5749644041061401, "learning_rate": 6.593145938392694e-05, "loss": 1.107, "step": 124260 }, { "epoch": 0.7939256098028442, "grad_norm": 0.8076421022415161, "learning_rate": 6.592670314323753e-05, "loss": 0.7277, "step": 124270 }, { "epoch": 0.7939894969525829, "grad_norm": 0.9243035316467285, "learning_rate": 6.592194674215242e-05, "loss": 0.8893, "step": 124280 }, { "epoch": 0.7940533841023216, "grad_norm": 0.696916401386261, "learning_rate": 6.591719018071955e-05, "loss": 0.848, "step": 124290 }, { "epoch": 0.7941172712520603, "grad_norm": 1.0850187540054321, "learning_rate": 6.591243345898679e-05, "loss": 0.9137, "step": 124300 }, { "epoch": 0.794181158401799, "grad_norm": 0.7324548363685608, "learning_rate": 6.590767657700207e-05, "loss": 0.9306, "step": 124310 }, { "epoch": 0.7942450455515377, "grad_norm": 1.1049119234085083, "learning_rate": 6.590291953481326e-05, "loss": 0.9547, "step": 124320 }, { "epoch": 0.7943089327012764, "grad_norm": 0.8178719878196716, "learning_rate": 6.589816233246832e-05, "loss": 0.9391, "step": 124330 }, { "epoch": 0.7943728198510152, "grad_norm": 0.793376088142395, "learning_rate": 6.589340497001511e-05, "loss": 0.9583, "step": 124340 }, { "epoch": 0.7944367070007539, "grad_norm": 0.9736217856407166, "learning_rate": 6.588864744750158e-05, "loss": 0.624, "step": 124350 }, { "epoch": 0.7945005941504926, "grad_norm": 1.2875404357910156, "learning_rate": 6.588388976497563e-05, "loss": 0.866, "step": 124360 }, { "epoch": 0.7945644813002313, "grad_norm": 1.0183568000793457, "learning_rate": 6.587913192248515e-05, "loss": 0.8561, "step": 124370 }, { "epoch": 0.79462836844997, "grad_norm": 0.6129552125930786, "learning_rate": 6.587437392007809e-05, "loss": 0.8037, "step": 124380 }, { "epoch": 0.7946922555997087, "grad_norm": 1.3483234643936157, "learning_rate": 6.586961575780233e-05, "loss": 0.7024, "step": 124390 }, { "epoch": 0.7947561427494474, "grad_norm": 0.7584971189498901, "learning_rate": 6.586485743570583e-05, "loss": 0.9416, "step": 124400 }, { "epoch": 0.7948200298991861, "grad_norm": 0.6718287467956543, "learning_rate": 6.58600989538365e-05, "loss": 0.9615, "step": 124410 }, { "epoch": 0.7948839170489248, "grad_norm": 1.4930849075317383, "learning_rate": 6.585534031224223e-05, "loss": 1.3212, "step": 124420 }, { "epoch": 0.7949478041986635, "grad_norm": 0.9690805673599243, "learning_rate": 6.585058151097097e-05, "loss": 0.9029, "step": 124430 }, { "epoch": 0.7950116913484022, "grad_norm": 0.528648316860199, "learning_rate": 6.584582255007065e-05, "loss": 0.6965, "step": 124440 }, { "epoch": 0.7950755784981409, "grad_norm": 0.8390137553215027, "learning_rate": 6.584106342958917e-05, "loss": 0.6836, "step": 124450 }, { "epoch": 0.7951394656478796, "grad_norm": 0.4811376929283142, "learning_rate": 6.583630414957449e-05, "loss": 1.126, "step": 124460 }, { "epoch": 0.7952033527976183, "grad_norm": 0.8852686882019043, "learning_rate": 6.583154471007453e-05, "loss": 0.8745, "step": 124470 }, { "epoch": 0.795267239947357, "grad_norm": 1.2516735792160034, "learning_rate": 6.582678511113722e-05, "loss": 1.1301, "step": 124480 }, { "epoch": 0.7953311270970956, "grad_norm": 0.831717848777771, "learning_rate": 6.58220253528105e-05, "loss": 0.8727, "step": 124490 }, { "epoch": 0.7953950142468343, "grad_norm": 0.7440255284309387, "learning_rate": 6.581726543514227e-05, "loss": 0.8061, "step": 124500 }, { "epoch": 0.795458901396573, "grad_norm": 1.1145399808883667, "learning_rate": 6.581250535818051e-05, "loss": 1.0605, "step": 124510 }, { "epoch": 0.7955227885463118, "grad_norm": 0.678453266620636, "learning_rate": 6.580774512197314e-05, "loss": 0.9662, "step": 124520 }, { "epoch": 0.7955866756960505, "grad_norm": 1.0213743448257446, "learning_rate": 6.58029847265681e-05, "loss": 0.915, "step": 124530 }, { "epoch": 0.7956505628457892, "grad_norm": 0.8792978525161743, "learning_rate": 6.579822417201333e-05, "loss": 0.8723, "step": 124540 }, { "epoch": 0.7957144499955279, "grad_norm": 0.9746803045272827, "learning_rate": 6.579346345835677e-05, "loss": 0.8153, "step": 124550 }, { "epoch": 0.7957783371452666, "grad_norm": 0.7419812083244324, "learning_rate": 6.578870258564637e-05, "loss": 0.7329, "step": 124560 }, { "epoch": 0.7958422242950053, "grad_norm": 0.9181807041168213, "learning_rate": 6.57839415539301e-05, "loss": 0.8517, "step": 124570 }, { "epoch": 0.795906111444744, "grad_norm": 0.7871003746986389, "learning_rate": 6.577918036325586e-05, "loss": 0.7752, "step": 124580 }, { "epoch": 0.7959699985944827, "grad_norm": 0.8576268553733826, "learning_rate": 6.577441901367163e-05, "loss": 0.94, "step": 124590 }, { "epoch": 0.7960338857442214, "grad_norm": 1.1811336278915405, "learning_rate": 6.576965750522534e-05, "loss": 0.9644, "step": 124600 }, { "epoch": 0.7960977728939601, "grad_norm": 1.184383511543274, "learning_rate": 6.576489583796498e-05, "loss": 1.1323, "step": 124610 }, { "epoch": 0.7961616600436988, "grad_norm": 0.9622499346733093, "learning_rate": 6.576013401193846e-05, "loss": 0.8139, "step": 124620 }, { "epoch": 0.7962255471934375, "grad_norm": 1.1531530618667603, "learning_rate": 6.575537202719377e-05, "loss": 0.7081, "step": 124630 }, { "epoch": 0.7962894343431762, "grad_norm": 1.1562443971633911, "learning_rate": 6.575060988377885e-05, "loss": 0.9157, "step": 124640 }, { "epoch": 0.7963533214929149, "grad_norm": 0.8580940365791321, "learning_rate": 6.574584758174166e-05, "loss": 1.0154, "step": 124650 }, { "epoch": 0.7964172086426536, "grad_norm": 0.7232387065887451, "learning_rate": 6.574108512113016e-05, "loss": 0.8085, "step": 124660 }, { "epoch": 0.7964810957923923, "grad_norm": 0.9032987952232361, "learning_rate": 6.573632250199234e-05, "loss": 1.0046, "step": 124670 }, { "epoch": 0.796544982942131, "grad_norm": 0.9355868697166443, "learning_rate": 6.57315597243761e-05, "loss": 0.7971, "step": 124680 }, { "epoch": 0.7966088700918698, "grad_norm": 0.8538186550140381, "learning_rate": 6.572679678832946e-05, "loss": 0.9047, "step": 124690 }, { "epoch": 0.7966727572416085, "grad_norm": 0.8761003017425537, "learning_rate": 6.572203369390038e-05, "loss": 0.7487, "step": 124700 }, { "epoch": 0.7967366443913472, "grad_norm": 0.643221914768219, "learning_rate": 6.571727044113679e-05, "loss": 0.8214, "step": 124710 }, { "epoch": 0.7968005315410859, "grad_norm": 0.8245800137519836, "learning_rate": 6.571250703008671e-05, "loss": 0.996, "step": 124720 }, { "epoch": 0.7968644186908246, "grad_norm": 0.692182719707489, "learning_rate": 6.57077434607981e-05, "loss": 0.8932, "step": 124730 }, { "epoch": 0.7969283058405632, "grad_norm": 0.6998267769813538, "learning_rate": 6.570297973331892e-05, "loss": 0.8643, "step": 124740 }, { "epoch": 0.7969921929903019, "grad_norm": 0.6680889129638672, "learning_rate": 6.569821584769714e-05, "loss": 0.8156, "step": 124750 }, { "epoch": 0.7970560801400406, "grad_norm": 0.7822675704956055, "learning_rate": 6.569345180398075e-05, "loss": 0.8655, "step": 124760 }, { "epoch": 0.7971199672897793, "grad_norm": 0.9974295496940613, "learning_rate": 6.568868760221773e-05, "loss": 0.6725, "step": 124770 }, { "epoch": 0.797183854439518, "grad_norm": 0.6198078989982605, "learning_rate": 6.568392324245605e-05, "loss": 0.6848, "step": 124780 }, { "epoch": 0.7972477415892567, "grad_norm": 1.088592767715454, "learning_rate": 6.567915872474368e-05, "loss": 1.1632, "step": 124790 }, { "epoch": 0.7973116287389954, "grad_norm": 0.636913537979126, "learning_rate": 6.567439404912864e-05, "loss": 0.8826, "step": 124800 }, { "epoch": 0.7973755158887341, "grad_norm": 0.7936016321182251, "learning_rate": 6.566962921565886e-05, "loss": 0.737, "step": 124810 }, { "epoch": 0.7974394030384728, "grad_norm": 0.8261633515357971, "learning_rate": 6.566486422438238e-05, "loss": 0.8341, "step": 124820 }, { "epoch": 0.7975032901882115, "grad_norm": 0.8050313591957092, "learning_rate": 6.566009907534717e-05, "loss": 0.7059, "step": 124830 }, { "epoch": 0.7975671773379502, "grad_norm": 1.183261513710022, "learning_rate": 6.565533376860121e-05, "loss": 0.9832, "step": 124840 }, { "epoch": 0.797631064487689, "grad_norm": 0.7606709003448486, "learning_rate": 6.565056830419249e-05, "loss": 0.9096, "step": 124850 }, { "epoch": 0.7976949516374277, "grad_norm": 1.1363462209701538, "learning_rate": 6.564580268216901e-05, "loss": 0.6826, "step": 124860 }, { "epoch": 0.7977588387871664, "grad_norm": 1.563015103340149, "learning_rate": 6.564103690257875e-05, "loss": 1.0033, "step": 124870 }, { "epoch": 0.7978227259369051, "grad_norm": 1.6602332592010498, "learning_rate": 6.563627096546973e-05, "loss": 0.7878, "step": 124880 }, { "epoch": 0.7978866130866438, "grad_norm": 1.8670592308044434, "learning_rate": 6.563150487088994e-05, "loss": 1.1101, "step": 124890 }, { "epoch": 0.7979505002363825, "grad_norm": 0.8670040369033813, "learning_rate": 6.562673861888735e-05, "loss": 1.1995, "step": 124900 }, { "epoch": 0.7980143873861212, "grad_norm": 0.7766642570495605, "learning_rate": 6.562197220951e-05, "loss": 0.8249, "step": 124910 }, { "epoch": 0.7980782745358599, "grad_norm": 1.183617115020752, "learning_rate": 6.561720564280588e-05, "loss": 0.8269, "step": 124920 }, { "epoch": 0.7981421616855986, "grad_norm": 0.9451962113380432, "learning_rate": 6.561243891882298e-05, "loss": 0.8455, "step": 124930 }, { "epoch": 0.7982060488353373, "grad_norm": 1.272316813468933, "learning_rate": 6.560767203760932e-05, "loss": 0.9672, "step": 124940 }, { "epoch": 0.798269935985076, "grad_norm": 0.775259256362915, "learning_rate": 6.560290499921288e-05, "loss": 0.7095, "step": 124950 }, { "epoch": 0.7983338231348147, "grad_norm": 0.8193401098251343, "learning_rate": 6.559813780368172e-05, "loss": 0.8389, "step": 124960 }, { "epoch": 0.7983977102845534, "grad_norm": 0.6283045411109924, "learning_rate": 6.55933704510638e-05, "loss": 0.969, "step": 124970 }, { "epoch": 0.798461597434292, "grad_norm": 0.7653422951698303, "learning_rate": 6.558860294140715e-05, "loss": 0.9878, "step": 124980 }, { "epoch": 0.7985254845840307, "grad_norm": 0.6775907874107361, "learning_rate": 6.558383527475978e-05, "loss": 0.8479, "step": 124990 }, { "epoch": 0.7985893717337694, "grad_norm": 1.2088565826416016, "learning_rate": 6.557906745116972e-05, "loss": 1.0976, "step": 125000 }, { "epoch": 0.7986532588835081, "grad_norm": 1.290498971939087, "learning_rate": 6.557429947068496e-05, "loss": 1.044, "step": 125010 }, { "epoch": 0.7987171460332468, "grad_norm": 1.1517268419265747, "learning_rate": 6.556953133335353e-05, "loss": 1.1142, "step": 125020 }, { "epoch": 0.7987810331829855, "grad_norm": 0.6305286884307861, "learning_rate": 6.556476303922344e-05, "loss": 0.9341, "step": 125030 }, { "epoch": 0.7988449203327243, "grad_norm": 1.3918240070343018, "learning_rate": 6.555999458834273e-05, "loss": 0.7964, "step": 125040 }, { "epoch": 0.798908807482463, "grad_norm": 1.885688304901123, "learning_rate": 6.555522598075943e-05, "loss": 0.9877, "step": 125050 }, { "epoch": 0.7989726946322017, "grad_norm": 0.8548856973648071, "learning_rate": 6.555045721652153e-05, "loss": 0.8515, "step": 125060 }, { "epoch": 0.7990365817819404, "grad_norm": 1.1888582706451416, "learning_rate": 6.554568829567708e-05, "loss": 0.8533, "step": 125070 }, { "epoch": 0.7991004689316791, "grad_norm": 0.8727964162826538, "learning_rate": 6.554091921827409e-05, "loss": 0.6094, "step": 125080 }, { "epoch": 0.7991643560814178, "grad_norm": 3.3259003162384033, "learning_rate": 6.55361499843606e-05, "loss": 1.0581, "step": 125090 }, { "epoch": 0.7992282432311565, "grad_norm": 0.6152466535568237, "learning_rate": 6.553138059398465e-05, "loss": 0.6899, "step": 125100 }, { "epoch": 0.7992921303808952, "grad_norm": 1.0475343465805054, "learning_rate": 6.552661104719426e-05, "loss": 0.9867, "step": 125110 }, { "epoch": 0.7993560175306339, "grad_norm": 0.5357400178909302, "learning_rate": 6.552184134403745e-05, "loss": 0.8811, "step": 125120 }, { "epoch": 0.7994199046803726, "grad_norm": 0.9087369441986084, "learning_rate": 6.551707148456229e-05, "loss": 0.8129, "step": 125130 }, { "epoch": 0.7994837918301113, "grad_norm": 0.9281877279281616, "learning_rate": 6.551230146881678e-05, "loss": 0.8924, "step": 125140 }, { "epoch": 0.79954767897985, "grad_norm": 1.1757920980453491, "learning_rate": 6.550753129684897e-05, "loss": 1.0507, "step": 125150 }, { "epoch": 0.7996115661295887, "grad_norm": 1.0355859994888306, "learning_rate": 6.550276096870692e-05, "loss": 0.7105, "step": 125160 }, { "epoch": 0.7996754532793274, "grad_norm": 1.1129631996154785, "learning_rate": 6.549799048443865e-05, "loss": 0.9878, "step": 125170 }, { "epoch": 0.7997393404290661, "grad_norm": 0.9657943248748779, "learning_rate": 6.549321984409221e-05, "loss": 0.8496, "step": 125180 }, { "epoch": 0.7998032275788048, "grad_norm": 0.8863055109977722, "learning_rate": 6.548844904771564e-05, "loss": 0.8804, "step": 125190 }, { "epoch": 0.7998671147285435, "grad_norm": 0.6890332698822021, "learning_rate": 6.548367809535699e-05, "loss": 0.766, "step": 125200 }, { "epoch": 0.7999310018782823, "grad_norm": 1.1075465679168701, "learning_rate": 6.54789069870643e-05, "loss": 0.8441, "step": 125210 }, { "epoch": 0.7999948890280209, "grad_norm": 0.981423020362854, "learning_rate": 6.547413572288564e-05, "loss": 0.7874, "step": 125220 }, { "epoch": 0.8000587761777596, "grad_norm": 0.9776214957237244, "learning_rate": 6.546936430286903e-05, "loss": 1.1112, "step": 125230 }, { "epoch": 0.8001226633274983, "grad_norm": 0.6384032964706421, "learning_rate": 6.546459272706254e-05, "loss": 0.781, "step": 125240 }, { "epoch": 0.800186550477237, "grad_norm": 1.0786020755767822, "learning_rate": 6.545982099551422e-05, "loss": 0.9097, "step": 125250 }, { "epoch": 0.8002504376269757, "grad_norm": 1.0218867063522339, "learning_rate": 6.545504910827214e-05, "loss": 0.8466, "step": 125260 }, { "epoch": 0.8003143247767144, "grad_norm": 0.5271647572517395, "learning_rate": 6.545027706538434e-05, "loss": 0.8278, "step": 125270 }, { "epoch": 0.8003782119264531, "grad_norm": 0.5940924286842346, "learning_rate": 6.544550486689889e-05, "loss": 0.7146, "step": 125280 }, { "epoch": 0.8004420990761918, "grad_norm": 1.223508596420288, "learning_rate": 6.544073251286383e-05, "loss": 0.9559, "step": 125290 }, { "epoch": 0.8005059862259305, "grad_norm": 0.8251738548278809, "learning_rate": 6.543596000332724e-05, "loss": 0.8241, "step": 125300 }, { "epoch": 0.8005698733756692, "grad_norm": 1.2967746257781982, "learning_rate": 6.543118733833719e-05, "loss": 0.7866, "step": 125310 }, { "epoch": 0.8006337605254079, "grad_norm": 1.4892044067382812, "learning_rate": 6.542641451794172e-05, "loss": 0.785, "step": 125320 }, { "epoch": 0.8006976476751466, "grad_norm": 0.7800642848014832, "learning_rate": 6.54216415421889e-05, "loss": 1.0806, "step": 125330 }, { "epoch": 0.8007615348248853, "grad_norm": 1.0607541799545288, "learning_rate": 6.541686841112685e-05, "loss": 0.8438, "step": 125340 }, { "epoch": 0.800825421974624, "grad_norm": 1.6348508596420288, "learning_rate": 6.541209512480355e-05, "loss": 0.9009, "step": 125350 }, { "epoch": 0.8008893091243627, "grad_norm": 0.7919349670410156, "learning_rate": 6.540732168326715e-05, "loss": 0.9583, "step": 125360 }, { "epoch": 0.8009531962741014, "grad_norm": 0.8712650537490845, "learning_rate": 6.540254808656567e-05, "loss": 0.7806, "step": 125370 }, { "epoch": 0.8010170834238401, "grad_norm": 0.9894066452980042, "learning_rate": 6.539825171690796e-05, "loss": 0.9936, "step": 125380 }, { "epoch": 0.8010809705735789, "grad_norm": 1.3204667568206787, "learning_rate": 6.539347782552532e-05, "loss": 0.9534, "step": 125390 }, { "epoch": 0.8011448577233176, "grad_norm": 0.7939335107803345, "learning_rate": 6.538870377911706e-05, "loss": 0.9815, "step": 125400 }, { "epoch": 0.8012087448730563, "grad_norm": 0.7755239605903625, "learning_rate": 6.538392957773122e-05, "loss": 0.8787, "step": 125410 }, { "epoch": 0.801272632022795, "grad_norm": 1.006554126739502, "learning_rate": 6.53791552214159e-05, "loss": 0.9379, "step": 125420 }, { "epoch": 0.8013365191725337, "grad_norm": 0.7018999457359314, "learning_rate": 6.53743807102192e-05, "loss": 0.8513, "step": 125430 }, { "epoch": 0.8014004063222724, "grad_norm": 0.9612287878990173, "learning_rate": 6.536960604418918e-05, "loss": 1.0788, "step": 125440 }, { "epoch": 0.8014642934720111, "grad_norm": 0.7687857151031494, "learning_rate": 6.536483122337391e-05, "loss": 0.9172, "step": 125450 }, { "epoch": 0.8015281806217497, "grad_norm": 1.8492335081100464, "learning_rate": 6.536005624782152e-05, "loss": 0.8897, "step": 125460 }, { "epoch": 0.8015920677714884, "grad_norm": 0.7783719301223755, "learning_rate": 6.535528111758006e-05, "loss": 1.1489, "step": 125470 }, { "epoch": 0.8016559549212271, "grad_norm": 1.056986927986145, "learning_rate": 6.535050583269764e-05, "loss": 0.7073, "step": 125480 }, { "epoch": 0.8017198420709658, "grad_norm": 0.8337403535842896, "learning_rate": 6.534573039322235e-05, "loss": 0.8441, "step": 125490 }, { "epoch": 0.8017837292207045, "grad_norm": 0.8885868191719055, "learning_rate": 6.534095479920227e-05, "loss": 0.7835, "step": 125500 }, { "epoch": 0.8018476163704432, "grad_norm": 1.2602735757827759, "learning_rate": 6.533617905068549e-05, "loss": 0.7678, "step": 125510 }, { "epoch": 0.8019115035201819, "grad_norm": 1.2273060083389282, "learning_rate": 6.533140314772015e-05, "loss": 1.3602, "step": 125520 }, { "epoch": 0.8019753906699206, "grad_norm": 0.9865765571594238, "learning_rate": 6.532662709035431e-05, "loss": 1.0048, "step": 125530 }, { "epoch": 0.8020392778196593, "grad_norm": 0.8420624136924744, "learning_rate": 6.532185087863607e-05, "loss": 0.7444, "step": 125540 }, { "epoch": 0.802103164969398, "grad_norm": 4.6297712326049805, "learning_rate": 6.531707451261354e-05, "loss": 0.921, "step": 125550 }, { "epoch": 0.8021670521191367, "grad_norm": 0.9108629822731018, "learning_rate": 6.531229799233482e-05, "loss": 1.1591, "step": 125560 }, { "epoch": 0.8022309392688755, "grad_norm": 1.6921429634094238, "learning_rate": 6.530752131784801e-05, "loss": 0.8015, "step": 125570 }, { "epoch": 0.8022948264186142, "grad_norm": 0.6055482029914856, "learning_rate": 6.530274448920122e-05, "loss": 0.8499, "step": 125580 }, { "epoch": 0.8023587135683529, "grad_norm": 1.0739818811416626, "learning_rate": 6.529796750644255e-05, "loss": 0.8019, "step": 125590 }, { "epoch": 0.8024226007180916, "grad_norm": 1.0607513189315796, "learning_rate": 6.52931903696201e-05, "loss": 0.9353, "step": 125600 }, { "epoch": 0.8024864878678303, "grad_norm": 0.9030999541282654, "learning_rate": 6.528841307878201e-05, "loss": 1.1511, "step": 125610 }, { "epoch": 0.802550375017569, "grad_norm": 1.075486183166504, "learning_rate": 6.528363563397638e-05, "loss": 0.8964, "step": 125620 }, { "epoch": 0.8026142621673077, "grad_norm": 1.8785415887832642, "learning_rate": 6.527885803525131e-05, "loss": 0.7627, "step": 125630 }, { "epoch": 0.8026781493170464, "grad_norm": 0.9460232853889465, "learning_rate": 6.527408028265491e-05, "loss": 0.8828, "step": 125640 }, { "epoch": 0.8027420364667851, "grad_norm": 0.7924548387527466, "learning_rate": 6.526930237623533e-05, "loss": 0.7436, "step": 125650 }, { "epoch": 0.8028059236165238, "grad_norm": 0.8257904052734375, "learning_rate": 6.526452431604065e-05, "loss": 1.0438, "step": 125660 }, { "epoch": 0.8028698107662625, "grad_norm": 1.1398284435272217, "learning_rate": 6.5259746102119e-05, "loss": 0.8756, "step": 125670 }, { "epoch": 0.8029336979160012, "grad_norm": 0.955585777759552, "learning_rate": 6.52549677345185e-05, "loss": 1.0752, "step": 125680 }, { "epoch": 0.8029975850657399, "grad_norm": 0.8351637721061707, "learning_rate": 6.525018921328729e-05, "loss": 0.9187, "step": 125690 }, { "epoch": 0.8030614722154786, "grad_norm": 0.9746791124343872, "learning_rate": 6.524541053847349e-05, "loss": 0.6942, "step": 125700 }, { "epoch": 0.8031253593652172, "grad_norm": 0.697482705116272, "learning_rate": 6.52406317101252e-05, "loss": 0.7528, "step": 125710 }, { "epoch": 0.8031892465149559, "grad_norm": 0.9149326682090759, "learning_rate": 6.523585272829056e-05, "loss": 0.85, "step": 125720 }, { "epoch": 0.8032531336646946, "grad_norm": 0.9170807003974915, "learning_rate": 6.52310735930177e-05, "loss": 1.1702, "step": 125730 }, { "epoch": 0.8033170208144333, "grad_norm": 0.8044551014900208, "learning_rate": 6.522629430435479e-05, "loss": 1.0825, "step": 125740 }, { "epoch": 0.803380907964172, "grad_norm": 1.228047490119934, "learning_rate": 6.522151486234989e-05, "loss": 0.7574, "step": 125750 }, { "epoch": 0.8034447951139108, "grad_norm": 0.9429476857185364, "learning_rate": 6.521673526705116e-05, "loss": 1.0447, "step": 125760 }, { "epoch": 0.8035086822636495, "grad_norm": 1.0148427486419678, "learning_rate": 6.521195551850676e-05, "loss": 1.0113, "step": 125770 }, { "epoch": 0.8035725694133882, "grad_norm": 0.9460819959640503, "learning_rate": 6.520717561676481e-05, "loss": 1.0225, "step": 125780 }, { "epoch": 0.8036364565631269, "grad_norm": 1.2216135263442993, "learning_rate": 6.520239556187345e-05, "loss": 0.999, "step": 125790 }, { "epoch": 0.8037003437128656, "grad_norm": 0.7542139887809753, "learning_rate": 6.519761535388079e-05, "loss": 1.1307, "step": 125800 }, { "epoch": 0.8037642308626043, "grad_norm": 0.6314334273338318, "learning_rate": 6.519283499283502e-05, "loss": 0.8114, "step": 125810 }, { "epoch": 0.803828118012343, "grad_norm": 1.1564096212387085, "learning_rate": 6.518805447878425e-05, "loss": 0.8931, "step": 125820 }, { "epoch": 0.8038920051620817, "grad_norm": 0.7837060689926147, "learning_rate": 6.518327381177663e-05, "loss": 0.7861, "step": 125830 }, { "epoch": 0.8039558923118204, "grad_norm": 0.8681246042251587, "learning_rate": 6.51784929918603e-05, "loss": 1.194, "step": 125840 }, { "epoch": 0.8040197794615591, "grad_norm": 1.4381413459777832, "learning_rate": 6.517371201908342e-05, "loss": 0.8307, "step": 125850 }, { "epoch": 0.8040836666112978, "grad_norm": 1.4342834949493408, "learning_rate": 6.516893089349414e-05, "loss": 0.7483, "step": 125860 }, { "epoch": 0.8041475537610365, "grad_norm": 0.9879970550537109, "learning_rate": 6.516414961514059e-05, "loss": 1.1164, "step": 125870 }, { "epoch": 0.8042114409107752, "grad_norm": 1.0189735889434814, "learning_rate": 6.515936818407095e-05, "loss": 0.9046, "step": 125880 }, { "epoch": 0.8042753280605139, "grad_norm": 1.1108025312423706, "learning_rate": 6.515458660033335e-05, "loss": 0.913, "step": 125890 }, { "epoch": 0.8043392152102526, "grad_norm": 0.862022876739502, "learning_rate": 6.514980486397595e-05, "loss": 0.913, "step": 125900 }, { "epoch": 0.8044031023599914, "grad_norm": 0.8951718807220459, "learning_rate": 6.51450229750469e-05, "loss": 0.8194, "step": 125910 }, { "epoch": 0.8044669895097301, "grad_norm": 0.9488630890846252, "learning_rate": 6.514024093359438e-05, "loss": 0.9198, "step": 125920 }, { "epoch": 0.8045308766594688, "grad_norm": 1.038546085357666, "learning_rate": 6.513545873966654e-05, "loss": 0.8252, "step": 125930 }, { "epoch": 0.8045947638092075, "grad_norm": 0.8957170844078064, "learning_rate": 6.513067639331151e-05, "loss": 0.9968, "step": 125940 }, { "epoch": 0.8046586509589461, "grad_norm": 0.7613710761070251, "learning_rate": 6.512589389457751e-05, "loss": 0.9103, "step": 125950 }, { "epoch": 0.8047225381086848, "grad_norm": 1.0033246278762817, "learning_rate": 6.512111124351265e-05, "loss": 0.9965, "step": 125960 }, { "epoch": 0.8047864252584235, "grad_norm": 0.5386576652526855, "learning_rate": 6.511632844016512e-05, "loss": 0.7118, "step": 125970 }, { "epoch": 0.8048503124081622, "grad_norm": 0.74485844373703, "learning_rate": 6.511154548458312e-05, "loss": 0.7851, "step": 125980 }, { "epoch": 0.8049141995579009, "grad_norm": 0.9282761812210083, "learning_rate": 6.510676237681475e-05, "loss": 0.9678, "step": 125990 }, { "epoch": 0.8049780867076396, "grad_norm": 2.0792996883392334, "learning_rate": 6.510197911690822e-05, "loss": 1.4649, "step": 126000 }, { "epoch": 0.8050419738573783, "grad_norm": 0.6187208294868469, "learning_rate": 6.50971957049117e-05, "loss": 0.9499, "step": 126010 }, { "epoch": 0.805105861007117, "grad_norm": 0.8118966221809387, "learning_rate": 6.509241214087334e-05, "loss": 0.7766, "step": 126020 }, { "epoch": 0.8051697481568557, "grad_norm": 0.8239946365356445, "learning_rate": 6.508762842484135e-05, "loss": 0.8789, "step": 126030 }, { "epoch": 0.8052336353065944, "grad_norm": 1.1240622997283936, "learning_rate": 6.508284455686388e-05, "loss": 0.6257, "step": 126040 }, { "epoch": 0.8052975224563331, "grad_norm": 1.1769168376922607, "learning_rate": 6.507806053698912e-05, "loss": 0.8184, "step": 126050 }, { "epoch": 0.8053614096060718, "grad_norm": 1.071930170059204, "learning_rate": 6.507327636526526e-05, "loss": 0.778, "step": 126060 }, { "epoch": 0.8054252967558105, "grad_norm": 0.9074715375900269, "learning_rate": 6.506849204174045e-05, "loss": 1.0664, "step": 126070 }, { "epoch": 0.8054891839055492, "grad_norm": 1.1464279890060425, "learning_rate": 6.50637075664629e-05, "loss": 0.9875, "step": 126080 }, { "epoch": 0.805553071055288, "grad_norm": 1.2106982469558716, "learning_rate": 6.505892293948077e-05, "loss": 0.9013, "step": 126090 }, { "epoch": 0.8056169582050267, "grad_norm": 0.763820230960846, "learning_rate": 6.505413816084227e-05, "loss": 0.8071, "step": 126100 }, { "epoch": 0.8056808453547654, "grad_norm": 1.4662244319915771, "learning_rate": 6.504935323059558e-05, "loss": 0.7963, "step": 126110 }, { "epoch": 0.8057447325045041, "grad_norm": 1.1400254964828491, "learning_rate": 6.504456814878888e-05, "loss": 0.8126, "step": 126120 }, { "epoch": 0.8058086196542428, "grad_norm": 0.844118595123291, "learning_rate": 6.503978291547035e-05, "loss": 0.8508, "step": 126130 }, { "epoch": 0.8058725068039815, "grad_norm": 0.9051877856254578, "learning_rate": 6.50349975306882e-05, "loss": 0.788, "step": 126140 }, { "epoch": 0.8059363939537202, "grad_norm": 0.8042912483215332, "learning_rate": 6.503021199449063e-05, "loss": 0.9271, "step": 126150 }, { "epoch": 0.8060002811034589, "grad_norm": 0.8122944235801697, "learning_rate": 6.50254263069258e-05, "loss": 1.2381, "step": 126160 }, { "epoch": 0.8060641682531976, "grad_norm": 0.9089512228965759, "learning_rate": 6.502064046804193e-05, "loss": 0.8603, "step": 126170 }, { "epoch": 0.8061280554029363, "grad_norm": 0.9631441235542297, "learning_rate": 6.501585447788724e-05, "loss": 1.0211, "step": 126180 }, { "epoch": 0.8061919425526749, "grad_norm": 0.5467532873153687, "learning_rate": 6.501106833650989e-05, "loss": 0.7911, "step": 126190 }, { "epoch": 0.8062558297024136, "grad_norm": 0.8144317269325256, "learning_rate": 6.500628204395809e-05, "loss": 1.0271, "step": 126200 }, { "epoch": 0.8063197168521523, "grad_norm": 0.6521165370941162, "learning_rate": 6.500149560028005e-05, "loss": 0.9794, "step": 126210 }, { "epoch": 0.806383604001891, "grad_norm": 0.6328021287918091, "learning_rate": 6.499670900552397e-05, "loss": 0.8287, "step": 126220 }, { "epoch": 0.8064474911516297, "grad_norm": 0.565212607383728, "learning_rate": 6.499192225973806e-05, "loss": 0.8106, "step": 126230 }, { "epoch": 0.8065113783013684, "grad_norm": 0.8968755602836609, "learning_rate": 6.498713536297053e-05, "loss": 0.7972, "step": 126240 }, { "epoch": 0.8065752654511071, "grad_norm": 0.9558743834495544, "learning_rate": 6.498234831526957e-05, "loss": 0.8166, "step": 126250 }, { "epoch": 0.8066391526008458, "grad_norm": 0.8803595900535583, "learning_rate": 6.497756111668342e-05, "loss": 0.7788, "step": 126260 }, { "epoch": 0.8067030397505845, "grad_norm": 0.6762875914573669, "learning_rate": 6.497277376726025e-05, "loss": 0.9889, "step": 126270 }, { "epoch": 0.8067669269003233, "grad_norm": 0.7682203650474548, "learning_rate": 6.496798626704831e-05, "loss": 1.016, "step": 126280 }, { "epoch": 0.806830814050062, "grad_norm": 0.8153054714202881, "learning_rate": 6.496319861609579e-05, "loss": 0.8005, "step": 126290 }, { "epoch": 0.8068947011998007, "grad_norm": 0.767785906791687, "learning_rate": 6.495841081445091e-05, "loss": 0.678, "step": 126300 }, { "epoch": 0.8069585883495394, "grad_norm": 1.054632544517517, "learning_rate": 6.495362286216191e-05, "loss": 0.8752, "step": 126310 }, { "epoch": 0.8070224754992781, "grad_norm": 1.0451246500015259, "learning_rate": 6.494883475927698e-05, "loss": 0.8354, "step": 126320 }, { "epoch": 0.8070863626490168, "grad_norm": 0.6930572986602783, "learning_rate": 6.494404650584435e-05, "loss": 0.9319, "step": 126330 }, { "epoch": 0.8071502497987555, "grad_norm": 0.9584304094314575, "learning_rate": 6.493925810191226e-05, "loss": 0.924, "step": 126340 }, { "epoch": 0.8072141369484942, "grad_norm": 1.0455424785614014, "learning_rate": 6.49344695475289e-05, "loss": 1.0299, "step": 126350 }, { "epoch": 0.8072780240982329, "grad_norm": 1.3468433618545532, "learning_rate": 6.49296808427425e-05, "loss": 0.7191, "step": 126360 }, { "epoch": 0.8073419112479716, "grad_norm": 1.8125096559524536, "learning_rate": 6.492489198760131e-05, "loss": 0.8762, "step": 126370 }, { "epoch": 0.8074057983977103, "grad_norm": 1.148374080657959, "learning_rate": 6.492010298215355e-05, "loss": 0.8672, "step": 126380 }, { "epoch": 0.807469685547449, "grad_norm": 0.7599702477455139, "learning_rate": 6.491531382644744e-05, "loss": 0.9156, "step": 126390 }, { "epoch": 0.8075335726971877, "grad_norm": 1.1603766679763794, "learning_rate": 6.491052452053123e-05, "loss": 0.7981, "step": 126400 }, { "epoch": 0.8075974598469264, "grad_norm": 0.9405614733695984, "learning_rate": 6.490573506445312e-05, "loss": 0.8393, "step": 126410 }, { "epoch": 0.8076613469966651, "grad_norm": 1.048951268196106, "learning_rate": 6.490094545826137e-05, "loss": 1.0174, "step": 126420 }, { "epoch": 0.8077252341464038, "grad_norm": 2.152139663696289, "learning_rate": 6.48961557020042e-05, "loss": 1.0473, "step": 126430 }, { "epoch": 0.8077891212961424, "grad_norm": 0.6861464977264404, "learning_rate": 6.489136579572987e-05, "loss": 0.7224, "step": 126440 }, { "epoch": 0.8078530084458811, "grad_norm": 0.8665691018104553, "learning_rate": 6.48865757394866e-05, "loss": 1.0713, "step": 126450 }, { "epoch": 0.8079168955956199, "grad_norm": 0.651671826839447, "learning_rate": 6.488178553332262e-05, "loss": 1.0617, "step": 126460 }, { "epoch": 0.8079807827453586, "grad_norm": 1.4242401123046875, "learning_rate": 6.487699517728621e-05, "loss": 0.8041, "step": 126470 }, { "epoch": 0.8080446698950973, "grad_norm": 0.5821726322174072, "learning_rate": 6.487220467142556e-05, "loss": 0.9239, "step": 126480 }, { "epoch": 0.808108557044836, "grad_norm": 0.5187436938285828, "learning_rate": 6.486741401578897e-05, "loss": 0.7294, "step": 126490 }, { "epoch": 0.8081724441945747, "grad_norm": 0.7180354595184326, "learning_rate": 6.486262321042465e-05, "loss": 0.8105, "step": 126500 }, { "epoch": 0.8082363313443134, "grad_norm": 1.0905053615570068, "learning_rate": 6.485783225538084e-05, "loss": 0.7558, "step": 126510 }, { "epoch": 0.8083002184940521, "grad_norm": 0.7358648777008057, "learning_rate": 6.485304115070582e-05, "loss": 0.7867, "step": 126520 }, { "epoch": 0.8083641056437908, "grad_norm": 0.6395271420478821, "learning_rate": 6.484824989644783e-05, "loss": 0.8508, "step": 126530 }, { "epoch": 0.8084279927935295, "grad_norm": 0.877444863319397, "learning_rate": 6.48434584926551e-05, "loss": 1.0844, "step": 126540 }, { "epoch": 0.8084918799432682, "grad_norm": 0.5219199061393738, "learning_rate": 6.483866693937591e-05, "loss": 0.7814, "step": 126550 }, { "epoch": 0.8085557670930069, "grad_norm": 2.268413543701172, "learning_rate": 6.483387523665852e-05, "loss": 0.8243, "step": 126560 }, { "epoch": 0.8086196542427456, "grad_norm": 0.6467793583869934, "learning_rate": 6.482908338455113e-05, "loss": 0.8682, "step": 126570 }, { "epoch": 0.8086835413924843, "grad_norm": 1.109560489654541, "learning_rate": 6.48242913831021e-05, "loss": 0.7325, "step": 126580 }, { "epoch": 0.808747428542223, "grad_norm": 2.1238925457000732, "learning_rate": 6.48194992323596e-05, "loss": 0.7621, "step": 126590 }, { "epoch": 0.8088113156919617, "grad_norm": 0.7818292379379272, "learning_rate": 6.481470693237193e-05, "loss": 0.8513, "step": 126600 }, { "epoch": 0.8088752028417004, "grad_norm": 0.8651500344276428, "learning_rate": 6.480991448318735e-05, "loss": 1.1925, "step": 126610 }, { "epoch": 0.8089390899914392, "grad_norm": 1.0120964050292969, "learning_rate": 6.48051218848541e-05, "loss": 0.7734, "step": 126620 }, { "epoch": 0.8090029771411779, "grad_norm": 0.6055595278739929, "learning_rate": 6.480032913742047e-05, "loss": 0.8752, "step": 126630 }, { "epoch": 0.8090668642909166, "grad_norm": 0.9761593341827393, "learning_rate": 6.479553624093473e-05, "loss": 1.0153, "step": 126640 }, { "epoch": 0.8091307514406553, "grad_norm": 0.926140308380127, "learning_rate": 6.479074319544513e-05, "loss": 0.7519, "step": 126650 }, { "epoch": 0.809194638590394, "grad_norm": 1.0344536304473877, "learning_rate": 6.478595000099996e-05, "loss": 0.8786, "step": 126660 }, { "epoch": 0.8092585257401327, "grad_norm": 1.2882970571517944, "learning_rate": 6.478115665764748e-05, "loss": 0.8631, "step": 126670 }, { "epoch": 0.8093224128898713, "grad_norm": 0.98709636926651, "learning_rate": 6.477636316543596e-05, "loss": 0.9382, "step": 126680 }, { "epoch": 0.80938630003961, "grad_norm": 0.9741780161857605, "learning_rate": 6.477156952441368e-05, "loss": 0.7388, "step": 126690 }, { "epoch": 0.8094501871893487, "grad_norm": 0.7120775580406189, "learning_rate": 6.476677573462893e-05, "loss": 0.8167, "step": 126700 }, { "epoch": 0.8095140743390874, "grad_norm": 0.9984919428825378, "learning_rate": 6.476198179612995e-05, "loss": 0.897, "step": 126710 }, { "epoch": 0.8095779614888261, "grad_norm": 2.221468925476074, "learning_rate": 6.475718770896505e-05, "loss": 1.0463, "step": 126720 }, { "epoch": 0.8096418486385648, "grad_norm": 0.9233216643333435, "learning_rate": 6.47523934731825e-05, "loss": 0.8333, "step": 126730 }, { "epoch": 0.8097057357883035, "grad_norm": 0.7584207057952881, "learning_rate": 6.47475990888306e-05, "loss": 1.0903, "step": 126740 }, { "epoch": 0.8097696229380422, "grad_norm": 0.913167417049408, "learning_rate": 6.474280455595761e-05, "loss": 0.9977, "step": 126750 }, { "epoch": 0.8098335100877809, "grad_norm": 0.8217071890830994, "learning_rate": 6.473800987461182e-05, "loss": 0.8709, "step": 126760 }, { "epoch": 0.8098973972375196, "grad_norm": 0.8127371072769165, "learning_rate": 6.473321504484152e-05, "loss": 0.9532, "step": 126770 }, { "epoch": 0.8099612843872583, "grad_norm": 0.5659823417663574, "learning_rate": 6.4728420066695e-05, "loss": 0.9879, "step": 126780 }, { "epoch": 0.810025171536997, "grad_norm": 4.89599084854126, "learning_rate": 6.472362494022055e-05, "loss": 0.973, "step": 126790 }, { "epoch": 0.8100890586867358, "grad_norm": 2.1333658695220947, "learning_rate": 6.471882966546647e-05, "loss": 0.6452, "step": 126800 }, { "epoch": 0.8101529458364745, "grad_norm": 0.8865774869918823, "learning_rate": 6.471403424248102e-05, "loss": 0.7954, "step": 126810 }, { "epoch": 0.8102168329862132, "grad_norm": 0.8974156975746155, "learning_rate": 6.470923867131254e-05, "loss": 0.9977, "step": 126820 }, { "epoch": 0.8102807201359519, "grad_norm": 1.3754866123199463, "learning_rate": 6.47044429520093e-05, "loss": 0.8972, "step": 126830 }, { "epoch": 0.8103446072856906, "grad_norm": 0.8997700214385986, "learning_rate": 6.469964708461957e-05, "loss": 0.6717, "step": 126840 }, { "epoch": 0.8104084944354293, "grad_norm": 0.7010866403579712, "learning_rate": 6.469485106919171e-05, "loss": 0.688, "step": 126850 }, { "epoch": 0.810472381585168, "grad_norm": 1.2997609376907349, "learning_rate": 6.469005490577397e-05, "loss": 1.0006, "step": 126860 }, { "epoch": 0.8105362687349067, "grad_norm": 0.8053306937217712, "learning_rate": 6.468525859441466e-05, "loss": 0.8973, "step": 126870 }, { "epoch": 0.8106001558846454, "grad_norm": 0.7065293192863464, "learning_rate": 6.46804621351621e-05, "loss": 1.0594, "step": 126880 }, { "epoch": 0.8106640430343841, "grad_norm": 1.1768196821212769, "learning_rate": 6.467566552806458e-05, "loss": 0.9089, "step": 126890 }, { "epoch": 0.8107279301841228, "grad_norm": 1.5799227952957153, "learning_rate": 6.467086877317042e-05, "loss": 0.7268, "step": 126900 }, { "epoch": 0.8107918173338615, "grad_norm": 0.7918219566345215, "learning_rate": 6.466607187052791e-05, "loss": 1.016, "step": 126910 }, { "epoch": 0.8108557044836001, "grad_norm": 1.188558578491211, "learning_rate": 6.466127482018538e-05, "loss": 1.0635, "step": 126920 }, { "epoch": 0.8109195916333388, "grad_norm": 0.8027870059013367, "learning_rate": 6.465647762219113e-05, "loss": 0.7027, "step": 126930 }, { "epoch": 0.8109834787830775, "grad_norm": 0.84566330909729, "learning_rate": 6.465168027659347e-05, "loss": 0.8105, "step": 126940 }, { "epoch": 0.8110473659328162, "grad_norm": 0.6688374280929565, "learning_rate": 6.46468827834407e-05, "loss": 0.7993, "step": 126950 }, { "epoch": 0.8111112530825549, "grad_norm": 0.7672613263130188, "learning_rate": 6.464208514278117e-05, "loss": 0.9798, "step": 126960 }, { "epoch": 0.8111751402322936, "grad_norm": 1.0003461837768555, "learning_rate": 6.463728735466316e-05, "loss": 0.8659, "step": 126970 }, { "epoch": 0.8112390273820324, "grad_norm": 1.0807254314422607, "learning_rate": 6.4632489419135e-05, "loss": 0.9423, "step": 126980 }, { "epoch": 0.8113029145317711, "grad_norm": 0.6084434390068054, "learning_rate": 6.462769133624502e-05, "loss": 0.6477, "step": 126990 }, { "epoch": 0.8113668016815098, "grad_norm": 0.7331100106239319, "learning_rate": 6.462289310604152e-05, "loss": 1.0194, "step": 127000 }, { "epoch": 0.8114306888312485, "grad_norm": 1.0678889751434326, "learning_rate": 6.461809472857287e-05, "loss": 0.7349, "step": 127010 }, { "epoch": 0.8114945759809872, "grad_norm": 1.4180760383605957, "learning_rate": 6.461329620388733e-05, "loss": 0.7278, "step": 127020 }, { "epoch": 0.8115584631307259, "grad_norm": 0.904155433177948, "learning_rate": 6.460849753203326e-05, "loss": 0.8996, "step": 127030 }, { "epoch": 0.8116223502804646, "grad_norm": 0.8179849982261658, "learning_rate": 6.460369871305899e-05, "loss": 0.8029, "step": 127040 }, { "epoch": 0.8116862374302033, "grad_norm": 0.8025046586990356, "learning_rate": 6.459889974701284e-05, "loss": 1.1434, "step": 127050 }, { "epoch": 0.811750124579942, "grad_norm": 0.9315536022186279, "learning_rate": 6.459410063394314e-05, "loss": 0.7199, "step": 127060 }, { "epoch": 0.8118140117296807, "grad_norm": 1.0310189723968506, "learning_rate": 6.458930137389821e-05, "loss": 0.8107, "step": 127070 }, { "epoch": 0.8118778988794194, "grad_norm": 0.897158682346344, "learning_rate": 6.45845019669264e-05, "loss": 0.9007, "step": 127080 }, { "epoch": 0.8119417860291581, "grad_norm": 0.8485071659088135, "learning_rate": 6.457970241307603e-05, "loss": 0.9143, "step": 127090 }, { "epoch": 0.8120056731788968, "grad_norm": 1.0846539735794067, "learning_rate": 6.457490271239546e-05, "loss": 0.5329, "step": 127100 }, { "epoch": 0.8120695603286355, "grad_norm": 1.0660455226898193, "learning_rate": 6.457010286493299e-05, "loss": 0.8025, "step": 127110 }, { "epoch": 0.8121334474783742, "grad_norm": 0.7800552248954773, "learning_rate": 6.4565302870737e-05, "loss": 0.7744, "step": 127120 }, { "epoch": 0.8121973346281129, "grad_norm": 1.9460397958755493, "learning_rate": 6.45605027298558e-05, "loss": 1.0076, "step": 127130 }, { "epoch": 0.8122612217778516, "grad_norm": 0.7242342233657837, "learning_rate": 6.455570244233774e-05, "loss": 0.878, "step": 127140 }, { "epoch": 0.8123251089275904, "grad_norm": 1.0195945501327515, "learning_rate": 6.455090200823117e-05, "loss": 1.0594, "step": 127150 }, { "epoch": 0.812388996077329, "grad_norm": 0.4291139841079712, "learning_rate": 6.454610142758442e-05, "loss": 0.8271, "step": 127160 }, { "epoch": 0.8124528832270677, "grad_norm": 0.8189600110054016, "learning_rate": 6.454130070044584e-05, "loss": 1.0931, "step": 127170 }, { "epoch": 0.8125167703768064, "grad_norm": 0.6839133501052856, "learning_rate": 6.45364998268638e-05, "loss": 0.9161, "step": 127180 }, { "epoch": 0.8125806575265451, "grad_norm": 0.835392951965332, "learning_rate": 6.45316988068866e-05, "loss": 0.9721, "step": 127190 }, { "epoch": 0.8126445446762838, "grad_norm": 0.5460143089294434, "learning_rate": 6.452689764056265e-05, "loss": 0.9177, "step": 127200 }, { "epoch": 0.8127084318260225, "grad_norm": 1.0494486093521118, "learning_rate": 6.452209632794027e-05, "loss": 0.8844, "step": 127210 }, { "epoch": 0.8127723189757612, "grad_norm": 0.6247775554656982, "learning_rate": 6.451729486906781e-05, "loss": 0.9528, "step": 127220 }, { "epoch": 0.8128362061254999, "grad_norm": 1.8002761602401733, "learning_rate": 6.451249326399364e-05, "loss": 1.1712, "step": 127230 }, { "epoch": 0.8129000932752386, "grad_norm": 0.9478850960731506, "learning_rate": 6.45076915127661e-05, "loss": 0.8959, "step": 127240 }, { "epoch": 0.8129639804249773, "grad_norm": 0.707378089427948, "learning_rate": 6.450288961543355e-05, "loss": 0.8968, "step": 127250 }, { "epoch": 0.813027867574716, "grad_norm": 0.9674128890037537, "learning_rate": 6.449808757204435e-05, "loss": 0.862, "step": 127260 }, { "epoch": 0.8130917547244547, "grad_norm": 1.1867669820785522, "learning_rate": 6.449328538264687e-05, "loss": 0.808, "step": 127270 }, { "epoch": 0.8131556418741934, "grad_norm": 1.1251099109649658, "learning_rate": 6.448848304728949e-05, "loss": 0.8379, "step": 127280 }, { "epoch": 0.8132195290239321, "grad_norm": 0.891304612159729, "learning_rate": 6.448368056602053e-05, "loss": 0.9116, "step": 127290 }, { "epoch": 0.8132834161736708, "grad_norm": 1.0595531463623047, "learning_rate": 6.447887793888838e-05, "loss": 0.8859, "step": 127300 }, { "epoch": 0.8133473033234095, "grad_norm": 0.8898464441299438, "learning_rate": 6.447407516594142e-05, "loss": 0.982, "step": 127310 }, { "epoch": 0.8134111904731482, "grad_norm": 2.1470937728881836, "learning_rate": 6.446927224722799e-05, "loss": 0.8127, "step": 127320 }, { "epoch": 0.813475077622887, "grad_norm": 1.043031096458435, "learning_rate": 6.446446918279647e-05, "loss": 0.8647, "step": 127330 }, { "epoch": 0.8135389647726257, "grad_norm": 0.8971779942512512, "learning_rate": 6.445966597269522e-05, "loss": 1.0, "step": 127340 }, { "epoch": 0.8136028519223644, "grad_norm": 0.8842697739601135, "learning_rate": 6.445486261697263e-05, "loss": 0.8011, "step": 127350 }, { "epoch": 0.8136667390721031, "grad_norm": 0.8753737211227417, "learning_rate": 6.445005911567707e-05, "loss": 0.9761, "step": 127360 }, { "epoch": 0.8137306262218418, "grad_norm": 0.7797544598579407, "learning_rate": 6.444525546885692e-05, "loss": 0.7789, "step": 127370 }, { "epoch": 0.8137945133715805, "grad_norm": 0.49460268020629883, "learning_rate": 6.444045167656055e-05, "loss": 0.836, "step": 127380 }, { "epoch": 0.8138584005213192, "grad_norm": 1.005393385887146, "learning_rate": 6.443564773883634e-05, "loss": 0.8948, "step": 127390 }, { "epoch": 0.8139222876710579, "grad_norm": 0.7220799922943115, "learning_rate": 6.443084365573265e-05, "loss": 0.8677, "step": 127400 }, { "epoch": 0.8139861748207965, "grad_norm": 0.8531742691993713, "learning_rate": 6.44260394272979e-05, "loss": 0.7786, "step": 127410 }, { "epoch": 0.8140500619705352, "grad_norm": 1.4867233037948608, "learning_rate": 6.442123505358043e-05, "loss": 0.8496, "step": 127420 }, { "epoch": 0.8141139491202739, "grad_norm": 0.6640691161155701, "learning_rate": 6.441643053462867e-05, "loss": 0.847, "step": 127430 }, { "epoch": 0.8141778362700126, "grad_norm": 0.5438361763954163, "learning_rate": 6.441162587049096e-05, "loss": 0.7101, "step": 127440 }, { "epoch": 0.8142417234197513, "grad_norm": 0.879038393497467, "learning_rate": 6.440682106121574e-05, "loss": 0.9942, "step": 127450 }, { "epoch": 0.81430561056949, "grad_norm": 0.6721540689468384, "learning_rate": 6.440201610685135e-05, "loss": 0.6765, "step": 127460 }, { "epoch": 0.8143694977192287, "grad_norm": 0.627669095993042, "learning_rate": 6.43972110074462e-05, "loss": 0.8154, "step": 127470 }, { "epoch": 0.8144333848689674, "grad_norm": 1.5187098979949951, "learning_rate": 6.439240576304868e-05, "loss": 1.1218, "step": 127480 }, { "epoch": 0.8144972720187061, "grad_norm": 0.519985556602478, "learning_rate": 6.438760037370719e-05, "loss": 0.7047, "step": 127490 }, { "epoch": 0.8145611591684448, "grad_norm": 0.7375752329826355, "learning_rate": 6.43827948394701e-05, "loss": 0.8807, "step": 127500 }, { "epoch": 0.8146250463181836, "grad_norm": 0.5669057369232178, "learning_rate": 6.437798916038584e-05, "loss": 0.8591, "step": 127510 }, { "epoch": 0.8146889334679223, "grad_norm": 0.9515382051467896, "learning_rate": 6.437318333650279e-05, "loss": 0.8639, "step": 127520 }, { "epoch": 0.814752820617661, "grad_norm": 0.9715726971626282, "learning_rate": 6.436837736786934e-05, "loss": 0.6827, "step": 127530 }, { "epoch": 0.8148167077673997, "grad_norm": 1.2894679307937622, "learning_rate": 6.43635712545339e-05, "loss": 0.7838, "step": 127540 }, { "epoch": 0.8148805949171384, "grad_norm": 0.9113032817840576, "learning_rate": 6.43587649965449e-05, "loss": 0.9437, "step": 127550 }, { "epoch": 0.8149444820668771, "grad_norm": 0.8050090074539185, "learning_rate": 6.435395859395068e-05, "loss": 1.0804, "step": 127560 }, { "epoch": 0.8150083692166158, "grad_norm": 1.1734331846237183, "learning_rate": 6.434915204679969e-05, "loss": 0.9166, "step": 127570 }, { "epoch": 0.8150722563663545, "grad_norm": 1.3602896928787231, "learning_rate": 6.434434535514031e-05, "loss": 0.8164, "step": 127580 }, { "epoch": 0.8151361435160932, "grad_norm": 0.9085065722465515, "learning_rate": 6.433953851902097e-05, "loss": 1.0613, "step": 127590 }, { "epoch": 0.8152000306658319, "grad_norm": 0.8501441478729248, "learning_rate": 6.433473153849007e-05, "loss": 0.7548, "step": 127600 }, { "epoch": 0.8152639178155706, "grad_norm": 1.4549124240875244, "learning_rate": 6.432992441359605e-05, "loss": 0.8768, "step": 127610 }, { "epoch": 0.8153278049653093, "grad_norm": 0.9004676938056946, "learning_rate": 6.432511714438727e-05, "loss": 0.9546, "step": 127620 }, { "epoch": 0.815391692115048, "grad_norm": 0.6986418962478638, "learning_rate": 6.432030973091216e-05, "loss": 0.8329, "step": 127630 }, { "epoch": 0.8154555792647867, "grad_norm": 1.1968231201171875, "learning_rate": 6.431550217321916e-05, "loss": 0.8781, "step": 127640 }, { "epoch": 0.8155194664145253, "grad_norm": 0.9904518723487854, "learning_rate": 6.431069447135665e-05, "loss": 0.7686, "step": 127650 }, { "epoch": 0.815583353564264, "grad_norm": 0.846964955329895, "learning_rate": 6.43058866253731e-05, "loss": 0.7053, "step": 127660 }, { "epoch": 0.8156472407140027, "grad_norm": 0.9893980026245117, "learning_rate": 6.430107863531685e-05, "loss": 0.7232, "step": 127670 }, { "epoch": 0.8157111278637414, "grad_norm": 0.9716863632202148, "learning_rate": 6.42962705012364e-05, "loss": 0.9921, "step": 127680 }, { "epoch": 0.8157750150134802, "grad_norm": 0.5748932361602783, "learning_rate": 6.429146222318013e-05, "loss": 0.7242, "step": 127690 }, { "epoch": 0.8158389021632189, "grad_norm": 0.6904158592224121, "learning_rate": 6.428665380119648e-05, "loss": 1.1946, "step": 127700 }, { "epoch": 0.8159027893129576, "grad_norm": 0.8382551074028015, "learning_rate": 6.428184523533384e-05, "loss": 0.9143, "step": 127710 }, { "epoch": 0.8159666764626963, "grad_norm": 1.1233938932418823, "learning_rate": 6.427703652564067e-05, "loss": 1.239, "step": 127720 }, { "epoch": 0.816030563612435, "grad_norm": 0.6471089720726013, "learning_rate": 6.42722276721654e-05, "loss": 0.9955, "step": 127730 }, { "epoch": 0.8160944507621737, "grad_norm": 0.796449601650238, "learning_rate": 6.426741867495645e-05, "loss": 1.1798, "step": 127740 }, { "epoch": 0.8161583379119124, "grad_norm": 1.216551423072815, "learning_rate": 6.426260953406225e-05, "loss": 0.9472, "step": 127750 }, { "epoch": 0.8162222250616511, "grad_norm": 0.4935864508152008, "learning_rate": 6.425780024953124e-05, "loss": 1.0413, "step": 127760 }, { "epoch": 0.8162861122113898, "grad_norm": 1.2737202644348145, "learning_rate": 6.425299082141184e-05, "loss": 1.1372, "step": 127770 }, { "epoch": 0.8163499993611285, "grad_norm": 1.7472068071365356, "learning_rate": 6.424818124975248e-05, "loss": 0.7832, "step": 127780 }, { "epoch": 0.8164138865108672, "grad_norm": 0.7582964897155762, "learning_rate": 6.424337153460162e-05, "loss": 0.8762, "step": 127790 }, { "epoch": 0.8164777736606059, "grad_norm": 1.1670618057250977, "learning_rate": 6.42385616760077e-05, "loss": 1.2763, "step": 127800 }, { "epoch": 0.8165416608103446, "grad_norm": 0.6307504773139954, "learning_rate": 6.423375167401912e-05, "loss": 0.6937, "step": 127810 }, { "epoch": 0.8166055479600833, "grad_norm": 1.9756511449813843, "learning_rate": 6.422894152868437e-05, "loss": 0.7304, "step": 127820 }, { "epoch": 0.816669435109822, "grad_norm": 1.0273828506469727, "learning_rate": 6.422413124005185e-05, "loss": 0.7999, "step": 127830 }, { "epoch": 0.8167333222595607, "grad_norm": 1.1379588842391968, "learning_rate": 6.421932080817003e-05, "loss": 0.8498, "step": 127840 }, { "epoch": 0.8167972094092995, "grad_norm": 0.7161405086517334, "learning_rate": 6.421451023308735e-05, "loss": 1.4902, "step": 127850 }, { "epoch": 0.8168610965590382, "grad_norm": 1.0441093444824219, "learning_rate": 6.420969951485225e-05, "loss": 0.8677, "step": 127860 }, { "epoch": 0.8169249837087769, "grad_norm": 0.8484379053115845, "learning_rate": 6.42048886535132e-05, "loss": 0.7921, "step": 127870 }, { "epoch": 0.8169888708585156, "grad_norm": 0.8539422750473022, "learning_rate": 6.420007764911861e-05, "loss": 0.7991, "step": 127880 }, { "epoch": 0.8170527580082542, "grad_norm": 0.5614151954650879, "learning_rate": 6.419526650171697e-05, "loss": 1.0511, "step": 127890 }, { "epoch": 0.8171166451579929, "grad_norm": 0.9628438949584961, "learning_rate": 6.41904552113567e-05, "loss": 0.6749, "step": 127900 }, { "epoch": 0.8171805323077316, "grad_norm": 0.8185387253761292, "learning_rate": 6.418564377808627e-05, "loss": 0.8436, "step": 127910 }, { "epoch": 0.8172444194574703, "grad_norm": 0.6179929971694946, "learning_rate": 6.418083220195414e-05, "loss": 0.7563, "step": 127920 }, { "epoch": 0.817308306607209, "grad_norm": 0.7823129892349243, "learning_rate": 6.417602048300877e-05, "loss": 0.8868, "step": 127930 }, { "epoch": 0.8173721937569477, "grad_norm": 1.1083999872207642, "learning_rate": 6.41712086212986e-05, "loss": 0.8661, "step": 127940 }, { "epoch": 0.8174360809066864, "grad_norm": 2.1160151958465576, "learning_rate": 6.41663966168721e-05, "loss": 0.8088, "step": 127950 }, { "epoch": 0.8174999680564251, "grad_norm": 0.6990865468978882, "learning_rate": 6.416158446977772e-05, "loss": 1.0852, "step": 127960 }, { "epoch": 0.8175638552061638, "grad_norm": 0.8294287919998169, "learning_rate": 6.415677218006395e-05, "loss": 0.8674, "step": 127970 }, { "epoch": 0.8176277423559025, "grad_norm": 1.1241607666015625, "learning_rate": 6.415195974777923e-05, "loss": 0.939, "step": 127980 }, { "epoch": 0.8176916295056412, "grad_norm": 0.8454298377037048, "learning_rate": 6.414714717297203e-05, "loss": 0.8675, "step": 127990 }, { "epoch": 0.8177555166553799, "grad_norm": 0.7560257315635681, "learning_rate": 6.414233445569083e-05, "loss": 1.0024, "step": 128000 }, { "epoch": 0.8178194038051186, "grad_norm": 0.7482271790504456, "learning_rate": 6.413752159598408e-05, "loss": 0.8029, "step": 128010 }, { "epoch": 0.8178832909548573, "grad_norm": 1.5129786729812622, "learning_rate": 6.413270859390026e-05, "loss": 1.0656, "step": 128020 }, { "epoch": 0.817947178104596, "grad_norm": 1.2773970365524292, "learning_rate": 6.412789544948782e-05, "loss": 0.9819, "step": 128030 }, { "epoch": 0.8180110652543348, "grad_norm": 0.8020282983779907, "learning_rate": 6.41230821627953e-05, "loss": 0.6803, "step": 128040 }, { "epoch": 0.8180749524040735, "grad_norm": 1.2904086112976074, "learning_rate": 6.411826873387108e-05, "loss": 1.1785, "step": 128050 }, { "epoch": 0.8181388395538122, "grad_norm": 0.6891657710075378, "learning_rate": 6.41134551627637e-05, "loss": 0.8503, "step": 128060 }, { "epoch": 0.8182027267035509, "grad_norm": 0.9676710963249207, "learning_rate": 6.41086414495216e-05, "loss": 0.8886, "step": 128070 }, { "epoch": 0.8182666138532896, "grad_norm": 0.881633460521698, "learning_rate": 6.410382759419328e-05, "loss": 0.8703, "step": 128080 }, { "epoch": 0.8183305010030283, "grad_norm": 2.246070146560669, "learning_rate": 6.409901359682722e-05, "loss": 0.9637, "step": 128090 }, { "epoch": 0.818394388152767, "grad_norm": 0.9771053194999695, "learning_rate": 6.409419945747189e-05, "loss": 0.8403, "step": 128100 }, { "epoch": 0.8184582753025057, "grad_norm": 0.9186480641365051, "learning_rate": 6.408938517617576e-05, "loss": 0.8156, "step": 128110 }, { "epoch": 0.8185221624522444, "grad_norm": 0.9104690551757812, "learning_rate": 6.408457075298734e-05, "loss": 0.9832, "step": 128120 }, { "epoch": 0.8185860496019831, "grad_norm": 0.849088191986084, "learning_rate": 6.407975618795514e-05, "loss": 1.0383, "step": 128130 }, { "epoch": 0.8186499367517217, "grad_norm": 0.667122483253479, "learning_rate": 6.40749414811276e-05, "loss": 1.0298, "step": 128140 }, { "epoch": 0.8187138239014604, "grad_norm": 0.7279898524284363, "learning_rate": 6.407012663255321e-05, "loss": 0.8889, "step": 128150 }, { "epoch": 0.8187777110511991, "grad_norm": 0.6628199219703674, "learning_rate": 6.406531164228048e-05, "loss": 1.0062, "step": 128160 }, { "epoch": 0.8188415982009378, "grad_norm": 0.8573051691055298, "learning_rate": 6.406049651035789e-05, "loss": 0.8815, "step": 128170 }, { "epoch": 0.8189054853506765, "grad_norm": 0.6094196438789368, "learning_rate": 6.405568123683395e-05, "loss": 1.0032, "step": 128180 }, { "epoch": 0.8189693725004152, "grad_norm": 1.0880091190338135, "learning_rate": 6.405086582175712e-05, "loss": 0.7361, "step": 128190 }, { "epoch": 0.8190332596501539, "grad_norm": 4.45365571975708, "learning_rate": 6.404605026517592e-05, "loss": 0.925, "step": 128200 }, { "epoch": 0.8190971467998927, "grad_norm": 1.8221222162246704, "learning_rate": 6.404123456713884e-05, "loss": 0.8888, "step": 128210 }, { "epoch": 0.8191610339496314, "grad_norm": 0.8215370774269104, "learning_rate": 6.403641872769439e-05, "loss": 0.806, "step": 128220 }, { "epoch": 0.8192249210993701, "grad_norm": 0.7593247890472412, "learning_rate": 6.403160274689107e-05, "loss": 0.7808, "step": 128230 }, { "epoch": 0.8192888082491088, "grad_norm": 0.8794440031051636, "learning_rate": 6.402678662477735e-05, "loss": 0.9681, "step": 128240 }, { "epoch": 0.8193526953988475, "grad_norm": 0.9610484838485718, "learning_rate": 6.402197036140176e-05, "loss": 0.6683, "step": 128250 }, { "epoch": 0.8194165825485862, "grad_norm": 1.84084153175354, "learning_rate": 6.40171539568128e-05, "loss": 0.8565, "step": 128260 }, { "epoch": 0.8194804696983249, "grad_norm": 0.8622168898582458, "learning_rate": 6.401233741105898e-05, "loss": 0.8958, "step": 128270 }, { "epoch": 0.8195443568480636, "grad_norm": 0.8887889981269836, "learning_rate": 6.400752072418878e-05, "loss": 0.6639, "step": 128280 }, { "epoch": 0.8196082439978023, "grad_norm": 1.2179750204086304, "learning_rate": 6.400270389625075e-05, "loss": 0.8348, "step": 128290 }, { "epoch": 0.819672131147541, "grad_norm": 0.7495495080947876, "learning_rate": 6.399788692729337e-05, "loss": 0.732, "step": 128300 }, { "epoch": 0.8197360182972797, "grad_norm": 0.9939058423042297, "learning_rate": 6.399306981736515e-05, "loss": 0.9292, "step": 128310 }, { "epoch": 0.8197999054470184, "grad_norm": 1.0423085689544678, "learning_rate": 6.398825256651463e-05, "loss": 1.0162, "step": 128320 }, { "epoch": 0.8198637925967571, "grad_norm": 1.109744668006897, "learning_rate": 6.398343517479029e-05, "loss": 0.8332, "step": 128330 }, { "epoch": 0.8199276797464958, "grad_norm": 0.8410364389419556, "learning_rate": 6.397861764224067e-05, "loss": 0.6665, "step": 128340 }, { "epoch": 0.8199915668962345, "grad_norm": 1.0010278224945068, "learning_rate": 6.397379996891426e-05, "loss": 0.8106, "step": 128350 }, { "epoch": 0.8200554540459732, "grad_norm": 0.9419941902160645, "learning_rate": 6.396898215485962e-05, "loss": 0.768, "step": 128360 }, { "epoch": 0.820119341195712, "grad_norm": 0.6813206076622009, "learning_rate": 6.396416420012523e-05, "loss": 0.7858, "step": 128370 }, { "epoch": 0.8201832283454505, "grad_norm": 1.2011756896972656, "learning_rate": 6.395934610475963e-05, "loss": 0.8183, "step": 128380 }, { "epoch": 0.8202471154951892, "grad_norm": 5.552373886108398, "learning_rate": 6.395452786881133e-05, "loss": 1.1759, "step": 128390 }, { "epoch": 0.820311002644928, "grad_norm": 0.658540666103363, "learning_rate": 6.394970949232887e-05, "loss": 0.7854, "step": 128400 }, { "epoch": 0.8203748897946667, "grad_norm": 1.2087280750274658, "learning_rate": 6.394489097536076e-05, "loss": 0.9018, "step": 128410 }, { "epoch": 0.8204387769444054, "grad_norm": 0.8210545182228088, "learning_rate": 6.394007231795554e-05, "loss": 0.809, "step": 128420 }, { "epoch": 0.8205026640941441, "grad_norm": 1.1109236478805542, "learning_rate": 6.393525352016174e-05, "loss": 0.8139, "step": 128430 }, { "epoch": 0.8205665512438828, "grad_norm": 0.5671748518943787, "learning_rate": 6.393043458202787e-05, "loss": 0.7467, "step": 128440 }, { "epoch": 0.8206304383936215, "grad_norm": 0.7894589304924011, "learning_rate": 6.392561550360247e-05, "loss": 0.7735, "step": 128450 }, { "epoch": 0.8206943255433602, "grad_norm": 1.4838154315948486, "learning_rate": 6.392079628493407e-05, "loss": 0.8314, "step": 128460 }, { "epoch": 0.8207582126930989, "grad_norm": 0.8588756918907166, "learning_rate": 6.391597692607121e-05, "loss": 0.9544, "step": 128470 }, { "epoch": 0.8208220998428376, "grad_norm": 1.500656247138977, "learning_rate": 6.391115742706243e-05, "loss": 0.7166, "step": 128480 }, { "epoch": 0.8208859869925763, "grad_norm": 0.9322123527526855, "learning_rate": 6.390633778795626e-05, "loss": 0.8543, "step": 128490 }, { "epoch": 0.820949874142315, "grad_norm": 0.8133841753005981, "learning_rate": 6.390151800880124e-05, "loss": 0.935, "step": 128500 }, { "epoch": 0.8210137612920537, "grad_norm": 1.0100511312484741, "learning_rate": 6.38966980896459e-05, "loss": 0.7153, "step": 128510 }, { "epoch": 0.8210776484417924, "grad_norm": 1.2132251262664795, "learning_rate": 6.38918780305388e-05, "loss": 0.8784, "step": 128520 }, { "epoch": 0.8211415355915311, "grad_norm": 0.8456812500953674, "learning_rate": 6.388705783152846e-05, "loss": 1.0612, "step": 128530 }, { "epoch": 0.8212054227412698, "grad_norm": 0.9363921284675598, "learning_rate": 6.388223749266344e-05, "loss": 1.0582, "step": 128540 }, { "epoch": 0.8212693098910085, "grad_norm": 2.2319607734680176, "learning_rate": 6.387741701399228e-05, "loss": 0.717, "step": 128550 }, { "epoch": 0.8213331970407473, "grad_norm": 1.1460880041122437, "learning_rate": 6.387259639556352e-05, "loss": 0.8053, "step": 128560 }, { "epoch": 0.821397084190486, "grad_norm": 0.7786633372306824, "learning_rate": 6.386777563742571e-05, "loss": 0.9295, "step": 128570 }, { "epoch": 0.8214609713402247, "grad_norm": 0.8172239661216736, "learning_rate": 6.38629547396274e-05, "loss": 0.8758, "step": 128580 }, { "epoch": 0.8215248584899634, "grad_norm": 0.6958255171775818, "learning_rate": 6.385813370221716e-05, "loss": 0.8538, "step": 128590 }, { "epoch": 0.8215887456397021, "grad_norm": 0.9050196409225464, "learning_rate": 6.38533125252435e-05, "loss": 0.8245, "step": 128600 }, { "epoch": 0.8216526327894408, "grad_norm": 0.9731044769287109, "learning_rate": 6.384849120875502e-05, "loss": 0.7545, "step": 128610 }, { "epoch": 0.8217165199391794, "grad_norm": 1.5019901990890503, "learning_rate": 6.384366975280024e-05, "loss": 0.8528, "step": 128620 }, { "epoch": 0.8217804070889181, "grad_norm": 0.9366435408592224, "learning_rate": 6.383884815742772e-05, "loss": 0.8599, "step": 128630 }, { "epoch": 0.8218442942386568, "grad_norm": 0.6195958852767944, "learning_rate": 6.383402642268603e-05, "loss": 0.8433, "step": 128640 }, { "epoch": 0.8219081813883955, "grad_norm": 0.6187912225723267, "learning_rate": 6.382920454862374e-05, "loss": 0.9079, "step": 128650 }, { "epoch": 0.8219720685381342, "grad_norm": 1.1059194803237915, "learning_rate": 6.382438253528939e-05, "loss": 0.7321, "step": 128660 }, { "epoch": 0.8220359556878729, "grad_norm": 0.8085727095603943, "learning_rate": 6.381956038273156e-05, "loss": 0.9373, "step": 128670 }, { "epoch": 0.8220998428376116, "grad_norm": 2.0732922554016113, "learning_rate": 6.381473809099878e-05, "loss": 0.8538, "step": 128680 }, { "epoch": 0.8221637299873503, "grad_norm": 0.6032936573028564, "learning_rate": 6.380991566013966e-05, "loss": 0.7414, "step": 128690 }, { "epoch": 0.822227617137089, "grad_norm": 1.1853028535842896, "learning_rate": 6.380509309020272e-05, "loss": 0.8288, "step": 128700 }, { "epoch": 0.8222915042868277, "grad_norm": 0.6363354325294495, "learning_rate": 6.380027038123654e-05, "loss": 0.8664, "step": 128710 }, { "epoch": 0.8223553914365664, "grad_norm": 1.0098567008972168, "learning_rate": 6.379544753328973e-05, "loss": 1.0163, "step": 128720 }, { "epoch": 0.8224192785863051, "grad_norm": 2.520233631134033, "learning_rate": 6.379062454641081e-05, "loss": 0.7209, "step": 128730 }, { "epoch": 0.8224831657360439, "grad_norm": 1.0344457626342773, "learning_rate": 6.378580142064838e-05, "loss": 0.8217, "step": 128740 }, { "epoch": 0.8225470528857826, "grad_norm": 0.8147494792938232, "learning_rate": 6.378097815605099e-05, "loss": 0.7642, "step": 128750 }, { "epoch": 0.8226109400355213, "grad_norm": 1.220113754272461, "learning_rate": 6.377615475266724e-05, "loss": 0.9745, "step": 128760 }, { "epoch": 0.82267482718526, "grad_norm": 0.7933838367462158, "learning_rate": 6.377133121054571e-05, "loss": 0.6175, "step": 128770 }, { "epoch": 0.8227387143349987, "grad_norm": 1.4493178129196167, "learning_rate": 6.376650752973493e-05, "loss": 0.8895, "step": 128780 }, { "epoch": 0.8228026014847374, "grad_norm": 0.9888495802879333, "learning_rate": 6.376168371028351e-05, "loss": 0.9483, "step": 128790 }, { "epoch": 0.8228664886344761, "grad_norm": 1.066873550415039, "learning_rate": 6.375685975224004e-05, "loss": 0.9853, "step": 128800 }, { "epoch": 0.8229303757842148, "grad_norm": 0.903079092502594, "learning_rate": 6.375203565565308e-05, "loss": 0.9505, "step": 128810 }, { "epoch": 0.8229942629339535, "grad_norm": 1.5976263284683228, "learning_rate": 6.374721142057125e-05, "loss": 0.9695, "step": 128820 }, { "epoch": 0.8230581500836922, "grad_norm": 0.6900495886802673, "learning_rate": 6.374238704704308e-05, "loss": 1.0342, "step": 128830 }, { "epoch": 0.8231220372334309, "grad_norm": 0.9816431403160095, "learning_rate": 6.37375625351172e-05, "loss": 0.9036, "step": 128840 }, { "epoch": 0.8231859243831696, "grad_norm": 1.096856713294983, "learning_rate": 6.373273788484217e-05, "loss": 0.7925, "step": 128850 }, { "epoch": 0.8232498115329082, "grad_norm": 0.6943714022636414, "learning_rate": 6.37279130962666e-05, "loss": 0.836, "step": 128860 }, { "epoch": 0.8233136986826469, "grad_norm": 1.0958727598190308, "learning_rate": 6.372308816943908e-05, "loss": 0.8033, "step": 128870 }, { "epoch": 0.8233775858323856, "grad_norm": 0.9813776016235352, "learning_rate": 6.371826310440816e-05, "loss": 1.1422, "step": 128880 }, { "epoch": 0.8234414729821243, "grad_norm": 1.0429863929748535, "learning_rate": 6.371343790122249e-05, "loss": 0.9248, "step": 128890 }, { "epoch": 0.823505360131863, "grad_norm": 1.0575244426727295, "learning_rate": 6.370861255993062e-05, "loss": 1.0187, "step": 128900 }, { "epoch": 0.8235692472816017, "grad_norm": 1.6961395740509033, "learning_rate": 6.370378708058115e-05, "loss": 0.7942, "step": 128910 }, { "epoch": 0.8236331344313405, "grad_norm": 1.0016497373580933, "learning_rate": 6.36989614632227e-05, "loss": 0.8602, "step": 128920 }, { "epoch": 0.8236970215810792, "grad_norm": 1.1897591352462769, "learning_rate": 6.369413570790386e-05, "loss": 0.8869, "step": 128930 }, { "epoch": 0.8237609087308179, "grad_norm": 1.0436471700668335, "learning_rate": 6.368930981467323e-05, "loss": 0.9429, "step": 128940 }, { "epoch": 0.8238247958805566, "grad_norm": 0.9144713878631592, "learning_rate": 6.368448378357941e-05, "loss": 0.7866, "step": 128950 }, { "epoch": 0.8238886830302953, "grad_norm": 1.058947205543518, "learning_rate": 6.367965761467098e-05, "loss": 0.8541, "step": 128960 }, { "epoch": 0.823952570180034, "grad_norm": 0.9938645958900452, "learning_rate": 6.367483130799659e-05, "loss": 0.9613, "step": 128970 }, { "epoch": 0.8240164573297727, "grad_norm": 0.8759385943412781, "learning_rate": 6.36700048636048e-05, "loss": 1.0366, "step": 128980 }, { "epoch": 0.8240803444795114, "grad_norm": 0.4528246223926544, "learning_rate": 6.366517828154424e-05, "loss": 1.0067, "step": 128990 }, { "epoch": 0.8241442316292501, "grad_norm": 0.7166324257850647, "learning_rate": 6.36603515618635e-05, "loss": 0.8826, "step": 129000 }, { "epoch": 0.8242081187789888, "grad_norm": 0.7682775259017944, "learning_rate": 6.365552470461122e-05, "loss": 0.8674, "step": 129010 }, { "epoch": 0.8242720059287275, "grad_norm": 1.0459059476852417, "learning_rate": 6.3650697709836e-05, "loss": 0.7448, "step": 129020 }, { "epoch": 0.8243358930784662, "grad_norm": 0.9293308854103088, "learning_rate": 6.364587057758642e-05, "loss": 1.0024, "step": 129030 }, { "epoch": 0.8243997802282049, "grad_norm": 0.9621434807777405, "learning_rate": 6.364104330791113e-05, "loss": 0.853, "step": 129040 }, { "epoch": 0.8244636673779436, "grad_norm": 0.5361142158508301, "learning_rate": 6.363621590085873e-05, "loss": 0.904, "step": 129050 }, { "epoch": 0.8245275545276823, "grad_norm": 1.060625672340393, "learning_rate": 6.363138835647784e-05, "loss": 0.9679, "step": 129060 }, { "epoch": 0.824591441677421, "grad_norm": 0.9136219024658203, "learning_rate": 6.362656067481708e-05, "loss": 1.0068, "step": 129070 }, { "epoch": 0.8246553288271598, "grad_norm": 0.9109070301055908, "learning_rate": 6.362173285592507e-05, "loss": 0.7127, "step": 129080 }, { "epoch": 0.8247192159768985, "grad_norm": 0.6838903427124023, "learning_rate": 6.361690489985041e-05, "loss": 0.806, "step": 129090 }, { "epoch": 0.8247831031266372, "grad_norm": 0.5887237787246704, "learning_rate": 6.361207680664174e-05, "loss": 0.6983, "step": 129100 }, { "epoch": 0.8248469902763758, "grad_norm": 1.1329760551452637, "learning_rate": 6.36072485763477e-05, "loss": 1.0387, "step": 129110 }, { "epoch": 0.8249108774261145, "grad_norm": 1.2478314638137817, "learning_rate": 6.360242020901688e-05, "loss": 0.8467, "step": 129120 }, { "epoch": 0.8249747645758532, "grad_norm": 1.1845048666000366, "learning_rate": 6.359759170469791e-05, "loss": 0.9406, "step": 129130 }, { "epoch": 0.8250386517255919, "grad_norm": 0.9721893668174744, "learning_rate": 6.359276306343944e-05, "loss": 0.9239, "step": 129140 }, { "epoch": 0.8251025388753306, "grad_norm": 0.7293797135353088, "learning_rate": 6.358793428529008e-05, "loss": 1.0397, "step": 129150 }, { "epoch": 0.8251664260250693, "grad_norm": 1.1368262767791748, "learning_rate": 6.358310537029847e-05, "loss": 0.7218, "step": 129160 }, { "epoch": 0.825230313174808, "grad_norm": 1.327905535697937, "learning_rate": 6.357827631851324e-05, "loss": 0.716, "step": 129170 }, { "epoch": 0.8252942003245467, "grad_norm": 1.3848083019256592, "learning_rate": 6.357344712998302e-05, "loss": 0.8165, "step": 129180 }, { "epoch": 0.8253580874742854, "grad_norm": 0.8067914843559265, "learning_rate": 6.356861780475645e-05, "loss": 0.8678, "step": 129190 }, { "epoch": 0.8254219746240241, "grad_norm": 0.8428422212600708, "learning_rate": 6.356378834288216e-05, "loss": 0.9308, "step": 129200 }, { "epoch": 0.8254858617737628, "grad_norm": 0.8563640713691711, "learning_rate": 6.355895874440878e-05, "loss": 1.295, "step": 129210 }, { "epoch": 0.8255497489235015, "grad_norm": 0.9378432035446167, "learning_rate": 6.355412900938496e-05, "loss": 1.1254, "step": 129220 }, { "epoch": 0.8256136360732402, "grad_norm": 1.2021596431732178, "learning_rate": 6.354929913785932e-05, "loss": 1.1068, "step": 129230 }, { "epoch": 0.8256775232229789, "grad_norm": 0.6594062447547913, "learning_rate": 6.354446912988053e-05, "loss": 0.7613, "step": 129240 }, { "epoch": 0.8257414103727176, "grad_norm": 0.6651197671890259, "learning_rate": 6.353963898549723e-05, "loss": 0.684, "step": 129250 }, { "epoch": 0.8258052975224563, "grad_norm": 0.7577025890350342, "learning_rate": 6.353480870475805e-05, "loss": 0.639, "step": 129260 }, { "epoch": 0.8258691846721951, "grad_norm": 1.147277593612671, "learning_rate": 6.352997828771162e-05, "loss": 1.1541, "step": 129270 }, { "epoch": 0.8259330718219338, "grad_norm": 0.7437235116958618, "learning_rate": 6.35251477344066e-05, "loss": 0.9665, "step": 129280 }, { "epoch": 0.8259969589716725, "grad_norm": 1.1867269277572632, "learning_rate": 6.352031704489166e-05, "loss": 1.0024, "step": 129290 }, { "epoch": 0.8260608461214112, "grad_norm": 0.9756374359130859, "learning_rate": 6.351548621921542e-05, "loss": 0.9174, "step": 129300 }, { "epoch": 0.8261247332711499, "grad_norm": 0.6619752049446106, "learning_rate": 6.351065525742655e-05, "loss": 1.0489, "step": 129310 }, { "epoch": 0.8261886204208886, "grad_norm": 1.198135256767273, "learning_rate": 6.350582415957367e-05, "loss": 0.8207, "step": 129320 }, { "epoch": 0.8262525075706273, "grad_norm": 1.183821678161621, "learning_rate": 6.350099292570547e-05, "loss": 1.1444, "step": 129330 }, { "epoch": 0.826316394720366, "grad_norm": 0.8684186935424805, "learning_rate": 6.349616155587059e-05, "loss": 0.7594, "step": 129340 }, { "epoch": 0.8263802818701046, "grad_norm": 0.7701572775840759, "learning_rate": 6.34913300501177e-05, "loss": 0.7771, "step": 129350 }, { "epoch": 0.8264441690198433, "grad_norm": 0.8787345290184021, "learning_rate": 6.348649840849543e-05, "loss": 0.9283, "step": 129360 }, { "epoch": 0.826508056169582, "grad_norm": 0.7705711126327515, "learning_rate": 6.348166663105247e-05, "loss": 0.908, "step": 129370 }, { "epoch": 0.8265719433193207, "grad_norm": 0.7562994956970215, "learning_rate": 6.347683471783744e-05, "loss": 0.8175, "step": 129380 }, { "epoch": 0.8266358304690594, "grad_norm": 0.6645038723945618, "learning_rate": 6.347200266889904e-05, "loss": 1.1337, "step": 129390 }, { "epoch": 0.8266997176187981, "grad_norm": 0.8476589322090149, "learning_rate": 6.346717048428592e-05, "loss": 0.8446, "step": 129400 }, { "epoch": 0.8267636047685368, "grad_norm": 0.7244347333908081, "learning_rate": 6.346233816404674e-05, "loss": 0.8702, "step": 129410 }, { "epoch": 0.8268274919182755, "grad_norm": 1.6287295818328857, "learning_rate": 6.345750570823017e-05, "loss": 0.7862, "step": 129420 }, { "epoch": 0.8268913790680142, "grad_norm": 0.9641634821891785, "learning_rate": 6.345267311688486e-05, "loss": 0.8193, "step": 129430 }, { "epoch": 0.826955266217753, "grad_norm": 0.7857980728149414, "learning_rate": 6.344784039005951e-05, "loss": 1.0366, "step": 129440 }, { "epoch": 0.8270191533674917, "grad_norm": 1.0319744348526, "learning_rate": 6.344300752780277e-05, "loss": 0.72, "step": 129450 }, { "epoch": 0.8270830405172304, "grad_norm": 1.3282662630081177, "learning_rate": 6.343817453016332e-05, "loss": 0.7655, "step": 129460 }, { "epoch": 0.8271469276669691, "grad_norm": 0.7755960822105408, "learning_rate": 6.343334139718982e-05, "loss": 0.7769, "step": 129470 }, { "epoch": 0.8272108148167078, "grad_norm": 1.4779821634292603, "learning_rate": 6.342850812893094e-05, "loss": 0.7363, "step": 129480 }, { "epoch": 0.8272747019664465, "grad_norm": 0.5803090929985046, "learning_rate": 6.342367472543537e-05, "loss": 0.7171, "step": 129490 }, { "epoch": 0.8273385891161852, "grad_norm": 0.5924243330955505, "learning_rate": 6.34188411867518e-05, "loss": 0.8442, "step": 129500 }, { "epoch": 0.8274024762659239, "grad_norm": 1.4736064672470093, "learning_rate": 6.341400751292888e-05, "loss": 0.8249, "step": 129510 }, { "epoch": 0.8274663634156626, "grad_norm": 0.9478740692138672, "learning_rate": 6.34091737040153e-05, "loss": 0.9925, "step": 129520 }, { "epoch": 0.8275302505654013, "grad_norm": 1.2648018598556519, "learning_rate": 6.340433976005975e-05, "loss": 0.8581, "step": 129530 }, { "epoch": 0.82759413771514, "grad_norm": 0.4751732349395752, "learning_rate": 6.339950568111088e-05, "loss": 0.8622, "step": 129540 }, { "epoch": 0.8276580248648787, "grad_norm": 0.8093522191047668, "learning_rate": 6.339467146721741e-05, "loss": 0.9958, "step": 129550 }, { "epoch": 0.8277219120146174, "grad_norm": 0.8439111709594727, "learning_rate": 6.3389837118428e-05, "loss": 0.9307, "step": 129560 }, { "epoch": 0.8277857991643561, "grad_norm": 0.615764319896698, "learning_rate": 6.338500263479136e-05, "loss": 0.9045, "step": 129570 }, { "epoch": 0.8278496863140948, "grad_norm": 0.8498043417930603, "learning_rate": 6.338016801635615e-05, "loss": 0.9691, "step": 129580 }, { "epoch": 0.8279135734638334, "grad_norm": 1.2179980278015137, "learning_rate": 6.337533326317108e-05, "loss": 0.6736, "step": 129590 }, { "epoch": 0.8279774606135721, "grad_norm": 0.5712013244628906, "learning_rate": 6.337049837528483e-05, "loss": 0.8732, "step": 129600 }, { "epoch": 0.8280413477633108, "grad_norm": 1.027066946029663, "learning_rate": 6.336566335274609e-05, "loss": 0.8487, "step": 129610 }, { "epoch": 0.8281052349130495, "grad_norm": 0.8761221170425415, "learning_rate": 6.336082819560357e-05, "loss": 0.8251, "step": 129620 }, { "epoch": 0.8281691220627883, "grad_norm": 0.9286890625953674, "learning_rate": 6.335599290390595e-05, "loss": 0.8774, "step": 129630 }, { "epoch": 0.828233009212527, "grad_norm": 1.0241472721099854, "learning_rate": 6.335115747770192e-05, "loss": 1.0086, "step": 129640 }, { "epoch": 0.8282968963622657, "grad_norm": 2.582179069519043, "learning_rate": 6.334632191704018e-05, "loss": 1.0137, "step": 129650 }, { "epoch": 0.8283607835120044, "grad_norm": 0.8017721176147461, "learning_rate": 6.334148622196945e-05, "loss": 0.8961, "step": 129660 }, { "epoch": 0.8284246706617431, "grad_norm": 1.0686819553375244, "learning_rate": 6.33366503925384e-05, "loss": 1.0203, "step": 129670 }, { "epoch": 0.8284885578114818, "grad_norm": 0.6839226484298706, "learning_rate": 6.333181442879573e-05, "loss": 1.018, "step": 129680 }, { "epoch": 0.8285524449612205, "grad_norm": 1.2881569862365723, "learning_rate": 6.332697833079017e-05, "loss": 0.7736, "step": 129690 }, { "epoch": 0.8286163321109592, "grad_norm": 1.0879745483398438, "learning_rate": 6.33221420985704e-05, "loss": 0.8662, "step": 129700 }, { "epoch": 0.8286802192606979, "grad_norm": 0.7041977643966675, "learning_rate": 6.331730573218514e-05, "loss": 0.6425, "step": 129710 }, { "epoch": 0.8287441064104366, "grad_norm": 1.2785156965255737, "learning_rate": 6.33124692316831e-05, "loss": 1.1758, "step": 129720 }, { "epoch": 0.8288079935601753, "grad_norm": 1.3215516805648804, "learning_rate": 6.330763259711295e-05, "loss": 0.901, "step": 129730 }, { "epoch": 0.828871880709914, "grad_norm": 0.8521394729614258, "learning_rate": 6.330279582852347e-05, "loss": 0.9565, "step": 129740 }, { "epoch": 0.8289357678596527, "grad_norm": 0.8731998205184937, "learning_rate": 6.32979589259633e-05, "loss": 0.9697, "step": 129750 }, { "epoch": 0.8289996550093914, "grad_norm": 0.8813868761062622, "learning_rate": 6.329312188948118e-05, "loss": 1.3262, "step": 129760 }, { "epoch": 0.8290635421591301, "grad_norm": 0.9669440984725952, "learning_rate": 6.328828471912582e-05, "loss": 1.0292, "step": 129770 }, { "epoch": 0.8291274293088688, "grad_norm": 1.7891266345977783, "learning_rate": 6.328344741494594e-05, "loss": 1.0843, "step": 129780 }, { "epoch": 0.8291913164586076, "grad_norm": 2.168109655380249, "learning_rate": 6.327860997699025e-05, "loss": 0.9686, "step": 129790 }, { "epoch": 0.8292552036083463, "grad_norm": 1.232346773147583, "learning_rate": 6.327377240530747e-05, "loss": 0.8645, "step": 129800 }, { "epoch": 0.829319090758085, "grad_norm": 0.9927831292152405, "learning_rate": 6.326893469994633e-05, "loss": 1.1181, "step": 129810 }, { "epoch": 0.8293829779078237, "grad_norm": 0.6466804146766663, "learning_rate": 6.326409686095553e-05, "loss": 0.9161, "step": 129820 }, { "epoch": 0.8294468650575624, "grad_norm": 1.5198026895523071, "learning_rate": 6.325925888838379e-05, "loss": 1.1528, "step": 129830 }, { "epoch": 0.829510752207301, "grad_norm": 0.6202889084815979, "learning_rate": 6.325442078227986e-05, "loss": 0.9987, "step": 129840 }, { "epoch": 0.8295746393570397, "grad_norm": 0.7182409167289734, "learning_rate": 6.324958254269243e-05, "loss": 0.9878, "step": 129850 }, { "epoch": 0.8296385265067784, "grad_norm": 0.626463770866394, "learning_rate": 6.324474416967024e-05, "loss": 1.0748, "step": 129860 }, { "epoch": 0.8297024136565171, "grad_norm": 1.071711540222168, "learning_rate": 6.323990566326203e-05, "loss": 1.0839, "step": 129870 }, { "epoch": 0.8297663008062558, "grad_norm": 0.5460740327835083, "learning_rate": 6.323506702351651e-05, "loss": 1.0518, "step": 129880 }, { "epoch": 0.8298301879559945, "grad_norm": 0.813046395778656, "learning_rate": 6.323022825048243e-05, "loss": 1.0735, "step": 129890 }, { "epoch": 0.8298940751057332, "grad_norm": 1.204576849937439, "learning_rate": 6.322538934420849e-05, "loss": 0.9745, "step": 129900 }, { "epoch": 0.8299579622554719, "grad_norm": 0.6870941519737244, "learning_rate": 6.322055030474345e-05, "loss": 0.7515, "step": 129910 }, { "epoch": 0.8300218494052106, "grad_norm": 0.5987921357154846, "learning_rate": 6.321571113213602e-05, "loss": 0.8308, "step": 129920 }, { "epoch": 0.8300857365549493, "grad_norm": 0.8666192889213562, "learning_rate": 6.321087182643495e-05, "loss": 1.0491, "step": 129930 }, { "epoch": 0.830149623704688, "grad_norm": 0.699948251247406, "learning_rate": 6.320603238768896e-05, "loss": 0.8764, "step": 129940 }, { "epoch": 0.8302135108544267, "grad_norm": 0.7432700991630554, "learning_rate": 6.320119281594681e-05, "loss": 0.7359, "step": 129950 }, { "epoch": 0.8302773980041654, "grad_norm": 0.7470651268959045, "learning_rate": 6.319635311125722e-05, "loss": 0.9569, "step": 129960 }, { "epoch": 0.8303412851539042, "grad_norm": 1.7274110317230225, "learning_rate": 6.319151327366894e-05, "loss": 0.6801, "step": 129970 }, { "epoch": 0.8304051723036429, "grad_norm": 1.1418910026550293, "learning_rate": 6.318667330323074e-05, "loss": 0.9272, "step": 129980 }, { "epoch": 0.8304690594533816, "grad_norm": 0.478630930185318, "learning_rate": 6.31818331999913e-05, "loss": 0.5905, "step": 129990 }, { "epoch": 0.8305329466031203, "grad_norm": 1.0310248136520386, "learning_rate": 6.317699296399939e-05, "loss": 1.3088, "step": 130000 }, { "epoch": 0.830596833752859, "grad_norm": 1.2773408889770508, "learning_rate": 6.317215259530377e-05, "loss": 0.7674, "step": 130010 }, { "epoch": 0.8306607209025977, "grad_norm": 0.8546721339225769, "learning_rate": 6.316731209395318e-05, "loss": 0.8664, "step": 130020 }, { "epoch": 0.8307246080523364, "grad_norm": 1.4127484560012817, "learning_rate": 6.316247145999636e-05, "loss": 0.7683, "step": 130030 }, { "epoch": 0.8307884952020751, "grad_norm": 1.9064104557037354, "learning_rate": 6.315763069348208e-05, "loss": 0.865, "step": 130040 }, { "epoch": 0.8308523823518138, "grad_norm": 1.1948989629745483, "learning_rate": 6.315278979445906e-05, "loss": 0.9118, "step": 130050 }, { "epoch": 0.8309162695015525, "grad_norm": 1.745757818222046, "learning_rate": 6.314794876297607e-05, "loss": 0.8204, "step": 130060 }, { "epoch": 0.8309801566512912, "grad_norm": 0.840900719165802, "learning_rate": 6.314310759908187e-05, "loss": 1.0959, "step": 130070 }, { "epoch": 0.8310440438010298, "grad_norm": 1.8418869972229004, "learning_rate": 6.313826630282521e-05, "loss": 1.1114, "step": 130080 }, { "epoch": 0.8311079309507685, "grad_norm": 1.024966835975647, "learning_rate": 6.313342487425483e-05, "loss": 0.7167, "step": 130090 }, { "epoch": 0.8311718181005072, "grad_norm": 0.7477736473083496, "learning_rate": 6.312858331341951e-05, "loss": 0.971, "step": 130100 }, { "epoch": 0.8312357052502459, "grad_norm": 0.9230465888977051, "learning_rate": 6.312374162036798e-05, "loss": 1.0544, "step": 130110 }, { "epoch": 0.8312995923999846, "grad_norm": 0.986064612865448, "learning_rate": 6.311889979514904e-05, "loss": 1.332, "step": 130120 }, { "epoch": 0.8313634795497233, "grad_norm": 0.894123375415802, "learning_rate": 6.311405783781141e-05, "loss": 1.1246, "step": 130130 }, { "epoch": 0.831427366699462, "grad_norm": 0.7910020351409912, "learning_rate": 6.310921574840389e-05, "loss": 0.7583, "step": 130140 }, { "epoch": 0.8314912538492008, "grad_norm": 1.064899206161499, "learning_rate": 6.310437352697522e-05, "loss": 0.7065, "step": 130150 }, { "epoch": 0.8315551409989395, "grad_norm": 0.8578174710273743, "learning_rate": 6.309953117357416e-05, "loss": 0.7956, "step": 130160 }, { "epoch": 0.8316190281486782, "grad_norm": 0.7391040325164795, "learning_rate": 6.30946886882495e-05, "loss": 0.801, "step": 130170 }, { "epoch": 0.8316829152984169, "grad_norm": 0.6705387830734253, "learning_rate": 6.308984607104999e-05, "loss": 0.7235, "step": 130180 }, { "epoch": 0.8317468024481556, "grad_norm": 0.5788788199424744, "learning_rate": 6.308500332202443e-05, "loss": 0.7199, "step": 130190 }, { "epoch": 0.8318106895978943, "grad_norm": 0.9009729027748108, "learning_rate": 6.308016044122153e-05, "loss": 0.7121, "step": 130200 }, { "epoch": 0.831874576747633, "grad_norm": 0.7175112962722778, "learning_rate": 6.307531742869012e-05, "loss": 0.7884, "step": 130210 }, { "epoch": 0.8319384638973717, "grad_norm": 0.9665817022323608, "learning_rate": 6.307047428447894e-05, "loss": 0.9997, "step": 130220 }, { "epoch": 0.8320023510471104, "grad_norm": 1.334688663482666, "learning_rate": 6.306563100863679e-05, "loss": 0.8198, "step": 130230 }, { "epoch": 0.8320662381968491, "grad_norm": 0.5525422096252441, "learning_rate": 6.306078760121243e-05, "loss": 0.8491, "step": 130240 }, { "epoch": 0.8321301253465878, "grad_norm": 0.7009544968605042, "learning_rate": 6.305594406225464e-05, "loss": 0.7649, "step": 130250 }, { "epoch": 0.8321940124963265, "grad_norm": 1.0132756233215332, "learning_rate": 6.305110039181219e-05, "loss": 0.8507, "step": 130260 }, { "epoch": 0.8322578996460652, "grad_norm": 1.0832351446151733, "learning_rate": 6.304625658993388e-05, "loss": 0.9625, "step": 130270 }, { "epoch": 0.8323217867958039, "grad_norm": 1.351954698562622, "learning_rate": 6.304141265666846e-05, "loss": 1.1904, "step": 130280 }, { "epoch": 0.8323856739455426, "grad_norm": 0.9040460586547852, "learning_rate": 6.303656859206475e-05, "loss": 1.0508, "step": 130290 }, { "epoch": 0.8324495610952813, "grad_norm": 2.278919219970703, "learning_rate": 6.30317243961715e-05, "loss": 0.8939, "step": 130300 }, { "epoch": 0.83251344824502, "grad_norm": 1.002038836479187, "learning_rate": 6.302688006903753e-05, "loss": 0.9378, "step": 130310 }, { "epoch": 0.8325773353947586, "grad_norm": 1.055732250213623, "learning_rate": 6.30220356107116e-05, "loss": 1.185, "step": 130320 }, { "epoch": 0.8326412225444974, "grad_norm": 0.8173093199729919, "learning_rate": 6.301719102124251e-05, "loss": 0.8708, "step": 130330 }, { "epoch": 0.8327051096942361, "grad_norm": 0.8317503333091736, "learning_rate": 6.301234630067902e-05, "loss": 0.8327, "step": 130340 }, { "epoch": 0.8327689968439748, "grad_norm": 0.7437106370925903, "learning_rate": 6.300750144906997e-05, "loss": 0.8685, "step": 130350 }, { "epoch": 0.8328328839937135, "grad_norm": 0.8775268793106079, "learning_rate": 6.300265646646413e-05, "loss": 0.7828, "step": 130360 }, { "epoch": 0.8328967711434522, "grad_norm": 0.8213568329811096, "learning_rate": 6.299781135291028e-05, "loss": 0.8267, "step": 130370 }, { "epoch": 0.8329606582931909, "grad_norm": 0.6800155639648438, "learning_rate": 6.299296610845721e-05, "loss": 0.752, "step": 130380 }, { "epoch": 0.8330245454429296, "grad_norm": 0.9931345582008362, "learning_rate": 6.298812073315375e-05, "loss": 0.7927, "step": 130390 }, { "epoch": 0.8330884325926683, "grad_norm": 0.6396980881690979, "learning_rate": 6.298327522704869e-05, "loss": 0.8008, "step": 130400 }, { "epoch": 0.833152319742407, "grad_norm": 1.1802109479904175, "learning_rate": 6.29784295901908e-05, "loss": 0.8763, "step": 130410 }, { "epoch": 0.8332162068921457, "grad_norm": 0.64454185962677, "learning_rate": 6.29735838226289e-05, "loss": 0.8682, "step": 130420 }, { "epoch": 0.8332800940418844, "grad_norm": 0.9595576524734497, "learning_rate": 6.296873792441179e-05, "loss": 0.7091, "step": 130430 }, { "epoch": 0.8333439811916231, "grad_norm": 0.9316465854644775, "learning_rate": 6.296389189558825e-05, "loss": 0.9267, "step": 130440 }, { "epoch": 0.8334078683413618, "grad_norm": 0.6795920133590698, "learning_rate": 6.295904573620712e-05, "loss": 0.9662, "step": 130450 }, { "epoch": 0.8334717554911005, "grad_norm": 0.8197570443153381, "learning_rate": 6.29541994463172e-05, "loss": 0.9202, "step": 130460 }, { "epoch": 0.8335356426408392, "grad_norm": 1.0630313158035278, "learning_rate": 6.294935302596727e-05, "loss": 1.089, "step": 130470 }, { "epoch": 0.8335995297905779, "grad_norm": 1.3294517993927002, "learning_rate": 6.294450647520616e-05, "loss": 0.9093, "step": 130480 }, { "epoch": 0.8336634169403166, "grad_norm": 1.1934936046600342, "learning_rate": 6.293965979408267e-05, "loss": 0.7484, "step": 130490 }, { "epoch": 0.8337273040900554, "grad_norm": 0.4786630868911743, "learning_rate": 6.29348129826456e-05, "loss": 0.8751, "step": 130500 }, { "epoch": 0.8337911912397941, "grad_norm": 1.0088316202163696, "learning_rate": 6.292996604094378e-05, "loss": 0.9007, "step": 130510 }, { "epoch": 0.8338550783895328, "grad_norm": 0.8918789029121399, "learning_rate": 6.292511896902602e-05, "loss": 0.625, "step": 130520 }, { "epoch": 0.8339189655392715, "grad_norm": 0.9836956262588501, "learning_rate": 6.292027176694112e-05, "loss": 1.0436, "step": 130530 }, { "epoch": 0.8339828526890102, "grad_norm": 0.6663161516189575, "learning_rate": 6.29154244347379e-05, "loss": 1.0225, "step": 130540 }, { "epoch": 0.8340467398387489, "grad_norm": 0.9562898278236389, "learning_rate": 6.29105769724652e-05, "loss": 1.0247, "step": 130550 }, { "epoch": 0.8341106269884875, "grad_norm": 0.8911415338516235, "learning_rate": 6.29057293801718e-05, "loss": 0.7822, "step": 130560 }, { "epoch": 0.8341745141382262, "grad_norm": 0.6790681481361389, "learning_rate": 6.290088165790658e-05, "loss": 0.8417, "step": 130570 }, { "epoch": 0.8342384012879649, "grad_norm": 0.6850691437721252, "learning_rate": 6.289603380571828e-05, "loss": 0.808, "step": 130580 }, { "epoch": 0.8343022884377036, "grad_norm": 0.5608751773834229, "learning_rate": 6.289118582365578e-05, "loss": 0.9613, "step": 130590 }, { "epoch": 0.8343661755874423, "grad_norm": 1.013020634651184, "learning_rate": 6.288633771176789e-05, "loss": 1.102, "step": 130600 }, { "epoch": 0.834430062737181, "grad_norm": 1.1625406742095947, "learning_rate": 6.288148947010342e-05, "loss": 0.7877, "step": 130610 }, { "epoch": 0.8344939498869197, "grad_norm": 1.0570250749588013, "learning_rate": 6.287664109871121e-05, "loss": 0.9154, "step": 130620 }, { "epoch": 0.8345578370366584, "grad_norm": 0.5707595348358154, "learning_rate": 6.287179259764008e-05, "loss": 0.7431, "step": 130630 }, { "epoch": 0.8346217241863971, "grad_norm": 1.400903344154358, "learning_rate": 6.286694396693888e-05, "loss": 0.648, "step": 130640 }, { "epoch": 0.8346856113361358, "grad_norm": 0.6360903382301331, "learning_rate": 6.286209520665641e-05, "loss": 0.9326, "step": 130650 }, { "epoch": 0.8347494984858745, "grad_norm": 1.020822286605835, "learning_rate": 6.285724631684153e-05, "loss": 0.9831, "step": 130660 }, { "epoch": 0.8348133856356132, "grad_norm": 0.9532088041305542, "learning_rate": 6.285239729754304e-05, "loss": 0.9734, "step": 130670 }, { "epoch": 0.834877272785352, "grad_norm": 1.0500184297561646, "learning_rate": 6.284754814880979e-05, "loss": 1.1216, "step": 130680 }, { "epoch": 0.8349411599350907, "grad_norm": 0.7444097399711609, "learning_rate": 6.284269887069061e-05, "loss": 0.6931, "step": 130690 }, { "epoch": 0.8350050470848294, "grad_norm": 1.272265076637268, "learning_rate": 6.283784946323435e-05, "loss": 0.9984, "step": 130700 }, { "epoch": 0.8350689342345681, "grad_norm": 3.133178949356079, "learning_rate": 6.283299992648985e-05, "loss": 1.0528, "step": 130710 }, { "epoch": 0.8351328213843068, "grad_norm": 0.9269975423812866, "learning_rate": 6.282815026050593e-05, "loss": 0.7731, "step": 130720 }, { "epoch": 0.8351967085340455, "grad_norm": 1.0282938480377197, "learning_rate": 6.282330046533144e-05, "loss": 0.9324, "step": 130730 }, { "epoch": 0.8352605956837842, "grad_norm": 0.8756586909294128, "learning_rate": 6.281845054101522e-05, "loss": 0.7738, "step": 130740 }, { "epoch": 0.8353244828335229, "grad_norm": 0.8821702599525452, "learning_rate": 6.28136004876061e-05, "loss": 0.9636, "step": 130750 }, { "epoch": 0.8353883699832616, "grad_norm": 0.8497743010520935, "learning_rate": 6.280875030515295e-05, "loss": 0.8596, "step": 130760 }, { "epoch": 0.8354522571330003, "grad_norm": 0.6755285263061523, "learning_rate": 6.28038999937046e-05, "loss": 0.8027, "step": 130770 }, { "epoch": 0.835516144282739, "grad_norm": 1.5908513069152832, "learning_rate": 6.279904955330991e-05, "loss": 1.274, "step": 130780 }, { "epoch": 0.8355800314324777, "grad_norm": 0.8962165713310242, "learning_rate": 6.279419898401772e-05, "loss": 0.8612, "step": 130790 }, { "epoch": 0.8356439185822164, "grad_norm": 1.3172128200531006, "learning_rate": 6.278934828587686e-05, "loss": 0.8991, "step": 130800 }, { "epoch": 0.835707805731955, "grad_norm": 0.7222731113433838, "learning_rate": 6.278449745893621e-05, "loss": 1.0712, "step": 130810 }, { "epoch": 0.8357716928816937, "grad_norm": 0.9182828664779663, "learning_rate": 6.27796465032446e-05, "loss": 1.2312, "step": 130820 }, { "epoch": 0.8358355800314324, "grad_norm": 0.9249597191810608, "learning_rate": 6.277479541885091e-05, "loss": 0.9577, "step": 130830 }, { "epoch": 0.8358994671811711, "grad_norm": 0.8450763821601868, "learning_rate": 6.276994420580397e-05, "loss": 0.8475, "step": 130840 }, { "epoch": 0.8359633543309098, "grad_norm": 0.8186871409416199, "learning_rate": 6.276509286415265e-05, "loss": 0.677, "step": 130850 }, { "epoch": 0.8360272414806486, "grad_norm": 0.7687765955924988, "learning_rate": 6.276024139394578e-05, "loss": 0.7434, "step": 130860 }, { "epoch": 0.8360911286303873, "grad_norm": 0.8875823616981506, "learning_rate": 6.275538979523227e-05, "loss": 1.1177, "step": 130870 }, { "epoch": 0.836155015780126, "grad_norm": 1.5310128927230835, "learning_rate": 6.275053806806093e-05, "loss": 0.8904, "step": 130880 }, { "epoch": 0.8362189029298647, "grad_norm": 0.9724732041358948, "learning_rate": 6.274568621248065e-05, "loss": 0.901, "step": 130890 }, { "epoch": 0.8362827900796034, "grad_norm": 0.7733339071273804, "learning_rate": 6.274083422854026e-05, "loss": 0.8989, "step": 130900 }, { "epoch": 0.8363466772293421, "grad_norm": 0.7655275464057922, "learning_rate": 6.273598211628867e-05, "loss": 0.9544, "step": 130910 }, { "epoch": 0.8364105643790808, "grad_norm": 0.7450165748596191, "learning_rate": 6.273112987577472e-05, "loss": 0.5685, "step": 130920 }, { "epoch": 0.8364744515288195, "grad_norm": 0.9278712868690491, "learning_rate": 6.272627750704727e-05, "loss": 0.9439, "step": 130930 }, { "epoch": 0.8365383386785582, "grad_norm": 0.9437885284423828, "learning_rate": 6.272142501015521e-05, "loss": 0.8225, "step": 130940 }, { "epoch": 0.8366022258282969, "grad_norm": 2.503553628921509, "learning_rate": 6.27165723851474e-05, "loss": 1.0931, "step": 130950 }, { "epoch": 0.8366661129780356, "grad_norm": 0.7560781836509705, "learning_rate": 6.271171963207269e-05, "loss": 0.8088, "step": 130960 }, { "epoch": 0.8367300001277743, "grad_norm": 0.9613698124885559, "learning_rate": 6.270686675097997e-05, "loss": 0.8562, "step": 130970 }, { "epoch": 0.836793887277513, "grad_norm": 1.2895517349243164, "learning_rate": 6.27020137419181e-05, "loss": 0.9257, "step": 130980 }, { "epoch": 0.8368577744272517, "grad_norm": 0.9719459414482117, "learning_rate": 6.269716060493597e-05, "loss": 0.9031, "step": 130990 }, { "epoch": 0.8369216615769904, "grad_norm": 1.973030924797058, "learning_rate": 6.269230734008245e-05, "loss": 0.8468, "step": 131000 }, { "epoch": 0.8369855487267291, "grad_norm": 0.9847891330718994, "learning_rate": 6.26874539474064e-05, "loss": 0.7736, "step": 131010 }, { "epoch": 0.8370494358764679, "grad_norm": 0.9893288016319275, "learning_rate": 6.268260042695672e-05, "loss": 0.7408, "step": 131020 }, { "epoch": 0.8371133230262066, "grad_norm": 0.742874026298523, "learning_rate": 6.267823214934596e-05, "loss": 1.013, "step": 131030 }, { "epoch": 0.8371772101759453, "grad_norm": 1.0840404033660889, "learning_rate": 6.267337838626103e-05, "loss": 1.0696, "step": 131040 }, { "epoch": 0.8372410973256839, "grad_norm": 1.0095386505126953, "learning_rate": 6.266852449554422e-05, "loss": 0.9302, "step": 131050 }, { "epoch": 0.8373049844754226, "grad_norm": 1.0238926410675049, "learning_rate": 6.266367047724442e-05, "loss": 0.6575, "step": 131060 }, { "epoch": 0.8373688716251613, "grad_norm": 1.125106930732727, "learning_rate": 6.265881633141049e-05, "loss": 0.891, "step": 131070 }, { "epoch": 0.8374327587749, "grad_norm": 0.716673731803894, "learning_rate": 6.265396205809132e-05, "loss": 0.7425, "step": 131080 }, { "epoch": 0.8374966459246387, "grad_norm": 1.2633848190307617, "learning_rate": 6.264910765733582e-05, "loss": 0.9292, "step": 131090 }, { "epoch": 0.8375605330743774, "grad_norm": 1.3997453451156616, "learning_rate": 6.264425312919288e-05, "loss": 0.6964, "step": 131100 }, { "epoch": 0.8376244202241161, "grad_norm": 1.081032156944275, "learning_rate": 6.263939847371134e-05, "loss": 0.885, "step": 131110 }, { "epoch": 0.8376883073738548, "grad_norm": 0.6516674757003784, "learning_rate": 6.263454369094014e-05, "loss": 0.7648, "step": 131120 }, { "epoch": 0.8377521945235935, "grad_norm": 0.8595327138900757, "learning_rate": 6.262968878092814e-05, "loss": 0.7661, "step": 131130 }, { "epoch": 0.8378160816733322, "grad_norm": 1.0388331413269043, "learning_rate": 6.262483374372426e-05, "loss": 0.6961, "step": 131140 }, { "epoch": 0.8378799688230709, "grad_norm": 1.5679370164871216, "learning_rate": 6.261997857937738e-05, "loss": 0.8494, "step": 131150 }, { "epoch": 0.8379438559728096, "grad_norm": 1.0286293029785156, "learning_rate": 6.261512328793639e-05, "loss": 0.8269, "step": 131160 }, { "epoch": 0.8380077431225483, "grad_norm": 1.3701919317245483, "learning_rate": 6.261026786945021e-05, "loss": 0.7509, "step": 131170 }, { "epoch": 0.838071630272287, "grad_norm": 0.7950007915496826, "learning_rate": 6.260541232396771e-05, "loss": 1.3381, "step": 131180 }, { "epoch": 0.8381355174220257, "grad_norm": 0.7836694717407227, "learning_rate": 6.26005566515378e-05, "loss": 0.9361, "step": 131190 }, { "epoch": 0.8381994045717645, "grad_norm": 0.7753838300704956, "learning_rate": 6.259570085220939e-05, "loss": 0.8256, "step": 131200 }, { "epoch": 0.8382632917215032, "grad_norm": 0.8219996690750122, "learning_rate": 6.259084492603138e-05, "loss": 0.7421, "step": 131210 }, { "epoch": 0.8383271788712419, "grad_norm": 1.0222669839859009, "learning_rate": 6.258598887305265e-05, "loss": 0.6588, "step": 131220 }, { "epoch": 0.8383910660209806, "grad_norm": 0.9310179352760315, "learning_rate": 6.258113269332215e-05, "loss": 0.9836, "step": 131230 }, { "epoch": 0.8384549531707193, "grad_norm": 0.8922819495201111, "learning_rate": 6.257627638688875e-05, "loss": 0.8864, "step": 131240 }, { "epoch": 0.838518840320458, "grad_norm": 1.2268577814102173, "learning_rate": 6.257141995380136e-05, "loss": 1.1079, "step": 131250 }, { "epoch": 0.8385827274701967, "grad_norm": 0.7743911743164062, "learning_rate": 6.25665633941089e-05, "loss": 0.8269, "step": 131260 }, { "epoch": 0.8386466146199354, "grad_norm": 2.4467434883117676, "learning_rate": 6.256170670786028e-05, "loss": 1.1068, "step": 131270 }, { "epoch": 0.8387105017696741, "grad_norm": 1.4067353010177612, "learning_rate": 6.25568498951044e-05, "loss": 0.7124, "step": 131280 }, { "epoch": 0.8387743889194127, "grad_norm": 0.9311875700950623, "learning_rate": 6.255199295589018e-05, "loss": 1.0478, "step": 131290 }, { "epoch": 0.8388382760691514, "grad_norm": 0.8435894250869751, "learning_rate": 6.254713589026652e-05, "loss": 0.758, "step": 131300 }, { "epoch": 0.8389021632188901, "grad_norm": 1.4599822759628296, "learning_rate": 6.254227869828237e-05, "loss": 1.0697, "step": 131310 }, { "epoch": 0.8389660503686288, "grad_norm": 0.8313419818878174, "learning_rate": 6.253742137998661e-05, "loss": 1.1126, "step": 131320 }, { "epoch": 0.8390299375183675, "grad_norm": 0.9159870743751526, "learning_rate": 6.253256393542817e-05, "loss": 0.8222, "step": 131330 }, { "epoch": 0.8390938246681062, "grad_norm": 1.2503025531768799, "learning_rate": 6.252770636465597e-05, "loss": 0.9694, "step": 131340 }, { "epoch": 0.8391577118178449, "grad_norm": 0.7653509974479675, "learning_rate": 6.252284866771894e-05, "loss": 0.9817, "step": 131350 }, { "epoch": 0.8392215989675836, "grad_norm": 0.9600273370742798, "learning_rate": 6.251799084466596e-05, "loss": 0.9824, "step": 131360 }, { "epoch": 0.8392854861173223, "grad_norm": 0.5949135422706604, "learning_rate": 6.251313289554601e-05, "loss": 0.9494, "step": 131370 }, { "epoch": 0.839349373267061, "grad_norm": 0.6030875444412231, "learning_rate": 6.250827482040797e-05, "loss": 0.7904, "step": 131380 }, { "epoch": 0.8394132604167998, "grad_norm": 0.9871885180473328, "learning_rate": 6.25034166193008e-05, "loss": 0.7854, "step": 131390 }, { "epoch": 0.8394771475665385, "grad_norm": 1.0872247219085693, "learning_rate": 6.24985582922734e-05, "loss": 1.0537, "step": 131400 }, { "epoch": 0.8395410347162772, "grad_norm": 0.9975283741950989, "learning_rate": 6.24936998393747e-05, "loss": 0.8564, "step": 131410 }, { "epoch": 0.8396049218660159, "grad_norm": 1.5955655574798584, "learning_rate": 6.248884126065364e-05, "loss": 0.7626, "step": 131420 }, { "epoch": 0.8396688090157546, "grad_norm": 0.8411442041397095, "learning_rate": 6.248398255615913e-05, "loss": 1.0739, "step": 131430 }, { "epoch": 0.8397326961654933, "grad_norm": 0.9352008700370789, "learning_rate": 6.247912372594013e-05, "loss": 0.7532, "step": 131440 }, { "epoch": 0.839796583315232, "grad_norm": 1.1712517738342285, "learning_rate": 6.247426477004555e-05, "loss": 0.7926, "step": 131450 }, { "epoch": 0.8398604704649707, "grad_norm": 0.6964886784553528, "learning_rate": 6.246940568852435e-05, "loss": 0.7795, "step": 131460 }, { "epoch": 0.8399243576147094, "grad_norm": 0.7911357283592224, "learning_rate": 6.246454648142542e-05, "loss": 1.0756, "step": 131470 }, { "epoch": 0.8399882447644481, "grad_norm": 0.9874993562698364, "learning_rate": 6.245968714879773e-05, "loss": 1.0366, "step": 131480 }, { "epoch": 0.8400521319141868, "grad_norm": 0.5494539141654968, "learning_rate": 6.245482769069023e-05, "loss": 0.8724, "step": 131490 }, { "epoch": 0.8401160190639255, "grad_norm": 0.962289571762085, "learning_rate": 6.244996810715183e-05, "loss": 0.8694, "step": 131500 }, { "epoch": 0.8401799062136642, "grad_norm": 0.762205183506012, "learning_rate": 6.244510839823147e-05, "loss": 0.8485, "step": 131510 }, { "epoch": 0.8402437933634029, "grad_norm": 0.9968754649162292, "learning_rate": 6.244024856397812e-05, "loss": 0.7702, "step": 131520 }, { "epoch": 0.8403076805131416, "grad_norm": 1.0157196521759033, "learning_rate": 6.24353886044407e-05, "loss": 0.7992, "step": 131530 }, { "epoch": 0.8403715676628802, "grad_norm": 1.0793529748916626, "learning_rate": 6.243052851966816e-05, "loss": 0.8393, "step": 131540 }, { "epoch": 0.8404354548126189, "grad_norm": 0.6391186118125916, "learning_rate": 6.242566830970941e-05, "loss": 1.1761, "step": 131550 }, { "epoch": 0.8404993419623576, "grad_norm": 0.727288544178009, "learning_rate": 6.242080797461346e-05, "loss": 1.3424, "step": 131560 }, { "epoch": 0.8405632291120964, "grad_norm": 0.7789543271064758, "learning_rate": 6.241594751442923e-05, "loss": 0.8127, "step": 131570 }, { "epoch": 0.8406271162618351, "grad_norm": 0.8413047790527344, "learning_rate": 6.241108692920566e-05, "loss": 0.8366, "step": 131580 }, { "epoch": 0.8406910034115738, "grad_norm": 1.12662672996521, "learning_rate": 6.240622621899173e-05, "loss": 0.7906, "step": 131590 }, { "epoch": 0.8407548905613125, "grad_norm": 1.1771347522735596, "learning_rate": 6.240136538383635e-05, "loss": 0.9557, "step": 131600 }, { "epoch": 0.8408187777110512, "grad_norm": 1.3687745332717896, "learning_rate": 6.239650442378848e-05, "loss": 1.0635, "step": 131610 }, { "epoch": 0.8408826648607899, "grad_norm": 0.5779715180397034, "learning_rate": 6.239164333889711e-05, "loss": 0.6679, "step": 131620 }, { "epoch": 0.8409465520105286, "grad_norm": 0.9042608737945557, "learning_rate": 6.238678212921115e-05, "loss": 0.9131, "step": 131630 }, { "epoch": 0.8410104391602673, "grad_norm": 0.7568997144699097, "learning_rate": 6.238192079477959e-05, "loss": 0.8784, "step": 131640 }, { "epoch": 0.841074326310006, "grad_norm": 0.7763069868087769, "learning_rate": 6.237705933565137e-05, "loss": 1.0966, "step": 131650 }, { "epoch": 0.8411382134597447, "grad_norm": 0.7040805220603943, "learning_rate": 6.237219775187545e-05, "loss": 0.9219, "step": 131660 }, { "epoch": 0.8412021006094834, "grad_norm": 1.0088971853256226, "learning_rate": 6.236733604350081e-05, "loss": 0.8971, "step": 131670 }, { "epoch": 0.8412659877592221, "grad_norm": 1.101572036743164, "learning_rate": 6.236247421057639e-05, "loss": 0.7638, "step": 131680 }, { "epoch": 0.8413298749089608, "grad_norm": 0.8880965709686279, "learning_rate": 6.235761225315117e-05, "loss": 1.1101, "step": 131690 }, { "epoch": 0.8413937620586995, "grad_norm": 0.6575754284858704, "learning_rate": 6.235275017127409e-05, "loss": 0.7849, "step": 131700 }, { "epoch": 0.8414576492084382, "grad_norm": 1.0469022989273071, "learning_rate": 6.234788796499411e-05, "loss": 1.1646, "step": 131710 }, { "epoch": 0.841521536358177, "grad_norm": 0.6429979801177979, "learning_rate": 6.234302563436024e-05, "loss": 0.6877, "step": 131720 }, { "epoch": 0.8415854235079157, "grad_norm": 0.8447049260139465, "learning_rate": 6.233816317942143e-05, "loss": 0.836, "step": 131730 }, { "epoch": 0.8416493106576544, "grad_norm": 1.907248854637146, "learning_rate": 6.233330060022662e-05, "loss": 0.7056, "step": 131740 }, { "epoch": 0.8417131978073931, "grad_norm": 0.9654474854469299, "learning_rate": 6.232843789682483e-05, "loss": 0.8356, "step": 131750 }, { "epoch": 0.8417770849571318, "grad_norm": 1.3447314500808716, "learning_rate": 6.2323575069265e-05, "loss": 0.7557, "step": 131760 }, { "epoch": 0.8418409721068705, "grad_norm": 2.194685220718384, "learning_rate": 6.231871211759609e-05, "loss": 0.885, "step": 131770 }, { "epoch": 0.8419048592566091, "grad_norm": 1.1542408466339111, "learning_rate": 6.23138490418671e-05, "loss": 0.7755, "step": 131780 }, { "epoch": 0.8419687464063478, "grad_norm": 0.7625725865364075, "learning_rate": 6.2308985842127e-05, "loss": 1.0261, "step": 131790 }, { "epoch": 0.8420326335560865, "grad_norm": 1.0486656427383423, "learning_rate": 6.230412251842477e-05, "loss": 0.9115, "step": 131800 }, { "epoch": 0.8420965207058252, "grad_norm": 1.7022581100463867, "learning_rate": 6.229925907080937e-05, "loss": 0.853, "step": 131810 }, { "epoch": 0.8421604078555639, "grad_norm": 0.8428347110748291, "learning_rate": 6.229439549932979e-05, "loss": 0.9491, "step": 131820 }, { "epoch": 0.8422242950053026, "grad_norm": 1.126781940460205, "learning_rate": 6.228953180403503e-05, "loss": 0.7966, "step": 131830 }, { "epoch": 0.8422881821550413, "grad_norm": 0.7675802111625671, "learning_rate": 6.228466798497403e-05, "loss": 0.8985, "step": 131840 }, { "epoch": 0.84235206930478, "grad_norm": 0.7653781771659851, "learning_rate": 6.227980404219581e-05, "loss": 0.8768, "step": 131850 }, { "epoch": 0.8424159564545187, "grad_norm": 0.9524831175804138, "learning_rate": 6.227493997574933e-05, "loss": 0.93, "step": 131860 }, { "epoch": 0.8424798436042574, "grad_norm": 0.7941969037055969, "learning_rate": 6.227007578568358e-05, "loss": 0.9524, "step": 131870 }, { "epoch": 0.8425437307539961, "grad_norm": 1.5713688135147095, "learning_rate": 6.226521147204757e-05, "loss": 0.8452, "step": 131880 }, { "epoch": 0.8426076179037348, "grad_norm": 0.7123538255691528, "learning_rate": 6.226034703489025e-05, "loss": 0.962, "step": 131890 }, { "epoch": 0.8426715050534735, "grad_norm": 0.9128240942955017, "learning_rate": 6.225548247426064e-05, "loss": 0.6589, "step": 131900 }, { "epoch": 0.8427353922032123, "grad_norm": 1.0599886178970337, "learning_rate": 6.225061779020773e-05, "loss": 0.7505, "step": 131910 }, { "epoch": 0.842799279352951, "grad_norm": 0.7445502877235413, "learning_rate": 6.224575298278048e-05, "loss": 0.9809, "step": 131920 }, { "epoch": 0.8428631665026897, "grad_norm": 1.1872540712356567, "learning_rate": 6.224088805202791e-05, "loss": 0.9065, "step": 131930 }, { "epoch": 0.8429270536524284, "grad_norm": 1.1298291683197021, "learning_rate": 6.2236022997999e-05, "loss": 1.4187, "step": 131940 }, { "epoch": 0.8429909408021671, "grad_norm": 0.8236765265464783, "learning_rate": 6.223115782074278e-05, "loss": 0.7978, "step": 131950 }, { "epoch": 0.8430548279519058, "grad_norm": 0.953170895576477, "learning_rate": 6.22262925203082e-05, "loss": 0.8399, "step": 131960 }, { "epoch": 0.8431187151016445, "grad_norm": 0.5813484787940979, "learning_rate": 6.222142709674428e-05, "loss": 0.7039, "step": 131970 }, { "epoch": 0.8431826022513832, "grad_norm": 0.963085412979126, "learning_rate": 6.22165615501e-05, "loss": 0.7868, "step": 131980 }, { "epoch": 0.8432464894011219, "grad_norm": 0.8191744089126587, "learning_rate": 6.22116958804244e-05, "loss": 0.9115, "step": 131990 }, { "epoch": 0.8433103765508606, "grad_norm": 0.9577305912971497, "learning_rate": 6.220683008776645e-05, "loss": 0.7448, "step": 132000 }, { "epoch": 0.8433742637005993, "grad_norm": 1.459100604057312, "learning_rate": 6.220196417217516e-05, "loss": 0.9975, "step": 132010 }, { "epoch": 0.8434381508503379, "grad_norm": 0.7961175441741943, "learning_rate": 6.219709813369953e-05, "loss": 1.0328, "step": 132020 }, { "epoch": 0.8435020380000766, "grad_norm": 0.9462569355964661, "learning_rate": 6.219223197238858e-05, "loss": 0.8782, "step": 132030 }, { "epoch": 0.8435659251498153, "grad_norm": 1.1649507284164429, "learning_rate": 6.21873656882913e-05, "loss": 0.8889, "step": 132040 }, { "epoch": 0.843629812299554, "grad_norm": 0.6289653182029724, "learning_rate": 6.218249928145671e-05, "loss": 1.052, "step": 132050 }, { "epoch": 0.8436936994492927, "grad_norm": 0.8261928558349609, "learning_rate": 6.21776327519338e-05, "loss": 0.8439, "step": 132060 }, { "epoch": 0.8437575865990314, "grad_norm": 1.1747194528579712, "learning_rate": 6.21727660997716e-05, "loss": 1.1163, "step": 132070 }, { "epoch": 0.8438214737487701, "grad_norm": 0.8616983294487, "learning_rate": 6.216789932501912e-05, "loss": 0.7076, "step": 132080 }, { "epoch": 0.8438853608985089, "grad_norm": 1.5313186645507812, "learning_rate": 6.216303242772535e-05, "loss": 0.763, "step": 132090 }, { "epoch": 0.8439492480482476, "grad_norm": 0.84382164478302, "learning_rate": 6.215816540793934e-05, "loss": 0.9195, "step": 132100 }, { "epoch": 0.8440131351979863, "grad_norm": 1.5501306056976318, "learning_rate": 6.215329826571008e-05, "loss": 1.2185, "step": 132110 }, { "epoch": 0.844077022347725, "grad_norm": 1.0050925016403198, "learning_rate": 6.214843100108659e-05, "loss": 0.8055, "step": 132120 }, { "epoch": 0.8441409094974637, "grad_norm": 1.1478122472763062, "learning_rate": 6.214356361411788e-05, "loss": 0.7929, "step": 132130 }, { "epoch": 0.8442047966472024, "grad_norm": 1.0138068199157715, "learning_rate": 6.2138696104853e-05, "loss": 0.8049, "step": 132140 }, { "epoch": 0.8442686837969411, "grad_norm": 3.256178379058838, "learning_rate": 6.213382847334094e-05, "loss": 0.8957, "step": 132150 }, { "epoch": 0.8443325709466798, "grad_norm": 0.8341447710990906, "learning_rate": 6.212896071963072e-05, "loss": 1.0626, "step": 132160 }, { "epoch": 0.8443964580964185, "grad_norm": 0.8610829710960388, "learning_rate": 6.212409284377138e-05, "loss": 0.8818, "step": 132170 }, { "epoch": 0.8444603452461572, "grad_norm": 0.9030302166938782, "learning_rate": 6.211922484581194e-05, "loss": 0.9542, "step": 132180 }, { "epoch": 0.8445242323958959, "grad_norm": 0.8101795315742493, "learning_rate": 6.211435672580143e-05, "loss": 1.069, "step": 132190 }, { "epoch": 0.8445881195456346, "grad_norm": 0.8379024267196655, "learning_rate": 6.210948848378884e-05, "loss": 1.0042, "step": 132200 }, { "epoch": 0.8446520066953733, "grad_norm": 1.2797821760177612, "learning_rate": 6.210462011982325e-05, "loss": 1.0052, "step": 132210 }, { "epoch": 0.844715893845112, "grad_norm": 0.7084786295890808, "learning_rate": 6.209975163395365e-05, "loss": 0.9553, "step": 132220 }, { "epoch": 0.8447797809948507, "grad_norm": 1.345314860343933, "learning_rate": 6.209488302622909e-05, "loss": 1.0062, "step": 132230 }, { "epoch": 0.8448436681445894, "grad_norm": 1.0037018060684204, "learning_rate": 6.209001429669859e-05, "loss": 0.8894, "step": 132240 }, { "epoch": 0.8449075552943281, "grad_norm": 0.7273179292678833, "learning_rate": 6.208514544541118e-05, "loss": 0.7736, "step": 132250 }, { "epoch": 0.8449714424440669, "grad_norm": 1.832634687423706, "learning_rate": 6.208027647241591e-05, "loss": 0.79, "step": 132260 }, { "epoch": 0.8450353295938055, "grad_norm": 0.895901083946228, "learning_rate": 6.207540737776179e-05, "loss": 0.8047, "step": 132270 }, { "epoch": 0.8450992167435442, "grad_norm": 0.9929360151290894, "learning_rate": 6.207053816149789e-05, "loss": 0.9768, "step": 132280 }, { "epoch": 0.8451631038932829, "grad_norm": 2.9557993412017822, "learning_rate": 6.206566882367323e-05, "loss": 0.6881, "step": 132290 }, { "epoch": 0.8452269910430216, "grad_norm": 1.2809102535247803, "learning_rate": 6.206079936433685e-05, "loss": 0.7503, "step": 132300 }, { "epoch": 0.8452908781927603, "grad_norm": 0.7714309692382812, "learning_rate": 6.205592978353776e-05, "loss": 0.8082, "step": 132310 }, { "epoch": 0.845354765342499, "grad_norm": 1.0882459878921509, "learning_rate": 6.205106008132505e-05, "loss": 0.7081, "step": 132320 }, { "epoch": 0.8454186524922377, "grad_norm": 1.8441599607467651, "learning_rate": 6.204619025774774e-05, "loss": 0.8468, "step": 132330 }, { "epoch": 0.8454825396419764, "grad_norm": 0.8650910258293152, "learning_rate": 6.204132031285485e-05, "loss": 0.7917, "step": 132340 }, { "epoch": 0.8455464267917151, "grad_norm": 0.9829961061477661, "learning_rate": 6.203645024669548e-05, "loss": 0.861, "step": 132350 }, { "epoch": 0.8456103139414538, "grad_norm": 1.0663328170776367, "learning_rate": 6.203158005931861e-05, "loss": 0.9636, "step": 132360 }, { "epoch": 0.8456742010911925, "grad_norm": 0.7675907611846924, "learning_rate": 6.202670975077334e-05, "loss": 0.985, "step": 132370 }, { "epoch": 0.8457380882409312, "grad_norm": 0.8641635179519653, "learning_rate": 6.20218393211087e-05, "loss": 0.7433, "step": 132380 }, { "epoch": 0.8458019753906699, "grad_norm": 1.2303731441497803, "learning_rate": 6.201696877037373e-05, "loss": 0.8609, "step": 132390 }, { "epoch": 0.8458658625404086, "grad_norm": 0.7559641599655151, "learning_rate": 6.201209809861747e-05, "loss": 0.879, "step": 132400 }, { "epoch": 0.8459297496901473, "grad_norm": 0.9814415574073792, "learning_rate": 6.200722730588901e-05, "loss": 0.9192, "step": 132410 }, { "epoch": 0.845993636839886, "grad_norm": 0.9890975952148438, "learning_rate": 6.20023563922374e-05, "loss": 0.9853, "step": 132420 }, { "epoch": 0.8460575239896247, "grad_norm": 0.8274144530296326, "learning_rate": 6.199748535771165e-05, "loss": 0.8835, "step": 132430 }, { "epoch": 0.8461214111393635, "grad_norm": 0.6956206560134888, "learning_rate": 6.199261420236086e-05, "loss": 0.9952, "step": 132440 }, { "epoch": 0.8461852982891022, "grad_norm": 0.6252676844596863, "learning_rate": 6.198774292623406e-05, "loss": 0.8467, "step": 132450 }, { "epoch": 0.8462491854388409, "grad_norm": 1.0025311708450317, "learning_rate": 6.198287152938031e-05, "loss": 0.9938, "step": 132460 }, { "epoch": 0.8463130725885796, "grad_norm": 1.1646289825439453, "learning_rate": 6.197800001184869e-05, "loss": 1.0137, "step": 132470 }, { "epoch": 0.8463769597383183, "grad_norm": 0.8148912191390991, "learning_rate": 6.197312837368825e-05, "loss": 0.937, "step": 132480 }, { "epoch": 0.846440846888057, "grad_norm": 1.7211027145385742, "learning_rate": 6.196825661494805e-05, "loss": 0.852, "step": 132490 }, { "epoch": 0.8465047340377957, "grad_norm": 0.7814443707466125, "learning_rate": 6.196338473567714e-05, "loss": 1.0702, "step": 132500 }, { "epoch": 0.8465686211875343, "grad_norm": 1.2225807905197144, "learning_rate": 6.19585127359246e-05, "loss": 0.7909, "step": 132510 }, { "epoch": 0.846632508337273, "grad_norm": 0.9283073544502258, "learning_rate": 6.19536406157395e-05, "loss": 0.8267, "step": 132520 }, { "epoch": 0.8466963954870117, "grad_norm": 0.9126421809196472, "learning_rate": 6.194876837517089e-05, "loss": 0.7771, "step": 132530 }, { "epoch": 0.8467602826367504, "grad_norm": 1.5700596570968628, "learning_rate": 6.194389601426784e-05, "loss": 1.0307, "step": 132540 }, { "epoch": 0.8468241697864891, "grad_norm": 0.8613380193710327, "learning_rate": 6.193902353307943e-05, "loss": 0.7906, "step": 132550 }, { "epoch": 0.8468880569362278, "grad_norm": 0.9143043756484985, "learning_rate": 6.193415093165473e-05, "loss": 0.9351, "step": 132560 }, { "epoch": 0.8469519440859665, "grad_norm": 1.215604543685913, "learning_rate": 6.192927821004281e-05, "loss": 0.8207, "step": 132570 }, { "epoch": 0.8470158312357052, "grad_norm": 1.1382776498794556, "learning_rate": 6.192440536829272e-05, "loss": 0.6806, "step": 132580 }, { "epoch": 0.8470797183854439, "grad_norm": 0.8516457080841064, "learning_rate": 6.191953240645356e-05, "loss": 0.9752, "step": 132590 }, { "epoch": 0.8471436055351826, "grad_norm": 0.6986536383628845, "learning_rate": 6.191465932457439e-05, "loss": 0.7651, "step": 132600 }, { "epoch": 0.8472074926849213, "grad_norm": 1.2519372701644897, "learning_rate": 6.19097861227043e-05, "loss": 0.8898, "step": 132610 }, { "epoch": 0.84727137983466, "grad_norm": 0.8474778532981873, "learning_rate": 6.190491280089236e-05, "loss": 0.9425, "step": 132620 }, { "epoch": 0.8473352669843988, "grad_norm": 0.9403843879699707, "learning_rate": 6.190003935918766e-05, "loss": 0.8585, "step": 132630 }, { "epoch": 0.8473991541341375, "grad_norm": 0.9674764275550842, "learning_rate": 6.189516579763925e-05, "loss": 1.0548, "step": 132640 }, { "epoch": 0.8474630412838762, "grad_norm": 1.1590251922607422, "learning_rate": 6.189029211629625e-05, "loss": 0.8096, "step": 132650 }, { "epoch": 0.8475269284336149, "grad_norm": 1.0220919847488403, "learning_rate": 6.188541831520772e-05, "loss": 0.7432, "step": 132660 }, { "epoch": 0.8475908155833536, "grad_norm": 0.7467325329780579, "learning_rate": 6.188054439442273e-05, "loss": 0.8427, "step": 132670 }, { "epoch": 0.8476547027330923, "grad_norm": 1.00656259059906, "learning_rate": 6.187567035399038e-05, "loss": 1.0052, "step": 132680 }, { "epoch": 0.847718589882831, "grad_norm": 1.0796363353729248, "learning_rate": 6.187079619395976e-05, "loss": 0.7158, "step": 132690 }, { "epoch": 0.8477824770325697, "grad_norm": 0.8648567795753479, "learning_rate": 6.186592191437995e-05, "loss": 0.8839, "step": 132700 }, { "epoch": 0.8478463641823084, "grad_norm": 1.2238774299621582, "learning_rate": 6.186104751530004e-05, "loss": 0.9307, "step": 132710 }, { "epoch": 0.8479102513320471, "grad_norm": 1.1575216054916382, "learning_rate": 6.185617299676913e-05, "loss": 0.9198, "step": 132720 }, { "epoch": 0.8479741384817858, "grad_norm": 0.6900240778923035, "learning_rate": 6.18512983588363e-05, "loss": 0.9578, "step": 132730 }, { "epoch": 0.8480380256315245, "grad_norm": 1.0454217195510864, "learning_rate": 6.184642360155062e-05, "loss": 0.7726, "step": 132740 }, { "epoch": 0.8481019127812631, "grad_norm": 1.078119158744812, "learning_rate": 6.184154872496124e-05, "loss": 0.8945, "step": 132750 }, { "epoch": 0.8481657999310018, "grad_norm": 1.2794424295425415, "learning_rate": 6.18366737291172e-05, "loss": 0.8144, "step": 132760 }, { "epoch": 0.8482296870807405, "grad_norm": 1.1177339553833008, "learning_rate": 6.18317986140676e-05, "loss": 0.711, "step": 132770 }, { "epoch": 0.8482935742304792, "grad_norm": 0.9638439416885376, "learning_rate": 6.182692337986157e-05, "loss": 0.712, "step": 132780 }, { "epoch": 0.848357461380218, "grad_norm": 0.7159675359725952, "learning_rate": 6.18220480265482e-05, "loss": 0.6036, "step": 132790 }, { "epoch": 0.8484213485299567, "grad_norm": 0.9956109523773193, "learning_rate": 6.181717255417658e-05, "loss": 0.8205, "step": 132800 }, { "epoch": 0.8484852356796954, "grad_norm": 0.7932936549186707, "learning_rate": 6.18122969627958e-05, "loss": 0.9146, "step": 132810 }, { "epoch": 0.8485491228294341, "grad_norm": 1.288333535194397, "learning_rate": 6.180742125245497e-05, "loss": 0.8525, "step": 132820 }, { "epoch": 0.8486130099791728, "grad_norm": 0.8285970687866211, "learning_rate": 6.180254542320319e-05, "loss": 0.9556, "step": 132830 }, { "epoch": 0.8486768971289115, "grad_norm": 0.8148375153541565, "learning_rate": 6.179766947508957e-05, "loss": 0.822, "step": 132840 }, { "epoch": 0.8487407842786502, "grad_norm": 0.717943549156189, "learning_rate": 6.17927934081632e-05, "loss": 0.9766, "step": 132850 }, { "epoch": 0.8488046714283889, "grad_norm": 0.7930614948272705, "learning_rate": 6.178791722247321e-05, "loss": 0.8497, "step": 132860 }, { "epoch": 0.8488685585781276, "grad_norm": 1.0193837881088257, "learning_rate": 6.17830409180687e-05, "loss": 0.9573, "step": 132870 }, { "epoch": 0.8489324457278663, "grad_norm": 1.764022946357727, "learning_rate": 6.177816449499878e-05, "loss": 0.9453, "step": 132880 }, { "epoch": 0.848996332877605, "grad_norm": 1.1653591394424438, "learning_rate": 6.177328795331253e-05, "loss": 0.7105, "step": 132890 }, { "epoch": 0.8490602200273437, "grad_norm": 0.9944359064102173, "learning_rate": 6.176841129305911e-05, "loss": 0.8217, "step": 132900 }, { "epoch": 0.8491241071770824, "grad_norm": 0.7695066928863525, "learning_rate": 6.176353451428758e-05, "loss": 0.8568, "step": 132910 }, { "epoch": 0.8491879943268211, "grad_norm": 0.8422155976295471, "learning_rate": 6.17586576170471e-05, "loss": 0.7569, "step": 132920 }, { "epoch": 0.8492518814765598, "grad_norm": 0.6993025541305542, "learning_rate": 6.175378060138674e-05, "loss": 0.8512, "step": 132930 }, { "epoch": 0.8493157686262985, "grad_norm": 0.7745869755744934, "learning_rate": 6.174890346735566e-05, "loss": 0.8467, "step": 132940 }, { "epoch": 0.8493796557760372, "grad_norm": 0.8811076283454895, "learning_rate": 6.174402621500297e-05, "loss": 0.6894, "step": 132950 }, { "epoch": 0.849443542925776, "grad_norm": 1.6308493614196777, "learning_rate": 6.173914884437777e-05, "loss": 1.1379, "step": 132960 }, { "epoch": 0.8495074300755147, "grad_norm": 0.7950766682624817, "learning_rate": 6.173427135552917e-05, "loss": 0.8812, "step": 132970 }, { "epoch": 0.8495713172252534, "grad_norm": 0.5945727229118347, "learning_rate": 6.172939374850633e-05, "loss": 0.7214, "step": 132980 }, { "epoch": 0.849635204374992, "grad_norm": 0.9707133173942566, "learning_rate": 6.172451602335833e-05, "loss": 0.8997, "step": 132990 }, { "epoch": 0.8496990915247307, "grad_norm": 0.5925787091255188, "learning_rate": 6.17196381801343e-05, "loss": 0.6871, "step": 133000 }, { "epoch": 0.8497629786744694, "grad_norm": 0.6003409624099731, "learning_rate": 6.171476021888341e-05, "loss": 0.8482, "step": 133010 }, { "epoch": 0.8498268658242081, "grad_norm": 0.6482333540916443, "learning_rate": 6.170988213965471e-05, "loss": 0.912, "step": 133020 }, { "epoch": 0.8498907529739468, "grad_norm": 0.6910186409950256, "learning_rate": 6.170500394249739e-05, "loss": 0.804, "step": 133030 }, { "epoch": 0.8499546401236855, "grad_norm": 0.9655617475509644, "learning_rate": 6.170012562746056e-05, "loss": 0.8942, "step": 133040 }, { "epoch": 0.8500185272734242, "grad_norm": 0.7532824873924255, "learning_rate": 6.169524719459334e-05, "loss": 0.6668, "step": 133050 }, { "epoch": 0.8500824144231629, "grad_norm": 1.318796992301941, "learning_rate": 6.169036864394485e-05, "loss": 0.9058, "step": 133060 }, { "epoch": 0.8501463015729016, "grad_norm": 0.89380943775177, "learning_rate": 6.168548997556425e-05, "loss": 0.8642, "step": 133070 }, { "epoch": 0.8502101887226403, "grad_norm": 1.309441328048706, "learning_rate": 6.168061118950063e-05, "loss": 0.9597, "step": 133080 }, { "epoch": 0.850274075872379, "grad_norm": 0.8857962489128113, "learning_rate": 6.167573228580317e-05, "loss": 0.7761, "step": 133090 }, { "epoch": 0.8503379630221177, "grad_norm": 0.7629507780075073, "learning_rate": 6.167085326452098e-05, "loss": 0.9627, "step": 133100 }, { "epoch": 0.8504018501718564, "grad_norm": 0.7469977140426636, "learning_rate": 6.16659741257032e-05, "loss": 0.8373, "step": 133110 }, { "epoch": 0.8504657373215951, "grad_norm": 0.9111135005950928, "learning_rate": 6.166109486939898e-05, "loss": 0.8188, "step": 133120 }, { "epoch": 0.8505296244713338, "grad_norm": 0.9041001796722412, "learning_rate": 6.165621549565742e-05, "loss": 1.0147, "step": 133130 }, { "epoch": 0.8505935116210726, "grad_norm": 0.7008116245269775, "learning_rate": 6.16513360045277e-05, "loss": 0.8531, "step": 133140 }, { "epoch": 0.8506573987708113, "grad_norm": NaN, "learning_rate": 6.164694436218468e-05, "loss": 0.9906, "step": 133150 }, { "epoch": 0.85072128592055, "grad_norm": 0.6200425028800964, "learning_rate": 6.164206464815282e-05, "loss": 0.9371, "step": 133160 }, { "epoch": 0.8507851730702887, "grad_norm": 1.1376943588256836, "learning_rate": 6.16371848168753e-05, "loss": 0.7415, "step": 133170 }, { "epoch": 0.8508490602200274, "grad_norm": 0.8951854109764099, "learning_rate": 6.163230486840124e-05, "loss": 0.9746, "step": 133180 }, { "epoch": 0.8509129473697661, "grad_norm": 0.6957682967185974, "learning_rate": 6.162742480277984e-05, "loss": 0.8415, "step": 133190 }, { "epoch": 0.8509768345195048, "grad_norm": 0.7306455373764038, "learning_rate": 6.162254462006018e-05, "loss": 1.1689, "step": 133200 }, { "epoch": 0.8510407216692435, "grad_norm": 0.8031535744667053, "learning_rate": 6.161766432029146e-05, "loss": 0.9466, "step": 133210 }, { "epoch": 0.8511046088189822, "grad_norm": 0.6348420977592468, "learning_rate": 6.16127839035228e-05, "loss": 1.0381, "step": 133220 }, { "epoch": 0.8511684959687209, "grad_norm": 1.0873243808746338, "learning_rate": 6.160790336980335e-05, "loss": 0.9977, "step": 133230 }, { "epoch": 0.8512323831184595, "grad_norm": 0.8845553994178772, "learning_rate": 6.160302271918229e-05, "loss": 0.8587, "step": 133240 }, { "epoch": 0.8512962702681982, "grad_norm": 0.7557221055030823, "learning_rate": 6.159814195170876e-05, "loss": 0.7288, "step": 133250 }, { "epoch": 0.8513601574179369, "grad_norm": 0.9131662845611572, "learning_rate": 6.159326106743188e-05, "loss": 0.8813, "step": 133260 }, { "epoch": 0.8514240445676756, "grad_norm": 1.1992850303649902, "learning_rate": 6.158838006640086e-05, "loss": 0.8098, "step": 133270 }, { "epoch": 0.8514879317174143, "grad_norm": 0.9900951385498047, "learning_rate": 6.15834989486648e-05, "loss": 0.8796, "step": 133280 }, { "epoch": 0.851551818867153, "grad_norm": 1.0529921054840088, "learning_rate": 6.15786177142729e-05, "loss": 1.2292, "step": 133290 }, { "epoch": 0.8516157060168917, "grad_norm": 0.8537276983261108, "learning_rate": 6.15737363632743e-05, "loss": 0.7625, "step": 133300 }, { "epoch": 0.8516795931666304, "grad_norm": 0.9666260480880737, "learning_rate": 6.156885489571816e-05, "loss": 1.091, "step": 133310 }, { "epoch": 0.8517434803163692, "grad_norm": 0.8413388729095459, "learning_rate": 6.156397331165364e-05, "loss": 0.8328, "step": 133320 }, { "epoch": 0.8518073674661079, "grad_norm": 0.5855796933174133, "learning_rate": 6.155909161112992e-05, "loss": 0.9068, "step": 133330 }, { "epoch": 0.8518712546158466, "grad_norm": 1.3463701009750366, "learning_rate": 6.155420979419612e-05, "loss": 0.88, "step": 133340 }, { "epoch": 0.8519351417655853, "grad_norm": 0.8949428796768188, "learning_rate": 6.154932786090146e-05, "loss": 0.9982, "step": 133350 }, { "epoch": 0.851999028915324, "grad_norm": 1.747206687927246, "learning_rate": 6.154444581129506e-05, "loss": 1.1201, "step": 133360 }, { "epoch": 0.8520629160650627, "grad_norm": 2.57167911529541, "learning_rate": 6.153956364542612e-05, "loss": 0.5995, "step": 133370 }, { "epoch": 0.8521268032148014, "grad_norm": 0.8123181462287903, "learning_rate": 6.153468136334377e-05, "loss": 0.7424, "step": 133380 }, { "epoch": 0.8521906903645401, "grad_norm": 0.9312867522239685, "learning_rate": 6.15297989650972e-05, "loss": 0.7271, "step": 133390 }, { "epoch": 0.8522545775142788, "grad_norm": 0.9896630644798279, "learning_rate": 6.15249164507356e-05, "loss": 0.7866, "step": 133400 }, { "epoch": 0.8523184646640175, "grad_norm": 1.394911527633667, "learning_rate": 6.152003382030809e-05, "loss": 0.9573, "step": 133410 }, { "epoch": 0.8523823518137562, "grad_norm": 0.9811311960220337, "learning_rate": 6.151515107386389e-05, "loss": 1.0954, "step": 133420 }, { "epoch": 0.8524462389634949, "grad_norm": 1.180068016052246, "learning_rate": 6.15107565029105e-05, "loss": 1.2012, "step": 133430 }, { "epoch": 0.8525101261132336, "grad_norm": 1.4226782321929932, "learning_rate": 6.150587353617002e-05, "loss": 0.6689, "step": 133440 }, { "epoch": 0.8525740132629723, "grad_norm": 0.7661568522453308, "learning_rate": 6.150099045355547e-05, "loss": 1.1808, "step": 133450 }, { "epoch": 0.852637900412711, "grad_norm": 1.3695670366287231, "learning_rate": 6.149610725511597e-05, "loss": 0.8527, "step": 133460 }, { "epoch": 0.8527017875624497, "grad_norm": 1.424774408340454, "learning_rate": 6.149122394090073e-05, "loss": 0.7823, "step": 133470 }, { "epoch": 0.8527656747121883, "grad_norm": 1.2621248960494995, "learning_rate": 6.148634051095893e-05, "loss": 0.9556, "step": 133480 }, { "epoch": 0.852829561861927, "grad_norm": 0.7116201519966125, "learning_rate": 6.148145696533973e-05, "loss": 0.8582, "step": 133490 }, { "epoch": 0.8528934490116657, "grad_norm": 1.219441533088684, "learning_rate": 6.147657330409234e-05, "loss": 0.8675, "step": 133500 }, { "epoch": 0.8529573361614045, "grad_norm": 0.8610523343086243, "learning_rate": 6.147168952726593e-05, "loss": 0.9913, "step": 133510 }, { "epoch": 0.8530212233111432, "grad_norm": 1.2600919008255005, "learning_rate": 6.146680563490968e-05, "loss": 1.2457, "step": 133520 }, { "epoch": 0.8530851104608819, "grad_norm": 0.7178075909614563, "learning_rate": 6.146192162707275e-05, "loss": 1.0738, "step": 133530 }, { "epoch": 0.8531489976106206, "grad_norm": 0.7833428382873535, "learning_rate": 6.145703750380439e-05, "loss": 0.8051, "step": 133540 }, { "epoch": 0.8532128847603593, "grad_norm": 0.6498239636421204, "learning_rate": 6.145215326515375e-05, "loss": 0.836, "step": 133550 }, { "epoch": 0.853276771910098, "grad_norm": 1.1565107107162476, "learning_rate": 6.144726891117e-05, "loss": 0.6755, "step": 133560 }, { "epoch": 0.8533406590598367, "grad_norm": 0.6840099692344666, "learning_rate": 6.144238444190236e-05, "loss": 0.9921, "step": 133570 }, { "epoch": 0.8534045462095754, "grad_norm": 0.7863107323646545, "learning_rate": 6.143749985740001e-05, "loss": 0.8842, "step": 133580 }, { "epoch": 0.8534684333593141, "grad_norm": 1.3972042798995972, "learning_rate": 6.143261515771214e-05, "loss": 0.7173, "step": 133590 }, { "epoch": 0.8535323205090528, "grad_norm": 0.5784814357757568, "learning_rate": 6.142773034288794e-05, "loss": 1.0078, "step": 133600 }, { "epoch": 0.8535962076587915, "grad_norm": 0.8995794057846069, "learning_rate": 6.14228454129766e-05, "loss": 0.7556, "step": 133610 }, { "epoch": 0.8536600948085302, "grad_norm": 1.020451307296753, "learning_rate": 6.141796036802734e-05, "loss": 0.917, "step": 133620 }, { "epoch": 0.8537239819582689, "grad_norm": 0.7589079141616821, "learning_rate": 6.141307520808934e-05, "loss": 0.7353, "step": 133630 }, { "epoch": 0.8537878691080076, "grad_norm": 0.9039588570594788, "learning_rate": 6.14081899332118e-05, "loss": 1.0483, "step": 133640 }, { "epoch": 0.8538517562577463, "grad_norm": 0.9937171339988708, "learning_rate": 6.140330454344391e-05, "loss": 0.7808, "step": 133650 }, { "epoch": 0.853915643407485, "grad_norm": 2.9743402004241943, "learning_rate": 6.139841903883488e-05, "loss": 0.6187, "step": 133660 }, { "epoch": 0.8539795305572238, "grad_norm": 0.9731509685516357, "learning_rate": 6.139353341943391e-05, "loss": 1.0048, "step": 133670 }, { "epoch": 0.8540434177069625, "grad_norm": 0.7726428508758545, "learning_rate": 6.13886476852902e-05, "loss": 0.8544, "step": 133680 }, { "epoch": 0.8541073048567012, "grad_norm": 1.0278613567352295, "learning_rate": 6.138376183645295e-05, "loss": 0.8038, "step": 133690 }, { "epoch": 0.8541711920064399, "grad_norm": 0.9102327823638916, "learning_rate": 6.137887587297138e-05, "loss": 0.8712, "step": 133700 }, { "epoch": 0.8542350791561786, "grad_norm": 0.6600117683410645, "learning_rate": 6.137398979489468e-05, "loss": 1.0407, "step": 133710 }, { "epoch": 0.8542989663059172, "grad_norm": 1.2443808317184448, "learning_rate": 6.136910360227207e-05, "loss": 0.9258, "step": 133720 }, { "epoch": 0.8543628534556559, "grad_norm": 0.6546837687492371, "learning_rate": 6.136421729515275e-05, "loss": 0.8981, "step": 133730 }, { "epoch": 0.8544267406053946, "grad_norm": 0.5999804139137268, "learning_rate": 6.135933087358591e-05, "loss": 0.8701, "step": 133740 }, { "epoch": 0.8544906277551333, "grad_norm": 0.6947192549705505, "learning_rate": 6.135444433762081e-05, "loss": 0.8316, "step": 133750 }, { "epoch": 0.854554514904872, "grad_norm": 1.474822759628296, "learning_rate": 6.134955768730663e-05, "loss": 0.806, "step": 133760 }, { "epoch": 0.8546184020546107, "grad_norm": 0.8612034320831299, "learning_rate": 6.134467092269257e-05, "loss": 0.7773, "step": 133770 }, { "epoch": 0.8546822892043494, "grad_norm": 1.0290131568908691, "learning_rate": 6.133978404382786e-05, "loss": 0.8817, "step": 133780 }, { "epoch": 0.8547461763540881, "grad_norm": 0.7579601407051086, "learning_rate": 6.133489705076172e-05, "loss": 0.9595, "step": 133790 }, { "epoch": 0.8548100635038268, "grad_norm": 0.7710822224617004, "learning_rate": 6.133000994354337e-05, "loss": 0.8086, "step": 133800 }, { "epoch": 0.8548739506535655, "grad_norm": 0.9965303540229797, "learning_rate": 6.1325122722222e-05, "loss": 0.8848, "step": 133810 }, { "epoch": 0.8549378378033042, "grad_norm": 0.9291059970855713, "learning_rate": 6.132023538684687e-05, "loss": 0.7339, "step": 133820 }, { "epoch": 0.8550017249530429, "grad_norm": 0.7711076140403748, "learning_rate": 6.131534793746716e-05, "loss": 0.9099, "step": 133830 }, { "epoch": 0.8550656121027816, "grad_norm": 0.9747552871704102, "learning_rate": 6.131046037413211e-05, "loss": 1.076, "step": 133840 }, { "epoch": 0.8551294992525204, "grad_norm": 0.6776683330535889, "learning_rate": 6.130557269689092e-05, "loss": 0.9466, "step": 133850 }, { "epoch": 0.8551933864022591, "grad_norm": 2.5817158222198486, "learning_rate": 6.130068490579286e-05, "loss": 0.7916, "step": 133860 }, { "epoch": 0.8552572735519978, "grad_norm": 0.8578413724899292, "learning_rate": 6.129579700088711e-05, "loss": 0.8225, "step": 133870 }, { "epoch": 0.8553211607017365, "grad_norm": 0.8771921992301941, "learning_rate": 6.129090898222291e-05, "loss": 0.8542, "step": 133880 }, { "epoch": 0.8553850478514752, "grad_norm": 0.8163079023361206, "learning_rate": 6.128602084984951e-05, "loss": 0.7394, "step": 133890 }, { "epoch": 0.8554489350012139, "grad_norm": 1.6026225090026855, "learning_rate": 6.128113260381611e-05, "loss": 0.7461, "step": 133900 }, { "epoch": 0.8555128221509526, "grad_norm": 1.152044415473938, "learning_rate": 6.127624424417193e-05, "loss": 0.9446, "step": 133910 }, { "epoch": 0.8555767093006913, "grad_norm": 0.7472025156021118, "learning_rate": 6.127135577096623e-05, "loss": 0.7461, "step": 133920 }, { "epoch": 0.85564059645043, "grad_norm": 0.7701200246810913, "learning_rate": 6.126646718424822e-05, "loss": 0.9566, "step": 133930 }, { "epoch": 0.8557044836001687, "grad_norm": 0.626395583152771, "learning_rate": 6.126157848406712e-05, "loss": 0.6947, "step": 133940 }, { "epoch": 0.8557683707499074, "grad_norm": 1.1391795873641968, "learning_rate": 6.12566896704722e-05, "loss": 0.8568, "step": 133950 }, { "epoch": 0.8558322578996461, "grad_norm": 1.0339782238006592, "learning_rate": 6.125180074351269e-05, "loss": 0.9725, "step": 133960 }, { "epoch": 0.8558961450493847, "grad_norm": 0.8343575596809387, "learning_rate": 6.12469117032378e-05, "loss": 1.1068, "step": 133970 }, { "epoch": 0.8559600321991234, "grad_norm": 0.4701806604862213, "learning_rate": 6.124202254969678e-05, "loss": 0.7032, "step": 133980 }, { "epoch": 0.8560239193488621, "grad_norm": 0.6901923418045044, "learning_rate": 6.123713328293887e-05, "loss": 0.9095, "step": 133990 }, { "epoch": 0.8560878064986008, "grad_norm": 1.137757658958435, "learning_rate": 6.123224390301329e-05, "loss": 0.8695, "step": 134000 }, { "epoch": 0.8561516936483395, "grad_norm": 1.1090092658996582, "learning_rate": 6.122735440996931e-05, "loss": 1.1309, "step": 134010 }, { "epoch": 0.8562155807980782, "grad_norm": 0.6597867608070374, "learning_rate": 6.122246480385616e-05, "loss": 1.0033, "step": 134020 }, { "epoch": 0.856279467947817, "grad_norm": 0.6848984360694885, "learning_rate": 6.121757508472308e-05, "loss": 0.9416, "step": 134030 }, { "epoch": 0.8563433550975557, "grad_norm": 0.9123812913894653, "learning_rate": 6.12126852526193e-05, "loss": 0.98, "step": 134040 }, { "epoch": 0.8564072422472944, "grad_norm": 0.5774307250976562, "learning_rate": 6.120779530759409e-05, "loss": 0.7818, "step": 134050 }, { "epoch": 0.8564711293970331, "grad_norm": 1.0986335277557373, "learning_rate": 6.120290524969668e-05, "loss": 0.7268, "step": 134060 }, { "epoch": 0.8565350165467718, "grad_norm": 4.184600830078125, "learning_rate": 6.119801507897634e-05, "loss": 1.0409, "step": 134070 }, { "epoch": 0.8565989036965105, "grad_norm": 0.8018998503684998, "learning_rate": 6.119312479548229e-05, "loss": 0.9884, "step": 134080 }, { "epoch": 0.8566627908462492, "grad_norm": 0.6833622455596924, "learning_rate": 6.118823439926379e-05, "loss": 0.7314, "step": 134090 }, { "epoch": 0.8567266779959879, "grad_norm": 3.289335012435913, "learning_rate": 6.118334389037008e-05, "loss": 1.3275, "step": 134100 }, { "epoch": 0.8567905651457266, "grad_norm": 2.1931798458099365, "learning_rate": 6.117845326885043e-05, "loss": 1.194, "step": 134110 }, { "epoch": 0.8568544522954653, "grad_norm": 0.985336422920227, "learning_rate": 6.11735625347541e-05, "loss": 0.9866, "step": 134120 }, { "epoch": 0.856918339445204, "grad_norm": 1.144832968711853, "learning_rate": 6.116867168813031e-05, "loss": 0.7581, "step": 134130 }, { "epoch": 0.8569822265949427, "grad_norm": 0.8992478251457214, "learning_rate": 6.116378072902833e-05, "loss": 0.7862, "step": 134140 }, { "epoch": 0.8570461137446814, "grad_norm": 0.4670437276363373, "learning_rate": 6.115888965749744e-05, "loss": 1.1326, "step": 134150 }, { "epoch": 0.8571100008944201, "grad_norm": 0.688841700553894, "learning_rate": 6.115399847358685e-05, "loss": 0.7873, "step": 134160 }, { "epoch": 0.8571738880441588, "grad_norm": 1.1751261949539185, "learning_rate": 6.114910717734586e-05, "loss": 0.8015, "step": 134170 }, { "epoch": 0.8572377751938975, "grad_norm": 0.8896322846412659, "learning_rate": 6.114421576882372e-05, "loss": 1.0137, "step": 134180 }, { "epoch": 0.8573016623436363, "grad_norm": 0.692790150642395, "learning_rate": 6.113932424806969e-05, "loss": 0.8688, "step": 134190 }, { "epoch": 0.857365549493375, "grad_norm": 0.8335268497467041, "learning_rate": 6.113443261513302e-05, "loss": 0.9654, "step": 134200 }, { "epoch": 0.8574294366431136, "grad_norm": 0.880637526512146, "learning_rate": 6.112954087006297e-05, "loss": 0.769, "step": 134210 }, { "epoch": 0.8574933237928523, "grad_norm": 0.6039393544197083, "learning_rate": 6.112464901290882e-05, "loss": 0.9527, "step": 134220 }, { "epoch": 0.857557210942591, "grad_norm": 1.0236138105392456, "learning_rate": 6.111975704371984e-05, "loss": 1.0147, "step": 134230 }, { "epoch": 0.8576210980923297, "grad_norm": 1.451583743095398, "learning_rate": 6.111486496254528e-05, "loss": 0.7267, "step": 134240 }, { "epoch": 0.8576849852420684, "grad_norm": 0.8510944843292236, "learning_rate": 6.110997276943442e-05, "loss": 1.272, "step": 134250 }, { "epoch": 0.8577488723918071, "grad_norm": 0.7636389136314392, "learning_rate": 6.110508046443652e-05, "loss": 0.7339, "step": 134260 }, { "epoch": 0.8578127595415458, "grad_norm": 0.7402799725532532, "learning_rate": 6.110018804760085e-05, "loss": 0.7794, "step": 134270 }, { "epoch": 0.8578766466912845, "grad_norm": 0.8340638279914856, "learning_rate": 6.109529551897669e-05, "loss": 0.8052, "step": 134280 }, { "epoch": 0.8579405338410232, "grad_norm": 1.0177710056304932, "learning_rate": 6.109040287861331e-05, "loss": 0.9876, "step": 134290 }, { "epoch": 0.8580044209907619, "grad_norm": 0.8329386711120605, "learning_rate": 6.108551012655996e-05, "loss": 0.8806, "step": 134300 }, { "epoch": 0.8580683081405006, "grad_norm": 0.7960025072097778, "learning_rate": 6.108061726286596e-05, "loss": 0.6743, "step": 134310 }, { "epoch": 0.8581321952902393, "grad_norm": 1.751345157623291, "learning_rate": 6.107572428758053e-05, "loss": 0.9132, "step": 134320 }, { "epoch": 0.858196082439978, "grad_norm": 0.8473448753356934, "learning_rate": 6.1070831200753e-05, "loss": 0.8131, "step": 134330 }, { "epoch": 0.8582599695897167, "grad_norm": 0.8581190705299377, "learning_rate": 6.10659380024326e-05, "loss": 0.8037, "step": 134340 }, { "epoch": 0.8583238567394554, "grad_norm": 0.964256227016449, "learning_rate": 6.106104469266865e-05, "loss": 0.6935, "step": 134350 }, { "epoch": 0.8583877438891941, "grad_norm": 1.3757505416870117, "learning_rate": 6.105615127151039e-05, "loss": 0.7474, "step": 134360 }, { "epoch": 0.8584516310389328, "grad_norm": 0.7548801898956299, "learning_rate": 6.105125773900712e-05, "loss": 0.8156, "step": 134370 }, { "epoch": 0.8585155181886716, "grad_norm": 0.4941951036453247, "learning_rate": 6.104636409520814e-05, "loss": 0.7144, "step": 134380 }, { "epoch": 0.8585794053384103, "grad_norm": 0.6011403799057007, "learning_rate": 6.10414703401627e-05, "loss": 1.0064, "step": 134390 }, { "epoch": 0.858643292488149, "grad_norm": 1.6302911043167114, "learning_rate": 6.103657647392012e-05, "loss": 0.9264, "step": 134400 }, { "epoch": 0.8587071796378877, "grad_norm": 0.9661455154418945, "learning_rate": 6.103168249652966e-05, "loss": 1.068, "step": 134410 }, { "epoch": 0.8587710667876264, "grad_norm": 1.3184993267059326, "learning_rate": 6.1026788408040616e-05, "loss": 0.8662, "step": 134420 }, { "epoch": 0.8588349539373651, "grad_norm": 0.8495551943778992, "learning_rate": 6.102189420850226e-05, "loss": 0.8618, "step": 134430 }, { "epoch": 0.8588988410871038, "grad_norm": 0.7962411642074585, "learning_rate": 6.101699989796391e-05, "loss": 1.0453, "step": 134440 }, { "epoch": 0.8589627282368424, "grad_norm": 0.7486063241958618, "learning_rate": 6.1012105476474835e-05, "loss": 0.9581, "step": 134450 }, { "epoch": 0.8590266153865811, "grad_norm": 0.8825688362121582, "learning_rate": 6.100721094408434e-05, "loss": 0.9215, "step": 134460 }, { "epoch": 0.8590905025363198, "grad_norm": 0.9865175485610962, "learning_rate": 6.100231630084169e-05, "loss": 0.8866, "step": 134470 }, { "epoch": 0.8591543896860585, "grad_norm": 0.7580648064613342, "learning_rate": 6.099742154679621e-05, "loss": 0.7733, "step": 134480 }, { "epoch": 0.8592182768357972, "grad_norm": 0.9302807450294495, "learning_rate": 6.099252668199718e-05, "loss": 0.7856, "step": 134490 }, { "epoch": 0.8592821639855359, "grad_norm": 0.8940306305885315, "learning_rate": 6.098763170649389e-05, "loss": 1.2527, "step": 134500 }, { "epoch": 0.8593460511352746, "grad_norm": 1.2878268957138062, "learning_rate": 6.0982736620335644e-05, "loss": 0.8244, "step": 134510 }, { "epoch": 0.8594099382850133, "grad_norm": 0.8814017176628113, "learning_rate": 6.097784142357174e-05, "loss": 1.0, "step": 134520 }, { "epoch": 0.859473825434752, "grad_norm": 0.6660280823707581, "learning_rate": 6.097294611625147e-05, "loss": 0.7688, "step": 134530 }, { "epoch": 0.8595377125844907, "grad_norm": 1.1975473165512085, "learning_rate": 6.0968050698424154e-05, "loss": 1.033, "step": 134540 }, { "epoch": 0.8596015997342294, "grad_norm": 0.822115421295166, "learning_rate": 6.0963155170139066e-05, "loss": 0.8708, "step": 134550 }, { "epoch": 0.8596654868839682, "grad_norm": 0.9180407524108887, "learning_rate": 6.095825953144553e-05, "loss": 0.9883, "step": 134560 }, { "epoch": 0.8597293740337069, "grad_norm": 1.0190486907958984, "learning_rate": 6.095336378239284e-05, "loss": 0.8116, "step": 134570 }, { "epoch": 0.8597932611834456, "grad_norm": 0.6985743045806885, "learning_rate": 6.094846792303029e-05, "loss": 0.8544, "step": 134580 }, { "epoch": 0.8598571483331843, "grad_norm": 0.9220016598701477, "learning_rate": 6.0943571953407205e-05, "loss": 1.1432, "step": 134590 }, { "epoch": 0.859921035482923, "grad_norm": 1.1213401556015015, "learning_rate": 6.093867587357288e-05, "loss": 1.1309, "step": 134600 }, { "epoch": 0.8599849226326617, "grad_norm": 0.7928663492202759, "learning_rate": 6.093377968357663e-05, "loss": 0.8537, "step": 134610 }, { "epoch": 0.8600488097824004, "grad_norm": 1.3430203199386597, "learning_rate": 6.092888338346775e-05, "loss": 0.8679, "step": 134620 }, { "epoch": 0.8601126969321391, "grad_norm": 0.7503795027732849, "learning_rate": 6.0923986973295564e-05, "loss": 1.0493, "step": 134630 }, { "epoch": 0.8601765840818778, "grad_norm": 0.9265238046646118, "learning_rate": 6.091909045310938e-05, "loss": 0.9348, "step": 134640 }, { "epoch": 0.8602404712316165, "grad_norm": 0.69620680809021, "learning_rate": 6.091419382295851e-05, "loss": 0.8313, "step": 134650 }, { "epoch": 0.8603043583813552, "grad_norm": 0.8834882974624634, "learning_rate": 6.090929708289227e-05, "loss": 0.7323, "step": 134660 }, { "epoch": 0.8603682455310939, "grad_norm": 0.9060829281806946, "learning_rate": 6.0904400232959965e-05, "loss": 0.8098, "step": 134670 }, { "epoch": 0.8604321326808326, "grad_norm": 0.8208954930305481, "learning_rate": 6.089950327321092e-05, "loss": 0.9883, "step": 134680 }, { "epoch": 0.8604960198305712, "grad_norm": 0.8429823517799377, "learning_rate": 6.089460620369444e-05, "loss": 0.9104, "step": 134690 }, { "epoch": 0.8605599069803099, "grad_norm": 0.753400981426239, "learning_rate": 6.088970902445985e-05, "loss": 0.8429, "step": 134700 }, { "epoch": 0.8606237941300486, "grad_norm": 1.0955417156219482, "learning_rate": 6.088481173555648e-05, "loss": 1.0817, "step": 134710 }, { "epoch": 0.8606876812797873, "grad_norm": 1.585567831993103, "learning_rate": 6.087991433703363e-05, "loss": 0.8062, "step": 134720 }, { "epoch": 0.860751568429526, "grad_norm": 0.6598794460296631, "learning_rate": 6.0875016828940635e-05, "loss": 0.8811, "step": 134730 }, { "epoch": 0.8608154555792648, "grad_norm": 0.5704881548881531, "learning_rate": 6.08701192113268e-05, "loss": 0.7517, "step": 134740 }, { "epoch": 0.8608793427290035, "grad_norm": 0.8759427666664124, "learning_rate": 6.086522148424148e-05, "loss": 0.874, "step": 134750 }, { "epoch": 0.8609432298787422, "grad_norm": 0.7344384789466858, "learning_rate": 6.086032364773396e-05, "loss": 1.0344, "step": 134760 }, { "epoch": 0.8610071170284809, "grad_norm": 1.3539067506790161, "learning_rate": 6.0855425701853596e-05, "loss": 1.1221, "step": 134770 }, { "epoch": 0.8610710041782196, "grad_norm": 0.85466068983078, "learning_rate": 6.08505276466497e-05, "loss": 0.9896, "step": 134780 }, { "epoch": 0.8611348913279583, "grad_norm": 1.4604058265686035, "learning_rate": 6.0845629482171626e-05, "loss": 0.762, "step": 134790 }, { "epoch": 0.861198778477697, "grad_norm": 0.6832066178321838, "learning_rate": 6.084073120846866e-05, "loss": 0.8548, "step": 134800 }, { "epoch": 0.8612626656274357, "grad_norm": 0.6687494516372681, "learning_rate": 6.083583282559016e-05, "loss": 1.1136, "step": 134810 }, { "epoch": 0.8613265527771744, "grad_norm": 0.9443023800849915, "learning_rate": 6.083093433358544e-05, "loss": 1.0468, "step": 134820 }, { "epoch": 0.8613904399269131, "grad_norm": 0.6644616723060608, "learning_rate": 6.082603573250384e-05, "loss": 0.7965, "step": 134830 }, { "epoch": 0.8614543270766518, "grad_norm": 0.6500226855278015, "learning_rate": 6.0821137022394705e-05, "loss": 1.0983, "step": 134840 }, { "epoch": 0.8615182142263905, "grad_norm": 1.1436829566955566, "learning_rate": 6.0816238203307355e-05, "loss": 1.0032, "step": 134850 }, { "epoch": 0.8615821013761292, "grad_norm": 0.5248997211456299, "learning_rate": 6.081133927529112e-05, "loss": 0.7732, "step": 134860 }, { "epoch": 0.8616459885258679, "grad_norm": 0.8002848625183105, "learning_rate": 6.0806440238395347e-05, "loss": 0.8625, "step": 134870 }, { "epoch": 0.8617098756756066, "grad_norm": 1.1468842029571533, "learning_rate": 6.080154109266938e-05, "loss": 0.6841, "step": 134880 }, { "epoch": 0.8617737628253453, "grad_norm": 1.0037697553634644, "learning_rate": 6.0796641838162546e-05, "loss": 0.83, "step": 134890 }, { "epoch": 0.861837649975084, "grad_norm": 1.227607250213623, "learning_rate": 6.0791742474924175e-05, "loss": 0.9535, "step": 134900 }, { "epoch": 0.8619015371248228, "grad_norm": 0.9437126517295837, "learning_rate": 6.0786843003003636e-05, "loss": 1.2439, "step": 134910 }, { "epoch": 0.8619654242745615, "grad_norm": 1.4104220867156982, "learning_rate": 6.078194342245025e-05, "loss": 1.1915, "step": 134920 }, { "epoch": 0.8620293114243002, "grad_norm": 0.9898942112922668, "learning_rate": 6.0777043733313375e-05, "loss": 0.8426, "step": 134930 }, { "epoch": 0.8620931985740388, "grad_norm": 1.2425917387008667, "learning_rate": 6.077214393564234e-05, "loss": 0.7399, "step": 134940 }, { "epoch": 0.8621570857237775, "grad_norm": 0.8567221760749817, "learning_rate": 6.07672440294865e-05, "loss": 0.8017, "step": 134950 }, { "epoch": 0.8622209728735162, "grad_norm": 0.8979184031486511, "learning_rate": 6.07623440148952e-05, "loss": 1.1169, "step": 134960 }, { "epoch": 0.8622848600232549, "grad_norm": 0.6733188033103943, "learning_rate": 6.075744389191778e-05, "loss": 0.9605, "step": 134970 }, { "epoch": 0.8623487471729936, "grad_norm": 1.6359256505966187, "learning_rate": 6.0752543660603587e-05, "loss": 0.7852, "step": 134980 }, { "epoch": 0.8624126343227323, "grad_norm": 1.0377789735794067, "learning_rate": 6.074764332100199e-05, "loss": 1.1418, "step": 134990 }, { "epoch": 0.862476521472471, "grad_norm": 0.5584946274757385, "learning_rate": 6.074274287316232e-05, "loss": 0.6663, "step": 135000 }, { "epoch": 0.8625404086222097, "grad_norm": 0.936705470085144, "learning_rate": 6.073784231713393e-05, "loss": 0.9404, "step": 135010 }, { "epoch": 0.8626042957719484, "grad_norm": 0.8763816356658936, "learning_rate": 6.0732941652966194e-05, "loss": 0.7721, "step": 135020 }, { "epoch": 0.8626681829216871, "grad_norm": 2.3810360431671143, "learning_rate": 6.072804088070844e-05, "loss": 0.6623, "step": 135030 }, { "epoch": 0.8627320700714258, "grad_norm": 1.851711392402649, "learning_rate": 6.0723140000410036e-05, "loss": 0.8857, "step": 135040 }, { "epoch": 0.8627959572211645, "grad_norm": 0.9787930846214294, "learning_rate": 6.0718239012120334e-05, "loss": 0.9424, "step": 135050 }, { "epoch": 0.8628598443709032, "grad_norm": 1.1540073156356812, "learning_rate": 6.071333791588868e-05, "loss": 0.8247, "step": 135060 }, { "epoch": 0.862923731520642, "grad_norm": 0.7594394087791443, "learning_rate": 6.0708436711764464e-05, "loss": 1.098, "step": 135070 }, { "epoch": 0.8629876186703807, "grad_norm": 0.9598045349121094, "learning_rate": 6.070353539979702e-05, "loss": 0.7815, "step": 135080 }, { "epoch": 0.8630515058201194, "grad_norm": 0.8562808632850647, "learning_rate": 6.069863398003571e-05, "loss": 0.9166, "step": 135090 }, { "epoch": 0.8631153929698581, "grad_norm": 1.306075930595398, "learning_rate": 6.0693732452529906e-05, "loss": 0.7203, "step": 135100 }, { "epoch": 0.8631792801195968, "grad_norm": 0.9534823298454285, "learning_rate": 6.0688830817328955e-05, "loss": 0.9033, "step": 135110 }, { "epoch": 0.8632431672693355, "grad_norm": 0.8284388184547424, "learning_rate": 6.068392907448224e-05, "loss": 0.7617, "step": 135120 }, { "epoch": 0.8633070544190742, "grad_norm": 0.9172964096069336, "learning_rate": 6.067902722403912e-05, "loss": 0.7122, "step": 135130 }, { "epoch": 0.8633709415688129, "grad_norm": 1.129095435142517, "learning_rate": 6.067412526604894e-05, "loss": 1.0913, "step": 135140 }, { "epoch": 0.8634348287185516, "grad_norm": 0.673621416091919, "learning_rate": 6.06692232005611e-05, "loss": 0.9686, "step": 135150 }, { "epoch": 0.8634987158682903, "grad_norm": 0.5581203699111938, "learning_rate": 6.066432102762495e-05, "loss": 1.0076, "step": 135160 }, { "epoch": 0.863562603018029, "grad_norm": 0.8796345591545105, "learning_rate": 6.0659418747289864e-05, "loss": 0.8523, "step": 135170 }, { "epoch": 0.8636264901677676, "grad_norm": 0.9877477288246155, "learning_rate": 6.06545163596052e-05, "loss": 0.8171, "step": 135180 }, { "epoch": 0.8636903773175063, "grad_norm": 1.176950216293335, "learning_rate": 6.0649613864620345e-05, "loss": 0.7933, "step": 135190 }, { "epoch": 0.863754264467245, "grad_norm": 1.119374394416809, "learning_rate": 6.064471126238467e-05, "loss": 0.9705, "step": 135200 }, { "epoch": 0.8638181516169837, "grad_norm": 0.757959246635437, "learning_rate": 6.063980855294753e-05, "loss": 0.9029, "step": 135210 }, { "epoch": 0.8638820387667224, "grad_norm": 0.6737072467803955, "learning_rate": 6.0634905736358326e-05, "loss": 0.9489, "step": 135220 }, { "epoch": 0.8639459259164611, "grad_norm": 0.7668609619140625, "learning_rate": 6.063000281266641e-05, "loss": 0.9851, "step": 135230 }, { "epoch": 0.8640098130661998, "grad_norm": 0.9595603346824646, "learning_rate": 6.062509978192118e-05, "loss": 1.0468, "step": 135240 }, { "epoch": 0.8640737002159385, "grad_norm": 1.4497737884521484, "learning_rate": 6.062019664417199e-05, "loss": 0.9572, "step": 135250 }, { "epoch": 0.8641375873656773, "grad_norm": 0.8604928851127625, "learning_rate": 6.061529339946824e-05, "loss": 0.9146, "step": 135260 }, { "epoch": 0.864201474515416, "grad_norm": 0.8765950798988342, "learning_rate": 6.061039004785929e-05, "loss": 0.7102, "step": 135270 }, { "epoch": 0.8642653616651547, "grad_norm": 0.523587703704834, "learning_rate": 6.060548658939456e-05, "loss": 0.7571, "step": 135280 }, { "epoch": 0.8643292488148934, "grad_norm": 0.93330979347229, "learning_rate": 6.0600583024123394e-05, "loss": 0.859, "step": 135290 }, { "epoch": 0.8643931359646321, "grad_norm": 0.9948300123214722, "learning_rate": 6.059567935209518e-05, "loss": 0.8673, "step": 135300 }, { "epoch": 0.8644570231143708, "grad_norm": 0.9706994295120239, "learning_rate": 6.059077557335931e-05, "loss": 0.7599, "step": 135310 }, { "epoch": 0.8645209102641095, "grad_norm": 1.1021097898483276, "learning_rate": 6.058587168796517e-05, "loss": 0.7995, "step": 135320 }, { "epoch": 0.8645847974138482, "grad_norm": 0.8516930937767029, "learning_rate": 6.058096769596213e-05, "loss": 0.9658, "step": 135330 }, { "epoch": 0.8646486845635869, "grad_norm": 0.8828617930412292, "learning_rate": 6.0576063597399615e-05, "loss": 0.7387, "step": 135340 }, { "epoch": 0.8647125717133256, "grad_norm": 0.8962679505348206, "learning_rate": 6.0571159392326974e-05, "loss": 1.0775, "step": 135350 }, { "epoch": 0.8647764588630643, "grad_norm": 0.678126871585846, "learning_rate": 6.056625508079361e-05, "loss": 0.9041, "step": 135360 }, { "epoch": 0.864840346012803, "grad_norm": 1.034623146057129, "learning_rate": 6.056135066284893e-05, "loss": 1.1352, "step": 135370 }, { "epoch": 0.8649042331625417, "grad_norm": 0.7552897930145264, "learning_rate": 6.05564461385423e-05, "loss": 0.9906, "step": 135380 }, { "epoch": 0.8649681203122804, "grad_norm": 1.2172116041183472, "learning_rate": 6.055154150792313e-05, "loss": 0.8446, "step": 135390 }, { "epoch": 0.8650320074620191, "grad_norm": 0.5961598753929138, "learning_rate": 6.054663677104081e-05, "loss": 0.7268, "step": 135400 }, { "epoch": 0.8650958946117578, "grad_norm": 0.595866322517395, "learning_rate": 6.0541731927944734e-05, "loss": 0.7465, "step": 135410 }, { "epoch": 0.8651597817614964, "grad_norm": 0.8760963678359985, "learning_rate": 6.0536826978684294e-05, "loss": 0.8472, "step": 135420 }, { "epoch": 0.8652236689112351, "grad_norm": 0.7106996774673462, "learning_rate": 6.0531921923308874e-05, "loss": 0.979, "step": 135430 }, { "epoch": 0.8652875560609739, "grad_norm": 0.6956402063369751, "learning_rate": 6.052701676186791e-05, "loss": 0.9007, "step": 135440 }, { "epoch": 0.8653514432107126, "grad_norm": 2.090533971786499, "learning_rate": 6.0522111494410785e-05, "loss": 0.8266, "step": 135450 }, { "epoch": 0.8654153303604513, "grad_norm": 0.7869872450828552, "learning_rate": 6.051720612098688e-05, "loss": 0.6341, "step": 135460 }, { "epoch": 0.86547921751019, "grad_norm": 0.6018970012664795, "learning_rate": 6.051230064164561e-05, "loss": 0.7308, "step": 135470 }, { "epoch": 0.8655431046599287, "grad_norm": 0.8730195164680481, "learning_rate": 6.050739505643639e-05, "loss": 1.086, "step": 135480 }, { "epoch": 0.8656069918096674, "grad_norm": 0.8639483451843262, "learning_rate": 6.050248936540861e-05, "loss": 1.0237, "step": 135490 }, { "epoch": 0.8656708789594061, "grad_norm": 0.7378480434417725, "learning_rate": 6.0497583568611674e-05, "loss": 0.8411, "step": 135500 }, { "epoch": 0.8657347661091448, "grad_norm": 0.876330554485321, "learning_rate": 6.049267766609499e-05, "loss": 1.0058, "step": 135510 }, { "epoch": 0.8657986532588835, "grad_norm": 0.6682674884796143, "learning_rate": 6.0487771657907974e-05, "loss": 0.8205, "step": 135520 }, { "epoch": 0.8658625404086222, "grad_norm": 1.085636019706726, "learning_rate": 6.048286554410001e-05, "loss": 0.9317, "step": 135530 }, { "epoch": 0.8659264275583609, "grad_norm": 0.9509056210517883, "learning_rate": 6.047795932472052e-05, "loss": 0.7042, "step": 135540 }, { "epoch": 0.8659903147080996, "grad_norm": 1.0311496257781982, "learning_rate": 6.0473052999818925e-05, "loss": 1.0667, "step": 135550 }, { "epoch": 0.8660542018578383, "grad_norm": 1.0395874977111816, "learning_rate": 6.0468146569444615e-05, "loss": 1.0347, "step": 135560 }, { "epoch": 0.866118089007577, "grad_norm": 0.933964729309082, "learning_rate": 6.0463240033647025e-05, "loss": 0.9422, "step": 135570 }, { "epoch": 0.8661819761573157, "grad_norm": 0.8697935342788696, "learning_rate": 6.045833339247555e-05, "loss": 1.1477, "step": 135580 }, { "epoch": 0.8662458633070544, "grad_norm": 0.7333647012710571, "learning_rate": 6.045342664597959e-05, "loss": 0.8173, "step": 135590 }, { "epoch": 0.8663097504567931, "grad_norm": 0.761461079120636, "learning_rate": 6.04485197942086e-05, "loss": 0.9989, "step": 135600 }, { "epoch": 0.8663736376065319, "grad_norm": 0.7777496576309204, "learning_rate": 6.0443612837211984e-05, "loss": 0.9211, "step": 135610 }, { "epoch": 0.8664375247562706, "grad_norm": 0.8241527080535889, "learning_rate": 6.043870577503914e-05, "loss": 0.8573, "step": 135620 }, { "epoch": 0.8665014119060093, "grad_norm": 1.33556067943573, "learning_rate": 6.04337986077395e-05, "loss": 0.88, "step": 135630 }, { "epoch": 0.866565299055748, "grad_norm": 0.8477666974067688, "learning_rate": 6.0428891335362484e-05, "loss": 0.9298, "step": 135640 }, { "epoch": 0.8666291862054867, "grad_norm": 1.5744928121566772, "learning_rate": 6.0423983957957505e-05, "loss": 0.9288, "step": 135650 }, { "epoch": 0.8666930733552254, "grad_norm": 1.1519935131072998, "learning_rate": 6.041907647557399e-05, "loss": 0.9664, "step": 135660 }, { "epoch": 0.866756960504964, "grad_norm": 0.649913489818573, "learning_rate": 6.041416888826137e-05, "loss": 0.9266, "step": 135670 }, { "epoch": 0.8668208476547027, "grad_norm": 0.9466597437858582, "learning_rate": 6.040926119606906e-05, "loss": 0.8896, "step": 135680 }, { "epoch": 0.8668847348044414, "grad_norm": 0.5539588332176208, "learning_rate": 6.040435339904646e-05, "loss": 0.9554, "step": 135690 }, { "epoch": 0.8669486219541801, "grad_norm": 1.099380373954773, "learning_rate": 6.039944549724305e-05, "loss": 0.8488, "step": 135700 }, { "epoch": 0.8670125091039188, "grad_norm": 0.6397864818572998, "learning_rate": 6.0394537490708216e-05, "loss": 0.9452, "step": 135710 }, { "epoch": 0.8670763962536575, "grad_norm": 0.7527474164962769, "learning_rate": 6.0389629379491395e-05, "loss": 1.1831, "step": 135720 }, { "epoch": 0.8671402834033962, "grad_norm": 1.0391385555267334, "learning_rate": 6.0384721163642024e-05, "loss": 0.951, "step": 135730 }, { "epoch": 0.8672041705531349, "grad_norm": 0.9291607737541199, "learning_rate": 6.0379812843209515e-05, "loss": 0.8248, "step": 135740 }, { "epoch": 0.8672680577028736, "grad_norm": 0.8267570734024048, "learning_rate": 6.0374904418243315e-05, "loss": 0.7667, "step": 135750 }, { "epoch": 0.8673319448526123, "grad_norm": 1.5941237211227417, "learning_rate": 6.0369995888792863e-05, "loss": 0.8021, "step": 135760 }, { "epoch": 0.867395832002351, "grad_norm": 0.5440096259117126, "learning_rate": 6.036508725490757e-05, "loss": 0.6538, "step": 135770 }, { "epoch": 0.8674597191520897, "grad_norm": 1.1164497137069702, "learning_rate": 6.036017851663689e-05, "loss": 0.719, "step": 135780 }, { "epoch": 0.8675236063018285, "grad_norm": 0.8377860188484192, "learning_rate": 6.035526967403023e-05, "loss": 0.9899, "step": 135790 }, { "epoch": 0.8675874934515672, "grad_norm": 0.7305436730384827, "learning_rate": 6.035036072713707e-05, "loss": 0.744, "step": 135800 }, { "epoch": 0.8676513806013059, "grad_norm": 1.4228670597076416, "learning_rate": 6.034545167600682e-05, "loss": 0.8398, "step": 135810 }, { "epoch": 0.8677152677510446, "grad_norm": 0.7904695868492126, "learning_rate": 6.0340542520688904e-05, "loss": 0.8431, "step": 135820 }, { "epoch": 0.8677791549007833, "grad_norm": 1.2709144353866577, "learning_rate": 6.03356332612328e-05, "loss": 0.8764, "step": 135830 }, { "epoch": 0.867843042050522, "grad_norm": 0.8756301999092102, "learning_rate": 6.03307238976879e-05, "loss": 0.782, "step": 135840 }, { "epoch": 0.8679069292002607, "grad_norm": 0.5899653434753418, "learning_rate": 6.03258144301037e-05, "loss": 0.7944, "step": 135850 }, { "epoch": 0.8679708163499994, "grad_norm": 0.976030707359314, "learning_rate": 6.03209048585296e-05, "loss": 0.8582, "step": 135860 }, { "epoch": 0.8680347034997381, "grad_norm": 1.095521092414856, "learning_rate": 6.0315995183015064e-05, "loss": 0.9034, "step": 135870 }, { "epoch": 0.8680985906494768, "grad_norm": 0.8074119091033936, "learning_rate": 6.031108540360954e-05, "loss": 0.8167, "step": 135880 }, { "epoch": 0.8681624777992155, "grad_norm": 1.544575810432434, "learning_rate": 6.0306175520362454e-05, "loss": 0.8362, "step": 135890 }, { "epoch": 0.8682263649489542, "grad_norm": 0.7311546802520752, "learning_rate": 6.030126553332327e-05, "loss": 1.0513, "step": 135900 }, { "epoch": 0.8682902520986928, "grad_norm": 1.0786371231079102, "learning_rate": 6.029635544254143e-05, "loss": 1.0181, "step": 135910 }, { "epoch": 0.8683541392484315, "grad_norm": 1.0580967664718628, "learning_rate": 6.029144524806638e-05, "loss": 0.7061, "step": 135920 }, { "epoch": 0.8684180263981702, "grad_norm": 0.9200608730316162, "learning_rate": 6.028653494994757e-05, "loss": 0.9578, "step": 135930 }, { "epoch": 0.8684819135479089, "grad_norm": 1.2529308795928955, "learning_rate": 6.028162454823446e-05, "loss": 0.878, "step": 135940 }, { "epoch": 0.8685458006976476, "grad_norm": 0.6350985765457153, "learning_rate": 6.0276714042976504e-05, "loss": 0.8528, "step": 135950 }, { "epoch": 0.8686096878473863, "grad_norm": 1.1729838848114014, "learning_rate": 6.0271803434223115e-05, "loss": 0.8492, "step": 135960 }, { "epoch": 0.868673574997125, "grad_norm": 0.679898738861084, "learning_rate": 6.02668927220238e-05, "loss": 1.0879, "step": 135970 }, { "epoch": 0.8687374621468638, "grad_norm": 0.9746125936508179, "learning_rate": 6.0261981906428e-05, "loss": 0.9354, "step": 135980 }, { "epoch": 0.8688013492966025, "grad_norm": 0.8682552576065063, "learning_rate": 6.0257070987485166e-05, "loss": 0.9114, "step": 135990 }, { "epoch": 0.8688652364463412, "grad_norm": 0.879461944103241, "learning_rate": 6.025215996524474e-05, "loss": 0.7752, "step": 136000 }, { "epoch": 0.8689291235960799, "grad_norm": 1.0179787874221802, "learning_rate": 6.024724883975621e-05, "loss": 0.9302, "step": 136010 }, { "epoch": 0.8689930107458186, "grad_norm": 0.7405498623847961, "learning_rate": 6.024233761106901e-05, "loss": 0.8306, "step": 136020 }, { "epoch": 0.8690568978955573, "grad_norm": 0.9917730689048767, "learning_rate": 6.023742627923261e-05, "loss": 0.8827, "step": 136030 }, { "epoch": 0.869120785045296, "grad_norm": 1.0026957988739014, "learning_rate": 6.023251484429647e-05, "loss": 0.9303, "step": 136040 }, { "epoch": 0.8691846721950347, "grad_norm": 0.6799507141113281, "learning_rate": 6.022760330631005e-05, "loss": 0.7576, "step": 136050 }, { "epoch": 0.8692485593447734, "grad_norm": 0.7701660990715027, "learning_rate": 6.0222691665322815e-05, "loss": 0.7309, "step": 136060 }, { "epoch": 0.8693124464945121, "grad_norm": 0.7990044355392456, "learning_rate": 6.0217779921384246e-05, "loss": 0.9324, "step": 136070 }, { "epoch": 0.8693763336442508, "grad_norm": 0.8976256251335144, "learning_rate": 6.0212868074543785e-05, "loss": 0.855, "step": 136080 }, { "epoch": 0.8694402207939895, "grad_norm": 1.0746898651123047, "learning_rate": 6.02079561248509e-05, "loss": 0.8045, "step": 136090 }, { "epoch": 0.8695041079437282, "grad_norm": 1.292189359664917, "learning_rate": 6.0203044072355065e-05, "loss": 0.8419, "step": 136100 }, { "epoch": 0.8695679950934669, "grad_norm": 0.673413097858429, "learning_rate": 6.019813191710576e-05, "loss": 0.7643, "step": 136110 }, { "epoch": 0.8696318822432056, "grad_norm": 0.834862232208252, "learning_rate": 6.0193219659152424e-05, "loss": 0.8355, "step": 136120 }, { "epoch": 0.8696957693929444, "grad_norm": 0.6725580096244812, "learning_rate": 6.018830729854457e-05, "loss": 1.1082, "step": 136130 }, { "epoch": 0.8697596565426831, "grad_norm": 1.442153811454773, "learning_rate": 6.018339483533163e-05, "loss": 0.9068, "step": 136140 }, { "epoch": 0.8698235436924217, "grad_norm": 1.0553643703460693, "learning_rate": 6.017848226956311e-05, "loss": 0.7979, "step": 136150 }, { "epoch": 0.8698874308421604, "grad_norm": 1.0946028232574463, "learning_rate": 6.017356960128846e-05, "loss": 0.8663, "step": 136160 }, { "epoch": 0.8699513179918991, "grad_norm": 1.3556715250015259, "learning_rate": 6.0168656830557165e-05, "loss": 0.9034, "step": 136170 }, { "epoch": 0.8700152051416378, "grad_norm": 0.47752645611763, "learning_rate": 6.016374395741869e-05, "loss": 0.9813, "step": 136180 }, { "epoch": 0.8700790922913765, "grad_norm": 1.1327354907989502, "learning_rate": 6.0158830981922544e-05, "loss": 0.838, "step": 136190 }, { "epoch": 0.8701429794411152, "grad_norm": 0.8484867811203003, "learning_rate": 6.0153917904118164e-05, "loss": 0.8683, "step": 136200 }, { "epoch": 0.8702068665908539, "grad_norm": 0.7545785307884216, "learning_rate": 6.0149004724055046e-05, "loss": 0.9844, "step": 136210 }, { "epoch": 0.8702707537405926, "grad_norm": 0.7116890549659729, "learning_rate": 6.0144091441782666e-05, "loss": 0.8028, "step": 136220 }, { "epoch": 0.8703346408903313, "grad_norm": 0.5172243714332581, "learning_rate": 6.013917805735052e-05, "loss": 0.8459, "step": 136230 }, { "epoch": 0.87039852804007, "grad_norm": 0.9006187319755554, "learning_rate": 6.0134264570808076e-05, "loss": 1.1869, "step": 136240 }, { "epoch": 0.8704624151898087, "grad_norm": 1.2033772468566895, "learning_rate": 6.012935098220483e-05, "loss": 0.7985, "step": 136250 }, { "epoch": 0.8705263023395474, "grad_norm": 0.8718836903572083, "learning_rate": 6.012443729159025e-05, "loss": 0.8137, "step": 136260 }, { "epoch": 0.8705901894892861, "grad_norm": 1.3549836874008179, "learning_rate": 6.011952349901382e-05, "loss": 1.1543, "step": 136270 }, { "epoch": 0.8706540766390248, "grad_norm": 1.342417597770691, "learning_rate": 6.011460960452503e-05, "loss": 0.8184, "step": 136280 }, { "epoch": 0.8707179637887635, "grad_norm": 0.5869442224502563, "learning_rate": 6.010969560817338e-05, "loss": 0.7865, "step": 136290 }, { "epoch": 0.8707818509385022, "grad_norm": 1.237336277961731, "learning_rate": 6.0104781510008345e-05, "loss": 0.8941, "step": 136300 }, { "epoch": 0.870845738088241, "grad_norm": 0.9919825792312622, "learning_rate": 6.0099867310079416e-05, "loss": 0.9696, "step": 136310 }, { "epoch": 0.8709096252379797, "grad_norm": 0.6661075949668884, "learning_rate": 6.0094953008436094e-05, "loss": 0.7271, "step": 136320 }, { "epoch": 0.8709735123877184, "grad_norm": 1.1574594974517822, "learning_rate": 6.009003860512785e-05, "loss": 0.6527, "step": 136330 }, { "epoch": 0.8710373995374571, "grad_norm": 0.8676467537879944, "learning_rate": 6.0085124100204205e-05, "loss": 0.7372, "step": 136340 }, { "epoch": 0.8711012866871958, "grad_norm": 0.6834307312965393, "learning_rate": 6.0080209493714626e-05, "loss": 0.9976, "step": 136350 }, { "epoch": 0.8711651738369345, "grad_norm": 0.8142191767692566, "learning_rate": 6.0075294785708617e-05, "loss": 0.9738, "step": 136360 }, { "epoch": 0.8712290609866732, "grad_norm": 1.038397192955017, "learning_rate": 6.007037997623567e-05, "loss": 0.78, "step": 136370 }, { "epoch": 0.8712929481364119, "grad_norm": 1.5472460985183716, "learning_rate": 6.006546506534529e-05, "loss": 0.6741, "step": 136380 }, { "epoch": 0.8713568352861505, "grad_norm": 0.9952694177627563, "learning_rate": 6.006055005308697e-05, "loss": 0.7892, "step": 136390 }, { "epoch": 0.8714207224358892, "grad_norm": 1.1230021715164185, "learning_rate": 6.005563493951021e-05, "loss": 0.9274, "step": 136400 }, { "epoch": 0.8714846095856279, "grad_norm": 1.2984684705734253, "learning_rate": 6.005071972466449e-05, "loss": 0.8563, "step": 136410 }, { "epoch": 0.8715484967353666, "grad_norm": 0.9113028049468994, "learning_rate": 6.004580440859934e-05, "loss": 0.8594, "step": 136420 }, { "epoch": 0.8716123838851053, "grad_norm": 1.0389131307601929, "learning_rate": 6.0040888991364255e-05, "loss": 1.0333, "step": 136430 }, { "epoch": 0.871676271034844, "grad_norm": 0.7670816779136658, "learning_rate": 6.003597347300872e-05, "loss": 0.833, "step": 136440 }, { "epoch": 0.8717401581845827, "grad_norm": 0.7277560234069824, "learning_rate": 6.003105785358225e-05, "loss": 1.0034, "step": 136450 }, { "epoch": 0.8718040453343214, "grad_norm": 0.8484509587287903, "learning_rate": 6.0026142133134354e-05, "loss": 1.1544, "step": 136460 }, { "epoch": 0.8718679324840601, "grad_norm": 1.0106443166732788, "learning_rate": 6.0021226311714526e-05, "loss": 0.7397, "step": 136470 }, { "epoch": 0.8719318196337988, "grad_norm": 2.2280375957489014, "learning_rate": 6.0016310389372275e-05, "loss": 0.987, "step": 136480 }, { "epoch": 0.8719957067835375, "grad_norm": 0.9950495958328247, "learning_rate": 6.001139436615713e-05, "loss": 0.7946, "step": 136490 }, { "epoch": 0.8720595939332763, "grad_norm": 0.8028036952018738, "learning_rate": 6.000647824211858e-05, "loss": 0.8753, "step": 136500 }, { "epoch": 0.872123481083015, "grad_norm": 1.1068840026855469, "learning_rate": 6.000156201730614e-05, "loss": 1.0608, "step": 136510 }, { "epoch": 0.8721873682327537, "grad_norm": 0.5927395224571228, "learning_rate": 5.9996645691769305e-05, "loss": 0.7914, "step": 136520 }, { "epoch": 0.8722512553824924, "grad_norm": 1.3745521306991577, "learning_rate": 5.9991729265557605e-05, "loss": 1.0867, "step": 136530 }, { "epoch": 0.8723151425322311, "grad_norm": 1.0866520404815674, "learning_rate": 5.998681273872055e-05, "loss": 0.7932, "step": 136540 }, { "epoch": 0.8723790296819698, "grad_norm": 1.1197307109832764, "learning_rate": 5.998189611130764e-05, "loss": 1.0212, "step": 136550 }, { "epoch": 0.8724429168317085, "grad_norm": 0.9691267609596252, "learning_rate": 5.9976979383368414e-05, "loss": 0.7832, "step": 136560 }, { "epoch": 0.8725068039814472, "grad_norm": 2.4745099544525146, "learning_rate": 5.997206255495237e-05, "loss": 0.9366, "step": 136570 }, { "epoch": 0.8725706911311859, "grad_norm": 1.1133451461791992, "learning_rate": 5.9967145626109035e-05, "loss": 1.0052, "step": 136580 }, { "epoch": 0.8726345782809246, "grad_norm": 0.8237787485122681, "learning_rate": 5.996222859688791e-05, "loss": 0.9783, "step": 136590 }, { "epoch": 0.8726984654306633, "grad_norm": 0.960617184638977, "learning_rate": 5.995731146733853e-05, "loss": 0.7924, "step": 136600 }, { "epoch": 0.872762352580402, "grad_norm": 0.8641276955604553, "learning_rate": 5.99523942375104e-05, "loss": 0.9258, "step": 136610 }, { "epoch": 0.8728262397301407, "grad_norm": 0.6767961382865906, "learning_rate": 5.994747690745306e-05, "loss": 0.8195, "step": 136620 }, { "epoch": 0.8728901268798794, "grad_norm": 0.8257598280906677, "learning_rate": 5.9942559477216024e-05, "loss": 1.0143, "step": 136630 }, { "epoch": 0.872954014029618, "grad_norm": 0.9936842918395996, "learning_rate": 5.99376419468488e-05, "loss": 1.14, "step": 136640 }, { "epoch": 0.8730179011793567, "grad_norm": 1.0033197402954102, "learning_rate": 5.993272431640093e-05, "loss": 0.9506, "step": 136650 }, { "epoch": 0.8730817883290954, "grad_norm": 0.9221176505088806, "learning_rate": 5.992780658592193e-05, "loss": 0.8055, "step": 136660 }, { "epoch": 0.8731456754788341, "grad_norm": 1.5162618160247803, "learning_rate": 5.9922888755461336e-05, "loss": 0.8351, "step": 136670 }, { "epoch": 0.8732095626285729, "grad_norm": 0.7005751132965088, "learning_rate": 5.991797082506867e-05, "loss": 0.8902, "step": 136680 }, { "epoch": 0.8732734497783116, "grad_norm": 0.8630402684211731, "learning_rate": 5.9913052794793453e-05, "loss": 0.9382, "step": 136690 }, { "epoch": 0.8733373369280503, "grad_norm": 0.7950804829597473, "learning_rate": 5.990813466468522e-05, "loss": 1.0334, "step": 136700 }, { "epoch": 0.873401224077789, "grad_norm": 0.9526651501655579, "learning_rate": 5.9903216434793494e-05, "loss": 1.1781, "step": 136710 }, { "epoch": 0.8734651112275277, "grad_norm": 1.170040488243103, "learning_rate": 5.989829810516782e-05, "loss": 1.0522, "step": 136720 }, { "epoch": 0.8735289983772664, "grad_norm": 1.0779001712799072, "learning_rate": 5.9893379675857706e-05, "loss": 0.8159, "step": 136730 }, { "epoch": 0.8735928855270051, "grad_norm": 0.806840181350708, "learning_rate": 5.9888461146912736e-05, "loss": 0.7811, "step": 136740 }, { "epoch": 0.8736567726767438, "grad_norm": 0.7434895634651184, "learning_rate": 5.988354251838237e-05, "loss": 0.8606, "step": 136750 }, { "epoch": 0.8737206598264825, "grad_norm": 1.0427266359329224, "learning_rate": 5.987862379031619e-05, "loss": 1.006, "step": 136760 }, { "epoch": 0.8737845469762212, "grad_norm": 1.1790105104446411, "learning_rate": 5.987370496276372e-05, "loss": 0.81, "step": 136770 }, { "epoch": 0.8738484341259599, "grad_norm": 1.0513496398925781, "learning_rate": 5.9868786035774504e-05, "loss": 0.9206, "step": 136780 }, { "epoch": 0.8739123212756986, "grad_norm": 1.0362788438796997, "learning_rate": 5.986386700939808e-05, "loss": 0.8835, "step": 136790 }, { "epoch": 0.8739762084254373, "grad_norm": 1.3794645071029663, "learning_rate": 5.985894788368397e-05, "loss": 0.6992, "step": 136800 }, { "epoch": 0.874040095575176, "grad_norm": 0.74660724401474, "learning_rate": 5.9854028658681724e-05, "loss": 1.0083, "step": 136810 }, { "epoch": 0.8741039827249147, "grad_norm": 0.5785887837409973, "learning_rate": 5.984910933444089e-05, "loss": 0.9494, "step": 136820 }, { "epoch": 0.8741678698746534, "grad_norm": 0.7972803115844727, "learning_rate": 5.984418991101101e-05, "loss": 1.1168, "step": 136830 }, { "epoch": 0.8742317570243922, "grad_norm": 0.9159103631973267, "learning_rate": 5.983927038844162e-05, "loss": 1.1372, "step": 136840 }, { "epoch": 0.8742956441741309, "grad_norm": 1.0228685140609741, "learning_rate": 5.9834350766782255e-05, "loss": 0.5965, "step": 136850 }, { "epoch": 0.8743595313238696, "grad_norm": 1.0952800512313843, "learning_rate": 5.982943104608247e-05, "loss": 0.8082, "step": 136860 }, { "epoch": 0.8744234184736083, "grad_norm": 0.7565765380859375, "learning_rate": 5.982451122639182e-05, "loss": 0.6736, "step": 136870 }, { "epoch": 0.8744873056233469, "grad_norm": 0.7633196115493774, "learning_rate": 5.981959130775985e-05, "loss": 0.8517, "step": 136880 }, { "epoch": 0.8745511927730856, "grad_norm": 2.1346609592437744, "learning_rate": 5.981467129023609e-05, "loss": 0.854, "step": 136890 }, { "epoch": 0.8746150799228243, "grad_norm": 0.8160780668258667, "learning_rate": 5.98097511738701e-05, "loss": 1.1138, "step": 136900 }, { "epoch": 0.874678967072563, "grad_norm": 0.6529989838600159, "learning_rate": 5.9804830958711425e-05, "loss": 0.675, "step": 136910 }, { "epoch": 0.8747428542223017, "grad_norm": 0.846062421798706, "learning_rate": 5.979991064480962e-05, "loss": 0.763, "step": 136920 }, { "epoch": 0.8748067413720404, "grad_norm": 0.8752646446228027, "learning_rate": 5.9794990232214244e-05, "loss": 0.7932, "step": 136930 }, { "epoch": 0.8748706285217791, "grad_norm": 0.695993959903717, "learning_rate": 5.979006972097484e-05, "loss": 0.7567, "step": 136940 }, { "epoch": 0.8749345156715178, "grad_norm": 0.825805127620697, "learning_rate": 5.978514911114096e-05, "loss": 0.7572, "step": 136950 }, { "epoch": 0.8749984028212565, "grad_norm": 1.5052249431610107, "learning_rate": 5.9780228402762165e-05, "loss": 0.944, "step": 136960 }, { "epoch": 0.8750622899709952, "grad_norm": 1.3028863668441772, "learning_rate": 5.9775307595888006e-05, "loss": 1.0014, "step": 136970 }, { "epoch": 0.8751261771207339, "grad_norm": 2.27508282661438, "learning_rate": 5.977038669056805e-05, "loss": 0.8975, "step": 136980 }, { "epoch": 0.8751900642704726, "grad_norm": 0.5962340235710144, "learning_rate": 5.9765465686851854e-05, "loss": 0.8318, "step": 136990 }, { "epoch": 0.8752539514202113, "grad_norm": 0.7743219137191772, "learning_rate": 5.976054458478896e-05, "loss": 0.9495, "step": 137000 }, { "epoch": 0.87531783856995, "grad_norm": 0.840707540512085, "learning_rate": 5.975562338442893e-05, "loss": 0.8466, "step": 137010 }, { "epoch": 0.8753817257196888, "grad_norm": 0.7525313496589661, "learning_rate": 5.975070208582134e-05, "loss": 0.5504, "step": 137020 }, { "epoch": 0.8754456128694275, "grad_norm": 2.833361864089966, "learning_rate": 5.974578068901575e-05, "loss": 0.9305, "step": 137030 }, { "epoch": 0.8755095000191662, "grad_norm": 0.896931529045105, "learning_rate": 5.9740859194061717e-05, "loss": 1.0519, "step": 137040 }, { "epoch": 0.8755733871689049, "grad_norm": 0.6994075179100037, "learning_rate": 5.97359376010088e-05, "loss": 0.9204, "step": 137050 }, { "epoch": 0.8756372743186436, "grad_norm": 0.8043060898780823, "learning_rate": 5.9731015909906565e-05, "loss": 0.7847, "step": 137060 }, { "epoch": 0.8757011614683823, "grad_norm": 0.9698672294616699, "learning_rate": 5.9726094120804585e-05, "loss": 0.7268, "step": 137070 }, { "epoch": 0.875765048618121, "grad_norm": 1.0071710348129272, "learning_rate": 5.972117223375242e-05, "loss": 0.7952, "step": 137080 }, { "epoch": 0.8758289357678597, "grad_norm": 0.7718594074249268, "learning_rate": 5.9716250248799644e-05, "loss": 0.7514, "step": 137090 }, { "epoch": 0.8758928229175984, "grad_norm": 0.8059403300285339, "learning_rate": 5.971132816599583e-05, "loss": 0.9773, "step": 137100 }, { "epoch": 0.8759567100673371, "grad_norm": 0.6279333829879761, "learning_rate": 5.970640598539052e-05, "loss": 1.1655, "step": 137110 }, { "epoch": 0.8760205972170757, "grad_norm": 0.5626464486122131, "learning_rate": 5.970148370703332e-05, "loss": 0.7618, "step": 137120 }, { "epoch": 0.8760844843668144, "grad_norm": 0.6805403828620911, "learning_rate": 5.969656133097379e-05, "loss": 0.9308, "step": 137130 }, { "epoch": 0.8761483715165531, "grad_norm": 1.129631519317627, "learning_rate": 5.969163885726148e-05, "loss": 0.8858, "step": 137140 }, { "epoch": 0.8762122586662918, "grad_norm": 0.6671173572540283, "learning_rate": 5.9686716285946e-05, "loss": 0.9919, "step": 137150 }, { "epoch": 0.8762761458160305, "grad_norm": 0.8226957321166992, "learning_rate": 5.9681793617076895e-05, "loss": 0.8594, "step": 137160 }, { "epoch": 0.8763400329657692, "grad_norm": 0.9677339792251587, "learning_rate": 5.9676870850703747e-05, "loss": 0.9001, "step": 137170 }, { "epoch": 0.8764039201155079, "grad_norm": 1.0769922733306885, "learning_rate": 5.967194798687615e-05, "loss": 0.9104, "step": 137180 }, { "epoch": 0.8764678072652466, "grad_norm": 0.9808753728866577, "learning_rate": 5.966702502564366e-05, "loss": 0.8969, "step": 137190 }, { "epoch": 0.8765316944149854, "grad_norm": 0.8168275356292725, "learning_rate": 5.9662101967055885e-05, "loss": 0.8239, "step": 137200 }, { "epoch": 0.8765955815647241, "grad_norm": 0.7705772519111633, "learning_rate": 5.965717881116237e-05, "loss": 0.7709, "step": 137210 }, { "epoch": 0.8766594687144628, "grad_norm": 0.7873682975769043, "learning_rate": 5.965225555801272e-05, "loss": 0.8556, "step": 137220 }, { "epoch": 0.8767233558642015, "grad_norm": 1.820673942565918, "learning_rate": 5.9647332207656505e-05, "loss": 1.1398, "step": 137230 }, { "epoch": 0.8767872430139402, "grad_norm": 1.0492981672286987, "learning_rate": 5.9642408760143296e-05, "loss": 1.0855, "step": 137240 }, { "epoch": 0.8768511301636789, "grad_norm": 1.0693048238754272, "learning_rate": 5.9637485215522694e-05, "loss": 1.043, "step": 137250 }, { "epoch": 0.8769150173134176, "grad_norm": 1.1092848777770996, "learning_rate": 5.963256157384427e-05, "loss": 1.1529, "step": 137260 }, { "epoch": 0.8769789044631563, "grad_norm": 1.148908257484436, "learning_rate": 5.962763783515763e-05, "loss": 0.7518, "step": 137270 }, { "epoch": 0.877042791612895, "grad_norm": 1.0465582609176636, "learning_rate": 5.9622713999512345e-05, "loss": 0.8351, "step": 137280 }, { "epoch": 0.8771066787626337, "grad_norm": 0.7496880888938904, "learning_rate": 5.9617790066958e-05, "loss": 1.0184, "step": 137290 }, { "epoch": 0.8771705659123724, "grad_norm": 2.036813259124756, "learning_rate": 5.96128660375442e-05, "loss": 1.1939, "step": 137300 }, { "epoch": 0.8772344530621111, "grad_norm": 0.8851515054702759, "learning_rate": 5.9607941911320506e-05, "loss": 0.8136, "step": 137310 }, { "epoch": 0.8772983402118498, "grad_norm": 1.1349178552627563, "learning_rate": 5.960301768833654e-05, "loss": 0.8269, "step": 137320 }, { "epoch": 0.8773622273615885, "grad_norm": 1.0561522245407104, "learning_rate": 5.959809336864186e-05, "loss": 1.0435, "step": 137330 }, { "epoch": 0.8774261145113272, "grad_norm": 1.015069842338562, "learning_rate": 5.959316895228609e-05, "loss": 0.752, "step": 137340 }, { "epoch": 0.8774900016610659, "grad_norm": 1.3497037887573242, "learning_rate": 5.958824443931881e-05, "loss": 0.7568, "step": 137350 }, { "epoch": 0.8775538888108046, "grad_norm": 0.6360141038894653, "learning_rate": 5.958331982978961e-05, "loss": 0.8139, "step": 137360 }, { "epoch": 0.8776177759605432, "grad_norm": 1.0269728899002075, "learning_rate": 5.957839512374809e-05, "loss": 0.9107, "step": 137370 }, { "epoch": 0.877681663110282, "grad_norm": 0.7632153630256653, "learning_rate": 5.957347032124384e-05, "loss": 1.0206, "step": 137380 }, { "epoch": 0.8777455502600207, "grad_norm": 0.7974910736083984, "learning_rate": 5.9568545422326474e-05, "loss": 0.8792, "step": 137390 }, { "epoch": 0.8778094374097594, "grad_norm": 1.0210436582565308, "learning_rate": 5.956362042704556e-05, "loss": 1.0714, "step": 137400 }, { "epoch": 0.8778733245594981, "grad_norm": 0.8050969839096069, "learning_rate": 5.955869533545073e-05, "loss": 0.9401, "step": 137410 }, { "epoch": 0.8779372117092368, "grad_norm": 0.888954758644104, "learning_rate": 5.955377014759156e-05, "loss": 0.9508, "step": 137420 }, { "epoch": 0.8780010988589755, "grad_norm": 0.69648677110672, "learning_rate": 5.954884486351766e-05, "loss": 0.9033, "step": 137430 }, { "epoch": 0.8780649860087142, "grad_norm": 1.9958151578903198, "learning_rate": 5.954391948327864e-05, "loss": 0.9938, "step": 137440 }, { "epoch": 0.8781288731584529, "grad_norm": 0.7439517378807068, "learning_rate": 5.9538994006924085e-05, "loss": 0.9702, "step": 137450 }, { "epoch": 0.8781927603081916, "grad_norm": 0.9544771313667297, "learning_rate": 5.953406843450361e-05, "loss": 1.0634, "step": 137460 }, { "epoch": 0.8782566474579303, "grad_norm": 0.9266231656074524, "learning_rate": 5.9529142766066823e-05, "loss": 0.8061, "step": 137470 }, { "epoch": 0.878320534607669, "grad_norm": 0.9102841019630432, "learning_rate": 5.952421700166333e-05, "loss": 0.7466, "step": 137480 }, { "epoch": 0.8783844217574077, "grad_norm": 0.9724735021591187, "learning_rate": 5.9519291141342714e-05, "loss": 0.7188, "step": 137490 }, { "epoch": 0.8784483089071464, "grad_norm": 0.6619033217430115, "learning_rate": 5.951436518515461e-05, "loss": 0.696, "step": 137500 }, { "epoch": 0.8785121960568851, "grad_norm": 1.0394726991653442, "learning_rate": 5.9509439133148616e-05, "loss": 0.9148, "step": 137510 }, { "epoch": 0.8785760832066238, "grad_norm": 0.9882583618164062, "learning_rate": 5.950451298537434e-05, "loss": 0.7845, "step": 137520 }, { "epoch": 0.8786399703563625, "grad_norm": 1.2519365549087524, "learning_rate": 5.94995867418814e-05, "loss": 0.7793, "step": 137530 }, { "epoch": 0.8787038575061012, "grad_norm": 0.7872567772865295, "learning_rate": 5.9494660402719404e-05, "loss": 1.0541, "step": 137540 }, { "epoch": 0.87876774465584, "grad_norm": 0.8353559970855713, "learning_rate": 5.948973396793795e-05, "loss": 1.0608, "step": 137550 }, { "epoch": 0.8788316318055787, "grad_norm": 0.48675644397735596, "learning_rate": 5.948480743758669e-05, "loss": 0.9564, "step": 137560 }, { "epoch": 0.8788955189553174, "grad_norm": 1.1137011051177979, "learning_rate": 5.9479880811715195e-05, "loss": 0.7974, "step": 137570 }, { "epoch": 0.8789594061050561, "grad_norm": 1.0456700325012207, "learning_rate": 5.9474954090373106e-05, "loss": 0.9706, "step": 137580 }, { "epoch": 0.8790232932547948, "grad_norm": 1.0245221853256226, "learning_rate": 5.947002727361003e-05, "loss": 0.8457, "step": 137590 }, { "epoch": 0.8790871804045335, "grad_norm": 1.006938099861145, "learning_rate": 5.9465593056979326e-05, "loss": 0.7818, "step": 137600 }, { "epoch": 0.8791510675542721, "grad_norm": 1.0359126329421997, "learning_rate": 5.946066605905308e-05, "loss": 0.7724, "step": 137610 }, { "epoch": 0.8792149547040108, "grad_norm": 1.068823218345642, "learning_rate": 5.945573896584974e-05, "loss": 0.9845, "step": 137620 }, { "epoch": 0.8792788418537495, "grad_norm": 0.46609166264533997, "learning_rate": 5.945081177741892e-05, "loss": 0.7789, "step": 137630 }, { "epoch": 0.8793427290034882, "grad_norm": 1.1455978155136108, "learning_rate": 5.9445884493810256e-05, "loss": 0.7966, "step": 137640 }, { "epoch": 0.8794066161532269, "grad_norm": 1.5680001974105835, "learning_rate": 5.944095711507337e-05, "loss": 0.9451, "step": 137650 }, { "epoch": 0.8794705033029656, "grad_norm": 1.8977959156036377, "learning_rate": 5.943602964125787e-05, "loss": 0.8331, "step": 137660 }, { "epoch": 0.8795343904527043, "grad_norm": 0.6231663227081299, "learning_rate": 5.943110207241339e-05, "loss": 0.8725, "step": 137670 }, { "epoch": 0.879598277602443, "grad_norm": 0.8517551422119141, "learning_rate": 5.942617440858955e-05, "loss": 1.0001, "step": 137680 }, { "epoch": 0.8796621647521817, "grad_norm": 0.9704746007919312, "learning_rate": 5.9421246649835985e-05, "loss": 1.2601, "step": 137690 }, { "epoch": 0.8797260519019204, "grad_norm": 0.6457834839820862, "learning_rate": 5.941631879620231e-05, "loss": 0.6845, "step": 137700 }, { "epoch": 0.8797899390516591, "grad_norm": 1.3771389722824097, "learning_rate": 5.941139084773817e-05, "loss": 1.0085, "step": 137710 }, { "epoch": 0.8798538262013978, "grad_norm": 0.8982274532318115, "learning_rate": 5.940646280449317e-05, "loss": 0.754, "step": 137720 }, { "epoch": 0.8799177133511366, "grad_norm": 1.1403874158859253, "learning_rate": 5.9401534666516955e-05, "loss": 0.9035, "step": 137730 }, { "epoch": 0.8799816005008753, "grad_norm": 0.8235518932342529, "learning_rate": 5.939660643385915e-05, "loss": 0.9339, "step": 137740 }, { "epoch": 0.880045487650614, "grad_norm": 0.8350309133529663, "learning_rate": 5.939167810656939e-05, "loss": 1.0703, "step": 137750 }, { "epoch": 0.8801093748003527, "grad_norm": 1.7924656867980957, "learning_rate": 5.938674968469731e-05, "loss": 1.1085, "step": 137760 }, { "epoch": 0.8801732619500914, "grad_norm": 1.2257702350616455, "learning_rate": 5.9381821168292536e-05, "loss": 0.9338, "step": 137770 }, { "epoch": 0.8802371490998301, "grad_norm": 1.0357496738433838, "learning_rate": 5.9376892557404704e-05, "loss": 0.8123, "step": 137780 }, { "epoch": 0.8803010362495688, "grad_norm": 0.990088164806366, "learning_rate": 5.937196385208346e-05, "loss": 1.004, "step": 137790 }, { "epoch": 0.8803649233993075, "grad_norm": 0.97257000207901, "learning_rate": 5.936703505237843e-05, "loss": 0.6953, "step": 137800 }, { "epoch": 0.8804288105490462, "grad_norm": 0.6690786480903625, "learning_rate": 5.9362106158339245e-05, "loss": 0.9455, "step": 137810 }, { "epoch": 0.8804926976987849, "grad_norm": 1.3547656536102295, "learning_rate": 5.935717717001556e-05, "loss": 0.856, "step": 137820 }, { "epoch": 0.8805565848485236, "grad_norm": 0.7319701313972473, "learning_rate": 5.9352248087456994e-05, "loss": 0.9645, "step": 137830 }, { "epoch": 0.8806204719982623, "grad_norm": 0.9429260492324829, "learning_rate": 5.934731891071321e-05, "loss": 1.0002, "step": 137840 }, { "epoch": 0.8806843591480009, "grad_norm": 0.8033540844917297, "learning_rate": 5.934238963983384e-05, "loss": 0.676, "step": 137850 }, { "epoch": 0.8807482462977396, "grad_norm": 1.9178016185760498, "learning_rate": 5.933746027486853e-05, "loss": 0.727, "step": 137860 }, { "epoch": 0.8808121334474783, "grad_norm": 0.7014086842536926, "learning_rate": 5.9333023765997284e-05, "loss": 1.0007, "step": 137870 }, { "epoch": 0.880876020597217, "grad_norm": 1.6544643640518188, "learning_rate": 5.9328094222405437e-05, "loss": 0.8551, "step": 137880 }, { "epoch": 0.8809399077469557, "grad_norm": 1.3481382131576538, "learning_rate": 5.932316458487162e-05, "loss": 0.7606, "step": 137890 }, { "epoch": 0.8810037948966944, "grad_norm": 1.015859842300415, "learning_rate": 5.931823485344545e-05, "loss": 0.9227, "step": 137900 }, { "epoch": 0.8810676820464332, "grad_norm": 1.004987359046936, "learning_rate": 5.9313305028176606e-05, "loss": 1.1689, "step": 137910 }, { "epoch": 0.8811315691961719, "grad_norm": 0.8518670797348022, "learning_rate": 5.930837510911471e-05, "loss": 1.1651, "step": 137920 }, { "epoch": 0.8811954563459106, "grad_norm": 0.6853091716766357, "learning_rate": 5.930344509630943e-05, "loss": 0.7861, "step": 137930 }, { "epoch": 0.8812593434956493, "grad_norm": 1.4543042182922363, "learning_rate": 5.929851498981041e-05, "loss": 0.718, "step": 137940 }, { "epoch": 0.881323230645388, "grad_norm": 0.791410505771637, "learning_rate": 5.92935847896673e-05, "loss": 0.8162, "step": 137950 }, { "epoch": 0.8813871177951267, "grad_norm": 0.8567259311676025, "learning_rate": 5.928865449592976e-05, "loss": 0.797, "step": 137960 }, { "epoch": 0.8814510049448654, "grad_norm": 0.9072690010070801, "learning_rate": 5.928372410864742e-05, "loss": 0.9948, "step": 137970 }, { "epoch": 0.8815148920946041, "grad_norm": 1.3205629587173462, "learning_rate": 5.9278793627869955e-05, "loss": 1.0577, "step": 137980 }, { "epoch": 0.8815787792443428, "grad_norm": 0.8285039663314819, "learning_rate": 5.9273863053647015e-05, "loss": 0.9573, "step": 137990 }, { "epoch": 0.8816426663940815, "grad_norm": 0.7097443342208862, "learning_rate": 5.926893238602825e-05, "loss": 0.927, "step": 138000 }, { "epoch": 0.8817065535438202, "grad_norm": 0.7954055666923523, "learning_rate": 5.926400162506331e-05, "loss": 1.0107, "step": 138010 }, { "epoch": 0.8817704406935589, "grad_norm": 0.9735956788063049, "learning_rate": 5.9259070770801874e-05, "loss": 0.8408, "step": 138020 }, { "epoch": 0.8818343278432976, "grad_norm": 1.4425255060195923, "learning_rate": 5.925413982329357e-05, "loss": 1.0734, "step": 138030 }, { "epoch": 0.8818982149930363, "grad_norm": 0.9620723724365234, "learning_rate": 5.9249208782588076e-05, "loss": 0.9398, "step": 138040 }, { "epoch": 0.881962102142775, "grad_norm": 0.6004499793052673, "learning_rate": 5.924427764873505e-05, "loss": 0.9196, "step": 138050 }, { "epoch": 0.8820259892925137, "grad_norm": 0.9870150685310364, "learning_rate": 5.9239346421784135e-05, "loss": 0.9864, "step": 138060 }, { "epoch": 0.8820898764422525, "grad_norm": 0.897495687007904, "learning_rate": 5.9234415101785026e-05, "loss": 1.0131, "step": 138070 }, { "epoch": 0.8821537635919912, "grad_norm": 0.9317723512649536, "learning_rate": 5.922948368878736e-05, "loss": 0.856, "step": 138080 }, { "epoch": 0.8822176507417298, "grad_norm": 0.9993261694908142, "learning_rate": 5.922455218284081e-05, "loss": 0.9035, "step": 138090 }, { "epoch": 0.8822815378914685, "grad_norm": 0.8899745345115662, "learning_rate": 5.921962058399504e-05, "loss": 0.9287, "step": 138100 }, { "epoch": 0.8823454250412072, "grad_norm": 0.59910649061203, "learning_rate": 5.921468889229971e-05, "loss": 1.0332, "step": 138110 }, { "epoch": 0.8824093121909459, "grad_norm": 0.8276026248931885, "learning_rate": 5.92097571078045e-05, "loss": 0.8867, "step": 138120 }, { "epoch": 0.8824731993406846, "grad_norm": 1.0071065425872803, "learning_rate": 5.9204825230559056e-05, "loss": 0.8795, "step": 138130 }, { "epoch": 0.8825370864904233, "grad_norm": 1.0851963758468628, "learning_rate": 5.919989326061307e-05, "loss": 0.9147, "step": 138140 }, { "epoch": 0.882600973640162, "grad_norm": 0.9725841879844666, "learning_rate": 5.9194961198016196e-05, "loss": 0.7633, "step": 138150 }, { "epoch": 0.8826648607899007, "grad_norm": 0.5369303226470947, "learning_rate": 5.9190029042818105e-05, "loss": 0.9501, "step": 138160 }, { "epoch": 0.8827287479396394, "grad_norm": 0.9604431390762329, "learning_rate": 5.918509679506847e-05, "loss": 0.785, "step": 138170 }, { "epoch": 0.8827926350893781, "grad_norm": 1.0034009218215942, "learning_rate": 5.918016445481698e-05, "loss": 0.9936, "step": 138180 }, { "epoch": 0.8828565222391168, "grad_norm": 0.8154608607292175, "learning_rate": 5.917523202211328e-05, "loss": 0.7805, "step": 138190 }, { "epoch": 0.8829204093888555, "grad_norm": 1.2569918632507324, "learning_rate": 5.9170299497007053e-05, "loss": 0.6671, "step": 138200 }, { "epoch": 0.8829842965385942, "grad_norm": 0.6132636666297913, "learning_rate": 5.916536687954798e-05, "loss": 0.7076, "step": 138210 }, { "epoch": 0.8830481836883329, "grad_norm": 2.1591336727142334, "learning_rate": 5.916043416978574e-05, "loss": 1.1469, "step": 138220 }, { "epoch": 0.8831120708380716, "grad_norm": 0.9249553084373474, "learning_rate": 5.915550136776999e-05, "loss": 0.8875, "step": 138230 }, { "epoch": 0.8831759579878103, "grad_norm": 1.4961109161376953, "learning_rate": 5.915056847355043e-05, "loss": 0.7952, "step": 138240 }, { "epoch": 0.883239845137549, "grad_norm": 1.0955626964569092, "learning_rate": 5.914563548717673e-05, "loss": 0.8794, "step": 138250 }, { "epoch": 0.8833037322872878, "grad_norm": 0.5093604922294617, "learning_rate": 5.9140702408698554e-05, "loss": 0.7851, "step": 138260 }, { "epoch": 0.8833676194370265, "grad_norm": 1.133516788482666, "learning_rate": 5.913576923816562e-05, "loss": 0.699, "step": 138270 }, { "epoch": 0.8834315065867652, "grad_norm": 1.351069450378418, "learning_rate": 5.9130835975627574e-05, "loss": 0.9823, "step": 138280 }, { "epoch": 0.8834953937365039, "grad_norm": 0.741649329662323, "learning_rate": 5.912590262113411e-05, "loss": 1.0134, "step": 138290 }, { "epoch": 0.8835592808862426, "grad_norm": 0.5988890528678894, "learning_rate": 5.912096917473491e-05, "loss": 0.7114, "step": 138300 }, { "epoch": 0.8836231680359813, "grad_norm": 0.9725940823554993, "learning_rate": 5.911603563647966e-05, "loss": 0.9138, "step": 138310 }, { "epoch": 0.88368705518572, "grad_norm": 0.5736109018325806, "learning_rate": 5.911110200641805e-05, "loss": 0.7067, "step": 138320 }, { "epoch": 0.8837509423354587, "grad_norm": 0.8855761885643005, "learning_rate": 5.910616828459975e-05, "loss": 1.2011, "step": 138330 }, { "epoch": 0.8838148294851973, "grad_norm": 0.8970593810081482, "learning_rate": 5.910123447107446e-05, "loss": 0.9496, "step": 138340 }, { "epoch": 0.883878716634936, "grad_norm": 0.8814042806625366, "learning_rate": 5.909630056589188e-05, "loss": 0.957, "step": 138350 }, { "epoch": 0.8839426037846747, "grad_norm": 1.0015789270401, "learning_rate": 5.909136656910167e-05, "loss": 0.8841, "step": 138360 }, { "epoch": 0.8840064909344134, "grad_norm": 0.869117796421051, "learning_rate": 5.908643248075354e-05, "loss": 0.7216, "step": 138370 }, { "epoch": 0.8840703780841521, "grad_norm": 0.8888474702835083, "learning_rate": 5.9081498300897167e-05, "loss": 0.8551, "step": 138380 }, { "epoch": 0.8841342652338908, "grad_norm": 0.8927051424980164, "learning_rate": 5.907656402958226e-05, "loss": 0.9334, "step": 138390 }, { "epoch": 0.8841981523836295, "grad_norm": 1.0528945922851562, "learning_rate": 5.907162966685849e-05, "loss": 0.7634, "step": 138400 }, { "epoch": 0.8842620395333682, "grad_norm": 1.1046907901763916, "learning_rate": 5.906669521277557e-05, "loss": 0.715, "step": 138410 }, { "epoch": 0.8843259266831069, "grad_norm": 0.8882020711898804, "learning_rate": 5.906176066738317e-05, "loss": 1.0122, "step": 138420 }, { "epoch": 0.8843898138328457, "grad_norm": 0.9222348928451538, "learning_rate": 5.905682603073102e-05, "loss": 0.8114, "step": 138430 }, { "epoch": 0.8844537009825844, "grad_norm": 0.5830926895141602, "learning_rate": 5.905189130286879e-05, "loss": 1.0322, "step": 138440 }, { "epoch": 0.8845175881323231, "grad_norm": 1.4993237257003784, "learning_rate": 5.904695648384617e-05, "loss": 0.6984, "step": 138450 }, { "epoch": 0.8845814752820618, "grad_norm": 4.328673839569092, "learning_rate": 5.904202157371288e-05, "loss": 0.8778, "step": 138460 }, { "epoch": 0.8846453624318005, "grad_norm": 0.7862850427627563, "learning_rate": 5.903708657251861e-05, "loss": 0.8364, "step": 138470 }, { "epoch": 0.8847092495815392, "grad_norm": 1.8233660459518433, "learning_rate": 5.903215148031307e-05, "loss": 0.8774, "step": 138480 }, { "epoch": 0.8847731367312779, "grad_norm": 0.7515198588371277, "learning_rate": 5.902721629714595e-05, "loss": 1.2306, "step": 138490 }, { "epoch": 0.8848370238810166, "grad_norm": 1.1745033264160156, "learning_rate": 5.902228102306695e-05, "loss": 0.9555, "step": 138500 }, { "epoch": 0.8849009110307553, "grad_norm": 1.0315542221069336, "learning_rate": 5.901734565812577e-05, "loss": 0.987, "step": 138510 }, { "epoch": 0.884964798180494, "grad_norm": 1.1841830015182495, "learning_rate": 5.9012410202372114e-05, "loss": 1.1246, "step": 138520 }, { "epoch": 0.8850286853302327, "grad_norm": 1.072008490562439, "learning_rate": 5.9007474655855696e-05, "loss": 0.8357, "step": 138530 }, { "epoch": 0.8850925724799714, "grad_norm": 1.0678666830062866, "learning_rate": 5.900253901862621e-05, "loss": 0.7345, "step": 138540 }, { "epoch": 0.8851564596297101, "grad_norm": 0.83828204870224, "learning_rate": 5.899760329073338e-05, "loss": 0.8972, "step": 138550 }, { "epoch": 0.8852203467794488, "grad_norm": 0.9922822713851929, "learning_rate": 5.899266747222689e-05, "loss": 0.7582, "step": 138560 }, { "epoch": 0.8852842339291875, "grad_norm": 2.141287088394165, "learning_rate": 5.8987731563156464e-05, "loss": 1.1712, "step": 138570 }, { "epoch": 0.8853481210789261, "grad_norm": 0.8751981258392334, "learning_rate": 5.89827955635718e-05, "loss": 0.9515, "step": 138580 }, { "epoch": 0.8854120082286648, "grad_norm": 0.6795740723609924, "learning_rate": 5.897785947352262e-05, "loss": 0.7279, "step": 138590 }, { "epoch": 0.8854758953784035, "grad_norm": 0.8922616839408875, "learning_rate": 5.8972923293058636e-05, "loss": 1.0773, "step": 138600 }, { "epoch": 0.8855397825281422, "grad_norm": 0.8627411127090454, "learning_rate": 5.896798702222953e-05, "loss": 1.0776, "step": 138610 }, { "epoch": 0.885603669677881, "grad_norm": 0.6423478126525879, "learning_rate": 5.896305066108504e-05, "loss": 1.0121, "step": 138620 }, { "epoch": 0.8856675568276197, "grad_norm": 0.9135613441467285, "learning_rate": 5.895811420967489e-05, "loss": 0.7514, "step": 138630 }, { "epoch": 0.8857314439773584, "grad_norm": 1.0383354425430298, "learning_rate": 5.895317766804877e-05, "loss": 0.9648, "step": 138640 }, { "epoch": 0.8857953311270971, "grad_norm": 1.2800050973892212, "learning_rate": 5.89482410362564e-05, "loss": 0.7818, "step": 138650 }, { "epoch": 0.8858592182768358, "grad_norm": 0.8451805710792542, "learning_rate": 5.894330431434751e-05, "loss": 0.9926, "step": 138660 }, { "epoch": 0.8859231054265745, "grad_norm": 0.9237948060035706, "learning_rate": 5.893836750237181e-05, "loss": 0.9855, "step": 138670 }, { "epoch": 0.8859869925763132, "grad_norm": 0.6071786880493164, "learning_rate": 5.893343060037902e-05, "loss": 0.8057, "step": 138680 }, { "epoch": 0.8860508797260519, "grad_norm": 1.0154786109924316, "learning_rate": 5.892849360841886e-05, "loss": 0.9599, "step": 138690 }, { "epoch": 0.8861147668757906, "grad_norm": 1.8094230890274048, "learning_rate": 5.892355652654102e-05, "loss": 0.9918, "step": 138700 }, { "epoch": 0.8861786540255293, "grad_norm": 0.8188693523406982, "learning_rate": 5.891861935479527e-05, "loss": 0.857, "step": 138710 }, { "epoch": 0.886242541175268, "grad_norm": 0.6113899350166321, "learning_rate": 5.891368209323129e-05, "loss": 1.092, "step": 138720 }, { "epoch": 0.8863064283250067, "grad_norm": 0.7425000667572021, "learning_rate": 5.8908744741898846e-05, "loss": 0.7412, "step": 138730 }, { "epoch": 0.8863703154747454, "grad_norm": 1.9018378257751465, "learning_rate": 5.8903807300847627e-05, "loss": 0.889, "step": 138740 }, { "epoch": 0.8864342026244841, "grad_norm": 1.2525657415390015, "learning_rate": 5.889886977012735e-05, "loss": 0.901, "step": 138750 }, { "epoch": 0.8864980897742228, "grad_norm": 0.6941089630126953, "learning_rate": 5.8893932149787764e-05, "loss": 0.9275, "step": 138760 }, { "epoch": 0.8865619769239615, "grad_norm": 0.9113277196884155, "learning_rate": 5.8888994439878584e-05, "loss": 0.9316, "step": 138770 }, { "epoch": 0.8866258640737003, "grad_norm": 1.0078089237213135, "learning_rate": 5.888405664044953e-05, "loss": 0.9862, "step": 138780 }, { "epoch": 0.886689751223439, "grad_norm": 0.7824812531471252, "learning_rate": 5.887911875155036e-05, "loss": 0.8826, "step": 138790 }, { "epoch": 0.8867536383731777, "grad_norm": 0.5827013254165649, "learning_rate": 5.887418077323077e-05, "loss": 0.6422, "step": 138800 }, { "epoch": 0.8868175255229164, "grad_norm": 1.1789437532424927, "learning_rate": 5.886924270554051e-05, "loss": 1.123, "step": 138810 }, { "epoch": 0.886881412672655, "grad_norm": 0.9129090905189514, "learning_rate": 5.886430454852929e-05, "loss": 0.9861, "step": 138820 }, { "epoch": 0.8869452998223937, "grad_norm": 0.82326340675354, "learning_rate": 5.885936630224686e-05, "loss": 0.8269, "step": 138830 }, { "epoch": 0.8870091869721324, "grad_norm": 2.5597972869873047, "learning_rate": 5.885442796674295e-05, "loss": 0.9155, "step": 138840 }, { "epoch": 0.8870730741218711, "grad_norm": 1.1469552516937256, "learning_rate": 5.8849489542067296e-05, "loss": 0.806, "step": 138850 }, { "epoch": 0.8871369612716098, "grad_norm": 0.7060733437538147, "learning_rate": 5.8844551028269625e-05, "loss": 1.0475, "step": 138860 }, { "epoch": 0.8872008484213485, "grad_norm": 0.5902007222175598, "learning_rate": 5.883961242539966e-05, "loss": 0.9141, "step": 138870 }, { "epoch": 0.8872647355710872, "grad_norm": 1.3339205980300903, "learning_rate": 5.883467373350716e-05, "loss": 0.8036, "step": 138880 }, { "epoch": 0.8873286227208259, "grad_norm": 0.8260666728019714, "learning_rate": 5.882973495264186e-05, "loss": 0.7641, "step": 138890 }, { "epoch": 0.8873925098705646, "grad_norm": 1.3517704010009766, "learning_rate": 5.8824796082853485e-05, "loss": 0.7486, "step": 138900 }, { "epoch": 0.8874563970203033, "grad_norm": 0.7072157859802246, "learning_rate": 5.8819857124191766e-05, "loss": 1.1322, "step": 138910 }, { "epoch": 0.887520284170042, "grad_norm": 0.7836194634437561, "learning_rate": 5.881491807670647e-05, "loss": 1.0416, "step": 138920 }, { "epoch": 0.8875841713197807, "grad_norm": 0.8094397783279419, "learning_rate": 5.880997894044732e-05, "loss": 0.7803, "step": 138930 }, { "epoch": 0.8876480584695194, "grad_norm": 0.9594300985336304, "learning_rate": 5.880503971546406e-05, "loss": 0.6825, "step": 138940 }, { "epoch": 0.8877119456192581, "grad_norm": 0.7078715562820435, "learning_rate": 5.8800100401806436e-05, "loss": 0.7998, "step": 138950 }, { "epoch": 0.8877758327689969, "grad_norm": 1.1923208236694336, "learning_rate": 5.879516099952418e-05, "loss": 1.1095, "step": 138960 }, { "epoch": 0.8878397199187356, "grad_norm": 0.8840433955192566, "learning_rate": 5.8790221508667045e-05, "loss": 0.8077, "step": 138970 }, { "epoch": 0.8879036070684743, "grad_norm": 1.028594732284546, "learning_rate": 5.878528192928479e-05, "loss": 0.8315, "step": 138980 }, { "epoch": 0.887967494218213, "grad_norm": 0.873859703540802, "learning_rate": 5.878034226142712e-05, "loss": 0.8896, "step": 138990 }, { "epoch": 0.8880313813679517, "grad_norm": 1.0538140535354614, "learning_rate": 5.877540250514383e-05, "loss": 0.7489, "step": 139000 }, { "epoch": 0.8880952685176904, "grad_norm": 1.347963571548462, "learning_rate": 5.8770462660484625e-05, "loss": 0.836, "step": 139010 }, { "epoch": 0.8881591556674291, "grad_norm": 0.8959457874298096, "learning_rate": 5.876552272749929e-05, "loss": 0.7588, "step": 139020 }, { "epoch": 0.8882230428171678, "grad_norm": 0.6587477922439575, "learning_rate": 5.876058270623756e-05, "loss": 0.7995, "step": 139030 }, { "epoch": 0.8882869299669065, "grad_norm": 0.7410009503364563, "learning_rate": 5.8755642596749164e-05, "loss": 0.8671, "step": 139040 }, { "epoch": 0.8883508171166452, "grad_norm": 0.96707683801651, "learning_rate": 5.875070239908389e-05, "loss": 0.7018, "step": 139050 }, { "epoch": 0.8884147042663839, "grad_norm": 0.7956843972206116, "learning_rate": 5.8745762113291455e-05, "loss": 0.7706, "step": 139060 }, { "epoch": 0.8884785914161225, "grad_norm": 0.9614824652671814, "learning_rate": 5.874082173942165e-05, "loss": 0.8501, "step": 139070 }, { "epoch": 0.8885424785658612, "grad_norm": 1.660465121269226, "learning_rate": 5.8735881277524195e-05, "loss": 1.0422, "step": 139080 }, { "epoch": 0.8886063657155999, "grad_norm": 0.7335018515586853, "learning_rate": 5.8730940727648864e-05, "loss": 1.0635, "step": 139090 }, { "epoch": 0.8886702528653386, "grad_norm": 0.8188953399658203, "learning_rate": 5.87260000898454e-05, "loss": 0.9997, "step": 139100 }, { "epoch": 0.8887341400150773, "grad_norm": 0.44217541813850403, "learning_rate": 5.8721059364163564e-05, "loss": 0.9746, "step": 139110 }, { "epoch": 0.888798027164816, "grad_norm": 1.0299861431121826, "learning_rate": 5.871611855065313e-05, "loss": 1.0003, "step": 139120 }, { "epoch": 0.8888619143145547, "grad_norm": 0.9896953105926514, "learning_rate": 5.871117764936382e-05, "loss": 0.8213, "step": 139130 }, { "epoch": 0.8889258014642935, "grad_norm": 1.2226732969284058, "learning_rate": 5.870623666034544e-05, "loss": 0.8156, "step": 139140 }, { "epoch": 0.8889896886140322, "grad_norm": 0.6431970000267029, "learning_rate": 5.87012955836477e-05, "loss": 0.9195, "step": 139150 }, { "epoch": 0.8890535757637709, "grad_norm": 1.434300422668457, "learning_rate": 5.86963544193204e-05, "loss": 0.7808, "step": 139160 }, { "epoch": 0.8891174629135096, "grad_norm": 0.8103228807449341, "learning_rate": 5.869141316741328e-05, "loss": 0.9496, "step": 139170 }, { "epoch": 0.8891813500632483, "grad_norm": 0.9674835801124573, "learning_rate": 5.868647182797612e-05, "loss": 0.9964, "step": 139180 }, { "epoch": 0.889245237212987, "grad_norm": 0.9061577916145325, "learning_rate": 5.868153040105867e-05, "loss": 0.9738, "step": 139190 }, { "epoch": 0.8893091243627257, "grad_norm": 0.7973967790603638, "learning_rate": 5.8676588886710695e-05, "loss": 0.8598, "step": 139200 }, { "epoch": 0.8893730115124644, "grad_norm": 1.2977885007858276, "learning_rate": 5.867164728498197e-05, "loss": 0.9625, "step": 139210 }, { "epoch": 0.8894368986622031, "grad_norm": 0.897599458694458, "learning_rate": 5.866670559592226e-05, "loss": 1.0682, "step": 139220 }, { "epoch": 0.8895007858119418, "grad_norm": 0.9015941023826599, "learning_rate": 5.8661763819581314e-05, "loss": 0.7841, "step": 139230 }, { "epoch": 0.8895646729616805, "grad_norm": 1.129776120185852, "learning_rate": 5.865682195600892e-05, "loss": 1.0793, "step": 139240 }, { "epoch": 0.8896285601114192, "grad_norm": 0.8388169407844543, "learning_rate": 5.865188000525484e-05, "loss": 0.9023, "step": 139250 }, { "epoch": 0.8896924472611579, "grad_norm": 0.8551932573318481, "learning_rate": 5.864693796736884e-05, "loss": 0.7204, "step": 139260 }, { "epoch": 0.8897563344108966, "grad_norm": 0.8071838021278381, "learning_rate": 5.86419958424007e-05, "loss": 0.7368, "step": 139270 }, { "epoch": 0.8898202215606353, "grad_norm": 0.644696831703186, "learning_rate": 5.863705363040017e-05, "loss": 0.8828, "step": 139280 }, { "epoch": 0.889884108710374, "grad_norm": 0.895206868648529, "learning_rate": 5.863211133141705e-05, "loss": 0.8151, "step": 139290 }, { "epoch": 0.8899479958601128, "grad_norm": 1.169389009475708, "learning_rate": 5.8627168945501096e-05, "loss": 1.0632, "step": 139300 }, { "epoch": 0.8900118830098513, "grad_norm": 0.695212721824646, "learning_rate": 5.862222647270208e-05, "loss": 0.884, "step": 139310 }, { "epoch": 0.89007577015959, "grad_norm": 0.8649401664733887, "learning_rate": 5.8617283913069796e-05, "loss": 0.9877, "step": 139320 }, { "epoch": 0.8901396573093288, "grad_norm": 1.0308243036270142, "learning_rate": 5.8612341266654015e-05, "loss": 0.9435, "step": 139330 }, { "epoch": 0.8902035444590675, "grad_norm": 0.9423206448554993, "learning_rate": 5.86073985335045e-05, "loss": 0.8339, "step": 139340 }, { "epoch": 0.8902674316088062, "grad_norm": 0.6064127087593079, "learning_rate": 5.860245571367102e-05, "loss": 0.8482, "step": 139350 }, { "epoch": 0.8903313187585449, "grad_norm": 0.6485791206359863, "learning_rate": 5.8597512807203393e-05, "loss": 0.8751, "step": 139360 }, { "epoch": 0.8903952059082836, "grad_norm": 4.558902740478516, "learning_rate": 5.859256981415135e-05, "loss": 0.9094, "step": 139370 }, { "epoch": 0.8904590930580223, "grad_norm": 0.827876091003418, "learning_rate": 5.858762673456472e-05, "loss": 1.1908, "step": 139380 }, { "epoch": 0.890522980207761, "grad_norm": 0.8407139182090759, "learning_rate": 5.858268356849325e-05, "loss": 0.8698, "step": 139390 }, { "epoch": 0.8905868673574997, "grad_norm": 1.8288122415542603, "learning_rate": 5.857774031598673e-05, "loss": 1.018, "step": 139400 }, { "epoch": 0.8906507545072384, "grad_norm": 0.8015510439872742, "learning_rate": 5.8572796977094936e-05, "loss": 0.803, "step": 139410 }, { "epoch": 0.8907146416569771, "grad_norm": 1.08255934715271, "learning_rate": 5.856785355186767e-05, "loss": 0.8632, "step": 139420 }, { "epoch": 0.8907785288067158, "grad_norm": 1.29863440990448, "learning_rate": 5.8562910040354705e-05, "loss": 0.8677, "step": 139430 }, { "epoch": 0.8908424159564545, "grad_norm": 1.1884722709655762, "learning_rate": 5.855796644260583e-05, "loss": 0.8054, "step": 139440 }, { "epoch": 0.8909063031061932, "grad_norm": 1.657820463180542, "learning_rate": 5.8553022758670816e-05, "loss": 0.866, "step": 139450 }, { "epoch": 0.8909701902559319, "grad_norm": 1.095188021659851, "learning_rate": 5.8548078988599484e-05, "loss": 0.8458, "step": 139460 }, { "epoch": 0.8910340774056706, "grad_norm": 0.8800215125083923, "learning_rate": 5.8543135132441585e-05, "loss": 0.7631, "step": 139470 }, { "epoch": 0.8910979645554093, "grad_norm": 0.9168996214866638, "learning_rate": 5.8538191190246924e-05, "loss": 0.8652, "step": 139480 }, { "epoch": 0.8911618517051481, "grad_norm": 0.6553764343261719, "learning_rate": 5.85332471620653e-05, "loss": 0.7113, "step": 139490 }, { "epoch": 0.8912257388548868, "grad_norm": 1.1859967708587646, "learning_rate": 5.85283030479465e-05, "loss": 0.7221, "step": 139500 }, { "epoch": 0.8912896260046255, "grad_norm": 0.7039145827293396, "learning_rate": 5.852335884794029e-05, "loss": 0.6689, "step": 139510 }, { "epoch": 0.8913535131543642, "grad_norm": 2.0129079818725586, "learning_rate": 5.85184145620965e-05, "loss": 0.8524, "step": 139520 }, { "epoch": 0.8914174003041029, "grad_norm": 0.9877476692199707, "learning_rate": 5.8513470190464905e-05, "loss": 0.8462, "step": 139530 }, { "epoch": 0.8914812874538416, "grad_norm": 0.46770796179771423, "learning_rate": 5.8508525733095285e-05, "loss": 1.1589, "step": 139540 }, { "epoch": 0.8915451746035802, "grad_norm": 1.1145647764205933, "learning_rate": 5.8503581190037474e-05, "loss": 1.1228, "step": 139550 }, { "epoch": 0.8916090617533189, "grad_norm": 1.5091100931167603, "learning_rate": 5.8498636561341224e-05, "loss": 0.7566, "step": 139560 }, { "epoch": 0.8916729489030576, "grad_norm": 0.8867336511611938, "learning_rate": 5.849369184705635e-05, "loss": 0.8833, "step": 139570 }, { "epoch": 0.8917368360527963, "grad_norm": 1.0336995124816895, "learning_rate": 5.8488747047232675e-05, "loss": 0.9395, "step": 139580 }, { "epoch": 0.891800723202535, "grad_norm": 1.3706622123718262, "learning_rate": 5.848380216191995e-05, "loss": 0.7776, "step": 139590 }, { "epoch": 0.8918646103522737, "grad_norm": 1.2319433689117432, "learning_rate": 5.8478857191168e-05, "loss": 0.7916, "step": 139600 }, { "epoch": 0.8919284975020124, "grad_norm": 0.5949766039848328, "learning_rate": 5.847391213502663e-05, "loss": 0.7991, "step": 139610 }, { "epoch": 0.8919923846517511, "grad_norm": 0.7637538313865662, "learning_rate": 5.846896699354564e-05, "loss": 0.8839, "step": 139620 }, { "epoch": 0.8920562718014898, "grad_norm": 1.117910385131836, "learning_rate": 5.846402176677481e-05, "loss": 0.7672, "step": 139630 }, { "epoch": 0.8921201589512285, "grad_norm": 1.7316735982894897, "learning_rate": 5.845907645476397e-05, "loss": 1.0049, "step": 139640 }, { "epoch": 0.8921840461009672, "grad_norm": 1.0572848320007324, "learning_rate": 5.8454131057562914e-05, "loss": 1.3189, "step": 139650 }, { "epoch": 0.892247933250706, "grad_norm": 1.482458233833313, "learning_rate": 5.844918557522143e-05, "loss": 0.8126, "step": 139660 }, { "epoch": 0.8923118204004447, "grad_norm": 1.1422396898269653, "learning_rate": 5.8444240007789343e-05, "loss": 0.946, "step": 139670 }, { "epoch": 0.8923757075501834, "grad_norm": 0.6669201254844666, "learning_rate": 5.8439294355316455e-05, "loss": 0.8283, "step": 139680 }, { "epoch": 0.8924395946999221, "grad_norm": 0.7748156785964966, "learning_rate": 5.8434348617852566e-05, "loss": 0.8111, "step": 139690 }, { "epoch": 0.8925034818496608, "grad_norm": 0.7147510051727295, "learning_rate": 5.842940279544751e-05, "loss": 0.7302, "step": 139700 }, { "epoch": 0.8925673689993995, "grad_norm": 0.9036562442779541, "learning_rate": 5.842445688815106e-05, "loss": 0.8618, "step": 139710 }, { "epoch": 0.8926312561491382, "grad_norm": 1.1501970291137695, "learning_rate": 5.841951089601304e-05, "loss": 0.7836, "step": 139720 }, { "epoch": 0.8926951432988769, "grad_norm": 1.0131080150604248, "learning_rate": 5.8414564819083275e-05, "loss": 0.7891, "step": 139730 }, { "epoch": 0.8927590304486156, "grad_norm": 0.6381675601005554, "learning_rate": 5.8409618657411544e-05, "loss": 0.9683, "step": 139740 }, { "epoch": 0.8928229175983543, "grad_norm": 0.8520289063453674, "learning_rate": 5.840467241104769e-05, "loss": 0.8815, "step": 139750 }, { "epoch": 0.892886804748093, "grad_norm": 0.7620411515235901, "learning_rate": 5.8399726080041504e-05, "loss": 0.8859, "step": 139760 }, { "epoch": 0.8929506918978317, "grad_norm": 0.7203412652015686, "learning_rate": 5.839477966444282e-05, "loss": 0.863, "step": 139770 }, { "epoch": 0.8930145790475704, "grad_norm": 1.159543752670288, "learning_rate": 5.8389833164301445e-05, "loss": 0.7974, "step": 139780 }, { "epoch": 0.8930784661973091, "grad_norm": 0.6249431371688843, "learning_rate": 5.838488657966717e-05, "loss": 0.954, "step": 139790 }, { "epoch": 0.8931423533470477, "grad_norm": 0.8362451195716858, "learning_rate": 5.8379939910589854e-05, "loss": 0.8083, "step": 139800 }, { "epoch": 0.8932062404967864, "grad_norm": 1.5072931051254272, "learning_rate": 5.8374993157119296e-05, "loss": 1.3744, "step": 139810 }, { "epoch": 0.8932701276465251, "grad_norm": 0.9383344054222107, "learning_rate": 5.8370046319305296e-05, "loss": 0.8008, "step": 139820 }, { "epoch": 0.8933340147962638, "grad_norm": 0.8047425150871277, "learning_rate": 5.8365099397197695e-05, "loss": 1.0529, "step": 139830 }, { "epoch": 0.8933979019460025, "grad_norm": 0.8353585600852966, "learning_rate": 5.8360152390846304e-05, "loss": 0.6732, "step": 139840 }, { "epoch": 0.8934617890957413, "grad_norm": 1.0151777267456055, "learning_rate": 5.835520530030094e-05, "loss": 0.7437, "step": 139850 }, { "epoch": 0.89352567624548, "grad_norm": 0.9449456930160522, "learning_rate": 5.8350258125611436e-05, "loss": 0.8322, "step": 139860 }, { "epoch": 0.8935895633952187, "grad_norm": 1.3340734243392944, "learning_rate": 5.834531086682762e-05, "loss": 0.9176, "step": 139870 }, { "epoch": 0.8936534505449574, "grad_norm": 0.7839272022247314, "learning_rate": 5.834036352399929e-05, "loss": 0.8046, "step": 139880 }, { "epoch": 0.8937173376946961, "grad_norm": 1.2315632104873657, "learning_rate": 5.833541609717629e-05, "loss": 0.9361, "step": 139890 }, { "epoch": 0.8937812248444348, "grad_norm": 1.0572025775909424, "learning_rate": 5.833046858640844e-05, "loss": 0.7237, "step": 139900 }, { "epoch": 0.8938451119941735, "grad_norm": 0.9382676482200623, "learning_rate": 5.832552099174556e-05, "loss": 0.8231, "step": 139910 }, { "epoch": 0.8939089991439122, "grad_norm": 1.3315147161483765, "learning_rate": 5.832057331323748e-05, "loss": 0.8058, "step": 139920 }, { "epoch": 0.8939728862936509, "grad_norm": 0.7122629284858704, "learning_rate": 5.8316120330933764e-05, "loss": 1.0295, "step": 139930 }, { "epoch": 0.8940367734433896, "grad_norm": 0.9100248217582703, "learning_rate": 5.831117249325708e-05, "loss": 1.1005, "step": 139940 }, { "epoch": 0.8941006605931283, "grad_norm": 1.913546085357666, "learning_rate": 5.830622457187971e-05, "loss": 0.9199, "step": 139950 }, { "epoch": 0.894164547742867, "grad_norm": 0.9733704328536987, "learning_rate": 5.830127656685145e-05, "loss": 0.8767, "step": 139960 }, { "epoch": 0.8942284348926057, "grad_norm": 1.0809566974639893, "learning_rate": 5.8296328478222174e-05, "loss": 0.7217, "step": 139970 }, { "epoch": 0.8942923220423444, "grad_norm": 1.1782524585723877, "learning_rate": 5.8291380306041685e-05, "loss": 1.0244, "step": 139980 }, { "epoch": 0.8943562091920831, "grad_norm": 0.9064955711364746, "learning_rate": 5.828643205035982e-05, "loss": 0.8093, "step": 139990 }, { "epoch": 0.8944200963418218, "grad_norm": 1.3655163049697876, "learning_rate": 5.828148371122643e-05, "loss": 0.9088, "step": 140000 }, { "epoch": 0.8944839834915606, "grad_norm": 0.760166585445404, "learning_rate": 5.8276535288691325e-05, "loss": 0.8999, "step": 140010 }, { "epoch": 0.8945478706412993, "grad_norm": 1.0654029846191406, "learning_rate": 5.8271586782804344e-05, "loss": 0.9849, "step": 140020 }, { "epoch": 0.894611757791038, "grad_norm": 0.7643817067146301, "learning_rate": 5.826663819361534e-05, "loss": 1.1517, "step": 140030 }, { "epoch": 0.8946756449407766, "grad_norm": 0.8907740116119385, "learning_rate": 5.8261689521174136e-05, "loss": 1.0153, "step": 140040 }, { "epoch": 0.8947395320905153, "grad_norm": 0.8669731616973877, "learning_rate": 5.825674076553056e-05, "loss": 0.9049, "step": 140050 }, { "epoch": 0.894803419240254, "grad_norm": 0.9580491185188293, "learning_rate": 5.8251791926734464e-05, "loss": 0.885, "step": 140060 }, { "epoch": 0.8948673063899927, "grad_norm": 1.1952874660491943, "learning_rate": 5.8246843004835695e-05, "loss": 0.8488, "step": 140070 }, { "epoch": 0.8949311935397314, "grad_norm": 0.9386703372001648, "learning_rate": 5.824189399988408e-05, "loss": 0.8763, "step": 140080 }, { "epoch": 0.8949950806894701, "grad_norm": 0.8566167950630188, "learning_rate": 5.823694491192947e-05, "loss": 0.7872, "step": 140090 }, { "epoch": 0.8950589678392088, "grad_norm": 0.6722133755683899, "learning_rate": 5.8231995741021685e-05, "loss": 0.9128, "step": 140100 }, { "epoch": 0.8951228549889475, "grad_norm": 0.688102662563324, "learning_rate": 5.822704648721059e-05, "loss": 0.8653, "step": 140110 }, { "epoch": 0.8951867421386862, "grad_norm": 0.8262643814086914, "learning_rate": 5.8222097150545996e-05, "loss": 0.9295, "step": 140120 }, { "epoch": 0.8952506292884249, "grad_norm": 0.7746517658233643, "learning_rate": 5.821714773107779e-05, "loss": 0.9269, "step": 140130 }, { "epoch": 0.8953145164381636, "grad_norm": 0.5757784247398376, "learning_rate": 5.82121982288558e-05, "loss": 0.9843, "step": 140140 }, { "epoch": 0.8953784035879023, "grad_norm": 0.8565959334373474, "learning_rate": 5.8207248643929854e-05, "loss": 0.9264, "step": 140150 }, { "epoch": 0.895442290737641, "grad_norm": 0.7413806319236755, "learning_rate": 5.820229897634983e-05, "loss": 1.0038, "step": 140160 }, { "epoch": 0.8955061778873797, "grad_norm": 0.862273633480072, "learning_rate": 5.8197349226165556e-05, "loss": 1.0254, "step": 140170 }, { "epoch": 0.8955700650371184, "grad_norm": 1.1083346605300903, "learning_rate": 5.8192399393426874e-05, "loss": 1.0992, "step": 140180 }, { "epoch": 0.8956339521868572, "grad_norm": 0.8349512219429016, "learning_rate": 5.818744947818367e-05, "loss": 0.9434, "step": 140190 }, { "epoch": 0.8956978393365959, "grad_norm": 0.8543719053268433, "learning_rate": 5.818249948048573e-05, "loss": 0.7931, "step": 140200 }, { "epoch": 0.8957617264863346, "grad_norm": 0.930448055267334, "learning_rate": 5.817754940038296e-05, "loss": 0.9503, "step": 140210 }, { "epoch": 0.8958256136360733, "grad_norm": 1.1186769008636475, "learning_rate": 5.8172599237925195e-05, "loss": 0.9436, "step": 140220 }, { "epoch": 0.895889500785812, "grad_norm": 0.9526256322860718, "learning_rate": 5.8167648993162285e-05, "loss": 0.7081, "step": 140230 }, { "epoch": 0.8959533879355507, "grad_norm": 1.0328584909439087, "learning_rate": 5.816269866614408e-05, "loss": 0.9017, "step": 140240 }, { "epoch": 0.8960172750852894, "grad_norm": 0.8597428798675537, "learning_rate": 5.815774825692044e-05, "loss": 0.8572, "step": 140250 }, { "epoch": 0.8960811622350281, "grad_norm": 1.493808388710022, "learning_rate": 5.815279776554121e-05, "loss": 0.728, "step": 140260 }, { "epoch": 0.8961450493847668, "grad_norm": 0.6569556593894958, "learning_rate": 5.814784719205626e-05, "loss": 0.7934, "step": 140270 }, { "epoch": 0.8962089365345054, "grad_norm": 1.5247915983200073, "learning_rate": 5.814289653651544e-05, "loss": 0.9852, "step": 140280 }, { "epoch": 0.8962728236842441, "grad_norm": 0.8611108064651489, "learning_rate": 5.8137945798968606e-05, "loss": 0.7608, "step": 140290 }, { "epoch": 0.8963367108339828, "grad_norm": 1.3073726892471313, "learning_rate": 5.813299497946562e-05, "loss": 1.0608, "step": 140300 }, { "epoch": 0.8964005979837215, "grad_norm": 0.9002431631088257, "learning_rate": 5.812804407805633e-05, "loss": 0.9049, "step": 140310 }, { "epoch": 0.8964644851334602, "grad_norm": 0.9179620742797852, "learning_rate": 5.8123093094790603e-05, "loss": 0.761, "step": 140320 }, { "epoch": 0.8965283722831989, "grad_norm": 1.2235110998153687, "learning_rate": 5.8118142029718303e-05, "loss": 0.7735, "step": 140330 }, { "epoch": 0.8965922594329376, "grad_norm": 0.7875511646270752, "learning_rate": 5.811319088288931e-05, "loss": 1.0747, "step": 140340 }, { "epoch": 0.8966561465826763, "grad_norm": 1.0100558996200562, "learning_rate": 5.8108239654353444e-05, "loss": 0.8439, "step": 140350 }, { "epoch": 0.896720033732415, "grad_norm": 0.727079451084137, "learning_rate": 5.81032883441606e-05, "loss": 0.6591, "step": 140360 }, { "epoch": 0.8967839208821538, "grad_norm": 0.9995219707489014, "learning_rate": 5.809833695236063e-05, "loss": 1.0365, "step": 140370 }, { "epoch": 0.8968478080318925, "grad_norm": 1.349561095237732, "learning_rate": 5.80933854790034e-05, "loss": 0.8023, "step": 140380 }, { "epoch": 0.8969116951816312, "grad_norm": 0.9812521934509277, "learning_rate": 5.8088433924138785e-05, "loss": 0.6831, "step": 140390 }, { "epoch": 0.8969755823313699, "grad_norm": 0.8825498223304749, "learning_rate": 5.808348228781662e-05, "loss": 0.8826, "step": 140400 }, { "epoch": 0.8970394694811086, "grad_norm": 1.0122778415679932, "learning_rate": 5.807853057008682e-05, "loss": 0.8666, "step": 140410 }, { "epoch": 0.8971033566308473, "grad_norm": 0.6166019439697266, "learning_rate": 5.807357877099922e-05, "loss": 1.0452, "step": 140420 }, { "epoch": 0.897167243780586, "grad_norm": 0.8858250379562378, "learning_rate": 5.806862689060369e-05, "loss": 1.0248, "step": 140430 }, { "epoch": 0.8972311309303247, "grad_norm": 0.8427072167396545, "learning_rate": 5.806367492895011e-05, "loss": 0.7888, "step": 140440 }, { "epoch": 0.8972950180800634, "grad_norm": 0.750184178352356, "learning_rate": 5.805872288608834e-05, "loss": 0.7918, "step": 140450 }, { "epoch": 0.8973589052298021, "grad_norm": 1.128303050994873, "learning_rate": 5.805377076206828e-05, "loss": 0.6939, "step": 140460 }, { "epoch": 0.8974227923795408, "grad_norm": 0.7906418442726135, "learning_rate": 5.804881855693976e-05, "loss": 0.9361, "step": 140470 }, { "epoch": 0.8974866795292795, "grad_norm": 0.7705846428871155, "learning_rate": 5.804386627075268e-05, "loss": 1.2284, "step": 140480 }, { "epoch": 0.8975505666790182, "grad_norm": 0.898186445236206, "learning_rate": 5.803891390355691e-05, "loss": 0.7291, "step": 140490 }, { "epoch": 0.8976144538287569, "grad_norm": 0.7422863841056824, "learning_rate": 5.803396145540232e-05, "loss": 0.7275, "step": 140500 }, { "epoch": 0.8976783409784956, "grad_norm": 1.0972338914871216, "learning_rate": 5.802900892633879e-05, "loss": 0.8772, "step": 140510 }, { "epoch": 0.8977422281282342, "grad_norm": 1.0843368768692017, "learning_rate": 5.8024056316416197e-05, "loss": 0.9729, "step": 140520 }, { "epoch": 0.8978061152779729, "grad_norm": 1.3560301065444946, "learning_rate": 5.801910362568441e-05, "loss": 1.0922, "step": 140530 }, { "epoch": 0.8978700024277116, "grad_norm": 0.6802355647087097, "learning_rate": 5.801415085419332e-05, "loss": 0.851, "step": 140540 }, { "epoch": 0.8979338895774504, "grad_norm": 1.0492808818817139, "learning_rate": 5.800919800199279e-05, "loss": 0.7976, "step": 140550 }, { "epoch": 0.8979977767271891, "grad_norm": 0.4455210864543915, "learning_rate": 5.8004245069132714e-05, "loss": 1.0168, "step": 140560 }, { "epoch": 0.8980616638769278, "grad_norm": 1.461052417755127, "learning_rate": 5.799929205566296e-05, "loss": 0.7563, "step": 140570 }, { "epoch": 0.8981255510266665, "grad_norm": 0.7567191123962402, "learning_rate": 5.799433896163342e-05, "loss": 0.7135, "step": 140580 }, { "epoch": 0.8981894381764052, "grad_norm": 1.1161195039749146, "learning_rate": 5.7989385787093965e-05, "loss": 1.2382, "step": 140590 }, { "epoch": 0.8982533253261439, "grad_norm": 1.0105892419815063, "learning_rate": 5.798443253209449e-05, "loss": 0.9698, "step": 140600 }, { "epoch": 0.8983172124758826, "grad_norm": 0.8450389504432678, "learning_rate": 5.797947919668486e-05, "loss": 1.1442, "step": 140610 }, { "epoch": 0.8983810996256213, "grad_norm": 1.1079736948013306, "learning_rate": 5.797452578091498e-05, "loss": 0.9697, "step": 140620 }, { "epoch": 0.89844498677536, "grad_norm": 1.0912140607833862, "learning_rate": 5.796957228483473e-05, "loss": 0.9725, "step": 140630 }, { "epoch": 0.8985088739250987, "grad_norm": 1.1677342653274536, "learning_rate": 5.7964618708493966e-05, "loss": 0.996, "step": 140640 }, { "epoch": 0.8985727610748374, "grad_norm": 0.8014145493507385, "learning_rate": 5.7959665051942626e-05, "loss": 0.8948, "step": 140650 }, { "epoch": 0.8986366482245761, "grad_norm": 0.5833203792572021, "learning_rate": 5.795471131523057e-05, "loss": 0.8698, "step": 140660 }, { "epoch": 0.8987005353743148, "grad_norm": 1.0062291622161865, "learning_rate": 5.7949757498407686e-05, "loss": 0.8926, "step": 140670 }, { "epoch": 0.8987644225240535, "grad_norm": 0.8668988943099976, "learning_rate": 5.7944803601523866e-05, "loss": 0.9216, "step": 140680 }, { "epoch": 0.8988283096737922, "grad_norm": 0.9266487956047058, "learning_rate": 5.793984962462901e-05, "loss": 0.887, "step": 140690 }, { "epoch": 0.8988921968235309, "grad_norm": 0.897591769695282, "learning_rate": 5.793489556777299e-05, "loss": 0.815, "step": 140700 }, { "epoch": 0.8989560839732696, "grad_norm": 1.11785089969635, "learning_rate": 5.792994143100571e-05, "loss": 0.7505, "step": 140710 }, { "epoch": 0.8990199711230084, "grad_norm": 0.7704851627349854, "learning_rate": 5.7924987214377056e-05, "loss": 0.7002, "step": 140720 }, { "epoch": 0.8990838582727471, "grad_norm": 1.0531551837921143, "learning_rate": 5.7920032917936925e-05, "loss": 0.8227, "step": 140730 }, { "epoch": 0.8991477454224858, "grad_norm": 1.1784263849258423, "learning_rate": 5.791507854173521e-05, "loss": 1.0551, "step": 140740 }, { "epoch": 0.8992116325722245, "grad_norm": 1.1204239130020142, "learning_rate": 5.791012408582182e-05, "loss": 0.7425, "step": 140750 }, { "epoch": 0.8992755197219632, "grad_norm": 1.3130213022232056, "learning_rate": 5.790516955024662e-05, "loss": 0.6903, "step": 140760 }, { "epoch": 0.8993394068717018, "grad_norm": 1.0173828601837158, "learning_rate": 5.790021493505953e-05, "loss": 1.0036, "step": 140770 }, { "epoch": 0.8994032940214405, "grad_norm": 0.8122161030769348, "learning_rate": 5.789526024031044e-05, "loss": 0.909, "step": 140780 }, { "epoch": 0.8994671811711792, "grad_norm": 1.2210887670516968, "learning_rate": 5.7890305466049255e-05, "loss": 0.9721, "step": 140790 }, { "epoch": 0.8995310683209179, "grad_norm": 1.316361665725708, "learning_rate": 5.788535061232586e-05, "loss": 0.8888, "step": 140800 }, { "epoch": 0.8995949554706566, "grad_norm": 0.7551913857460022, "learning_rate": 5.788039567919017e-05, "loss": 1.071, "step": 140810 }, { "epoch": 0.8996588426203953, "grad_norm": 1.1682409048080444, "learning_rate": 5.787544066669207e-05, "loss": 0.7179, "step": 140820 }, { "epoch": 0.899722729770134, "grad_norm": 1.1482545137405396, "learning_rate": 5.787048557488147e-05, "loss": 0.8796, "step": 140830 }, { "epoch": 0.8997866169198727, "grad_norm": 0.6773545145988464, "learning_rate": 5.786553040380828e-05, "loss": 0.8058, "step": 140840 }, { "epoch": 0.8998505040696114, "grad_norm": 1.3889111280441284, "learning_rate": 5.7860575153522375e-05, "loss": 0.7399, "step": 140850 }, { "epoch": 0.8999143912193501, "grad_norm": 1.0887017250061035, "learning_rate": 5.785561982407371e-05, "loss": 0.7941, "step": 140860 }, { "epoch": 0.8999782783690888, "grad_norm": 0.843041718006134, "learning_rate": 5.785066441551212e-05, "loss": 0.9538, "step": 140870 }, { "epoch": 0.9000421655188275, "grad_norm": 0.9653436541557312, "learning_rate": 5.784570892788758e-05, "loss": 1.5095, "step": 140880 }, { "epoch": 0.9001060526685662, "grad_norm": 0.824621319770813, "learning_rate": 5.7840753361249945e-05, "loss": 0.9549, "step": 140890 }, { "epoch": 0.900169939818305, "grad_norm": 0.7692892551422119, "learning_rate": 5.783579771564914e-05, "loss": 0.8578, "step": 140900 }, { "epoch": 0.9002338269680437, "grad_norm": 0.9946816563606262, "learning_rate": 5.7830841991135086e-05, "loss": 0.8378, "step": 140910 }, { "epoch": 0.9002977141177824, "grad_norm": 0.8059331178665161, "learning_rate": 5.782588618775766e-05, "loss": 0.8517, "step": 140920 }, { "epoch": 0.9003616012675211, "grad_norm": 0.7814688086509705, "learning_rate": 5.782093030556681e-05, "loss": 0.8913, "step": 140930 }, { "epoch": 0.9004254884172598, "grad_norm": 0.7162201404571533, "learning_rate": 5.781597434461241e-05, "loss": 0.7852, "step": 140940 }, { "epoch": 0.9004893755669985, "grad_norm": 0.7033975720405579, "learning_rate": 5.78110183049444e-05, "loss": 0.8596, "step": 140950 }, { "epoch": 0.9005532627167372, "grad_norm": 1.4916331768035889, "learning_rate": 5.7806062186612666e-05, "loss": 0.9765, "step": 140960 }, { "epoch": 0.9006171498664759, "grad_norm": 0.9655159115791321, "learning_rate": 5.7801105989667134e-05, "loss": 0.6591, "step": 140970 }, { "epoch": 0.9006810370162146, "grad_norm": 1.061277985572815, "learning_rate": 5.7796149714157724e-05, "loss": 1.0504, "step": 140980 }, { "epoch": 0.9007449241659533, "grad_norm": 0.7868290543556213, "learning_rate": 5.779119336013433e-05, "loss": 0.8025, "step": 140990 }, { "epoch": 0.900808811315692, "grad_norm": 0.7589150667190552, "learning_rate": 5.7786236927646886e-05, "loss": 0.704, "step": 141000 }, { "epoch": 0.9008726984654306, "grad_norm": 0.7982025742530823, "learning_rate": 5.77812804167453e-05, "loss": 0.8338, "step": 141010 }, { "epoch": 0.9009365856151693, "grad_norm": 1.031693458557129, "learning_rate": 5.7776323827479484e-05, "loss": 0.8807, "step": 141020 }, { "epoch": 0.901000472764908, "grad_norm": 0.9753697514533997, "learning_rate": 5.777136715989936e-05, "loss": 0.7684, "step": 141030 }, { "epoch": 0.9010643599146467, "grad_norm": 0.9821022152900696, "learning_rate": 5.776641041405485e-05, "loss": 0.7901, "step": 141040 }, { "epoch": 0.9011282470643854, "grad_norm": 0.5876692533493042, "learning_rate": 5.776145358999587e-05, "loss": 0.8646, "step": 141050 }, { "epoch": 0.9011921342141241, "grad_norm": 1.2983272075653076, "learning_rate": 5.7756496687772346e-05, "loss": 0.721, "step": 141060 }, { "epoch": 0.9012560213638628, "grad_norm": 0.7570874691009521, "learning_rate": 5.775153970743418e-05, "loss": 0.776, "step": 141070 }, { "epoch": 0.9013199085136016, "grad_norm": 0.7375748157501221, "learning_rate": 5.77465826490313e-05, "loss": 0.7279, "step": 141080 }, { "epoch": 0.9013837956633403, "grad_norm": 1.1768232583999634, "learning_rate": 5.774162551261363e-05, "loss": 0.8846, "step": 141090 }, { "epoch": 0.901447682813079, "grad_norm": 0.6223422884941101, "learning_rate": 5.7736668298231103e-05, "loss": 0.8627, "step": 141100 }, { "epoch": 0.9015115699628177, "grad_norm": 0.9526621699333191, "learning_rate": 5.773171100593362e-05, "loss": 1.0312, "step": 141110 }, { "epoch": 0.9015754571125564, "grad_norm": 1.0160198211669922, "learning_rate": 5.772675363577112e-05, "loss": 1.0077, "step": 141120 }, { "epoch": 0.9016393442622951, "grad_norm": 0.5634738802909851, "learning_rate": 5.772179618779354e-05, "loss": 0.8606, "step": 141130 }, { "epoch": 0.9017032314120338, "grad_norm": 0.93465656042099, "learning_rate": 5.7716838662050784e-05, "loss": 1.0614, "step": 141140 }, { "epoch": 0.9017671185617725, "grad_norm": 0.9534331560134888, "learning_rate": 5.7711881058592786e-05, "loss": 0.796, "step": 141150 }, { "epoch": 0.9018310057115112, "grad_norm": 1.255573034286499, "learning_rate": 5.7706923377469477e-05, "loss": 0.7282, "step": 141160 }, { "epoch": 0.9018948928612499, "grad_norm": 0.8945760726928711, "learning_rate": 5.770196561873077e-05, "loss": 0.7917, "step": 141170 }, { "epoch": 0.9019587800109886, "grad_norm": 1.1224944591522217, "learning_rate": 5.769700778242661e-05, "loss": 1.3042, "step": 141180 }, { "epoch": 0.9020226671607273, "grad_norm": 1.421687126159668, "learning_rate": 5.769204986860692e-05, "loss": 0.8137, "step": 141190 }, { "epoch": 0.902086554310466, "grad_norm": 0.8872746229171753, "learning_rate": 5.7687091877321654e-05, "loss": 0.7287, "step": 141200 }, { "epoch": 0.9021504414602047, "grad_norm": 0.8511372804641724, "learning_rate": 5.7682133808620706e-05, "loss": 0.6813, "step": 141210 }, { "epoch": 0.9022143286099434, "grad_norm": 0.7379246354103088, "learning_rate": 5.7677175662554025e-05, "loss": 0.9436, "step": 141220 }, { "epoch": 0.9022782157596821, "grad_norm": 0.7902219891548157, "learning_rate": 5.767221743917155e-05, "loss": 1.0224, "step": 141230 }, { "epoch": 0.9023421029094209, "grad_norm": 1.0444148778915405, "learning_rate": 5.766725913852321e-05, "loss": 0.7918, "step": 141240 }, { "epoch": 0.9024059900591594, "grad_norm": 0.641941249370575, "learning_rate": 5.766230076065893e-05, "loss": 0.749, "step": 141250 }, { "epoch": 0.9024698772088982, "grad_norm": 0.4563290476799011, "learning_rate": 5.7657342305628647e-05, "loss": 0.6918, "step": 141260 }, { "epoch": 0.9025337643586369, "grad_norm": 0.9786915183067322, "learning_rate": 5.765238377348232e-05, "loss": 0.6863, "step": 141270 }, { "epoch": 0.9025976515083756, "grad_norm": 0.9505366086959839, "learning_rate": 5.764742516426985e-05, "loss": 0.8688, "step": 141280 }, { "epoch": 0.9026615386581143, "grad_norm": 0.6907140612602234, "learning_rate": 5.76424664780412e-05, "loss": 0.6706, "step": 141290 }, { "epoch": 0.902725425807853, "grad_norm": 0.9045562148094177, "learning_rate": 5.7637507714846304e-05, "loss": 0.9848, "step": 141300 }, { "epoch": 0.9027893129575917, "grad_norm": 1.1987589597702026, "learning_rate": 5.763254887473512e-05, "loss": 0.8481, "step": 141310 }, { "epoch": 0.9028532001073304, "grad_norm": 1.0016857385635376, "learning_rate": 5.7627589957757535e-05, "loss": 0.9294, "step": 141320 }, { "epoch": 0.9029170872570691, "grad_norm": 0.8415845632553101, "learning_rate": 5.762263096396351e-05, "loss": 0.7345, "step": 141330 }, { "epoch": 0.9029809744068078, "grad_norm": 0.8191704154014587, "learning_rate": 5.761767189340302e-05, "loss": 0.9255, "step": 141340 }, { "epoch": 0.9030448615565465, "grad_norm": 0.7609559893608093, "learning_rate": 5.761271274612597e-05, "loss": 0.7549, "step": 141350 }, { "epoch": 0.9031087487062852, "grad_norm": 0.6478745341300964, "learning_rate": 5.7607753522182326e-05, "loss": 0.882, "step": 141360 }, { "epoch": 0.9031726358560239, "grad_norm": 1.0700238943099976, "learning_rate": 5.7602794221622024e-05, "loss": 0.8373, "step": 141370 }, { "epoch": 0.9032365230057626, "grad_norm": 0.638687252998352, "learning_rate": 5.7597834844495005e-05, "loss": 0.8512, "step": 141380 }, { "epoch": 0.9033004101555013, "grad_norm": 0.8440176248550415, "learning_rate": 5.759287539085121e-05, "loss": 0.8574, "step": 141390 }, { "epoch": 0.90336429730524, "grad_norm": 1.7291699647903442, "learning_rate": 5.7587915860740596e-05, "loss": 0.9808, "step": 141400 }, { "epoch": 0.9034281844549787, "grad_norm": 1.2312055826187134, "learning_rate": 5.758295625421311e-05, "loss": 0.7749, "step": 141410 }, { "epoch": 0.9034920716047175, "grad_norm": 0.8590479493141174, "learning_rate": 5.757799657131868e-05, "loss": 0.9986, "step": 141420 }, { "epoch": 0.9035559587544562, "grad_norm": 0.7374904155731201, "learning_rate": 5.757303681210728e-05, "loss": 0.7731, "step": 141430 }, { "epoch": 0.9036198459041949, "grad_norm": 1.1177715063095093, "learning_rate": 5.756807697662885e-05, "loss": 1.0543, "step": 141440 }, { "epoch": 0.9036837330539336, "grad_norm": 0.9571028351783752, "learning_rate": 5.7563117064933327e-05, "loss": 0.9526, "step": 141450 }, { "epoch": 0.9037476202036723, "grad_norm": 0.8054457902908325, "learning_rate": 5.755815707707067e-05, "loss": 1.079, "step": 141460 }, { "epoch": 0.903811507353411, "grad_norm": 1.010640263557434, "learning_rate": 5.755319701309084e-05, "loss": 0.9706, "step": 141470 }, { "epoch": 0.9038753945031497, "grad_norm": 0.7342219948768616, "learning_rate": 5.7548236873043795e-05, "loss": 0.6875, "step": 141480 }, { "epoch": 0.9039392816528884, "grad_norm": 0.8823431730270386, "learning_rate": 5.754327665697945e-05, "loss": 0.8113, "step": 141490 }, { "epoch": 0.904003168802627, "grad_norm": 0.9259976744651794, "learning_rate": 5.75383163649478e-05, "loss": 0.7433, "step": 141500 }, { "epoch": 0.9040670559523657, "grad_norm": 0.7435508370399475, "learning_rate": 5.753335599699877e-05, "loss": 0.725, "step": 141510 }, { "epoch": 0.9041309431021044, "grad_norm": 1.2026573419570923, "learning_rate": 5.752839555318235e-05, "loss": 0.9269, "step": 141520 }, { "epoch": 0.9041948302518431, "grad_norm": 0.9192835092544556, "learning_rate": 5.752343503354844e-05, "loss": 0.7876, "step": 141530 }, { "epoch": 0.9042587174015818, "grad_norm": 0.9267190098762512, "learning_rate": 5.7518474438147054e-05, "loss": 0.9516, "step": 141540 }, { "epoch": 0.9043226045513205, "grad_norm": 0.72882479429245, "learning_rate": 5.7513513767028124e-05, "loss": 0.8018, "step": 141550 }, { "epoch": 0.9043864917010592, "grad_norm": 0.7397010326385498, "learning_rate": 5.7508553020241606e-05, "loss": 0.7555, "step": 141560 }, { "epoch": 0.9044503788507979, "grad_norm": 1.5903676748275757, "learning_rate": 5.750359219783746e-05, "loss": 0.8326, "step": 141570 }, { "epoch": 0.9045142660005366, "grad_norm": 1.2328916788101196, "learning_rate": 5.749863129986566e-05, "loss": 1.1779, "step": 141580 }, { "epoch": 0.9045781531502753, "grad_norm": 1.1340621709823608, "learning_rate": 5.7493670326376146e-05, "loss": 1.0768, "step": 141590 }, { "epoch": 0.904642040300014, "grad_norm": 0.6127023100852966, "learning_rate": 5.74887092774189e-05, "loss": 0.9385, "step": 141600 }, { "epoch": 0.9047059274497528, "grad_norm": 1.0815454721450806, "learning_rate": 5.748374815304386e-05, "loss": 0.6736, "step": 141610 }, { "epoch": 0.9047698145994915, "grad_norm": 1.3410221338272095, "learning_rate": 5.7478786953301014e-05, "loss": 0.8431, "step": 141620 }, { "epoch": 0.9048337017492302, "grad_norm": 1.1525499820709229, "learning_rate": 5.74738256782403e-05, "loss": 0.872, "step": 141630 }, { "epoch": 0.9048975888989689, "grad_norm": 0.8780061602592468, "learning_rate": 5.74688643279117e-05, "loss": 0.9206, "step": 141640 }, { "epoch": 0.9049614760487076, "grad_norm": 1.385847568511963, "learning_rate": 5.7463902902365174e-05, "loss": 0.8192, "step": 141650 }, { "epoch": 0.9050253631984463, "grad_norm": 1.0178450345993042, "learning_rate": 5.745894140165069e-05, "loss": 0.8582, "step": 141660 }, { "epoch": 0.905089250348185, "grad_norm": 1.3906409740447998, "learning_rate": 5.745397982581822e-05, "loss": 0.79, "step": 141670 }, { "epoch": 0.9051531374979237, "grad_norm": 0.7807207107543945, "learning_rate": 5.7449018174917726e-05, "loss": 0.721, "step": 141680 }, { "epoch": 0.9052170246476624, "grad_norm": 0.7866250872612, "learning_rate": 5.744405644899916e-05, "loss": 0.7746, "step": 141690 }, { "epoch": 0.9052809117974011, "grad_norm": 0.7729105949401855, "learning_rate": 5.74390946481125e-05, "loss": 0.682, "step": 141700 }, { "epoch": 0.9053447989471398, "grad_norm": 1.1368606090545654, "learning_rate": 5.7434132772307735e-05, "loss": 1.2501, "step": 141710 }, { "epoch": 0.9054086860968785, "grad_norm": 2.2718279361724854, "learning_rate": 5.742917082163483e-05, "loss": 0.8217, "step": 141720 }, { "epoch": 0.9054725732466172, "grad_norm": 0.683323323726654, "learning_rate": 5.742420879614373e-05, "loss": 1.092, "step": 141730 }, { "epoch": 0.9055364603963558, "grad_norm": 0.8295241594314575, "learning_rate": 5.741924669588443e-05, "loss": 0.6993, "step": 141740 }, { "epoch": 0.9056003475460945, "grad_norm": 0.8456636667251587, "learning_rate": 5.7414284520906905e-05, "loss": 0.6898, "step": 141750 }, { "epoch": 0.9056642346958332, "grad_norm": 0.8255101442337036, "learning_rate": 5.7409322271261115e-05, "loss": 0.9425, "step": 141760 }, { "epoch": 0.9057281218455719, "grad_norm": 0.7259197235107422, "learning_rate": 5.740435994699704e-05, "loss": 0.7357, "step": 141770 }, { "epoch": 0.9057920089953106, "grad_norm": 0.8073982000350952, "learning_rate": 5.739939754816466e-05, "loss": 1.0821, "step": 141780 }, { "epoch": 0.9058558961450494, "grad_norm": 0.9572609663009644, "learning_rate": 5.7394435074813944e-05, "loss": 0.9942, "step": 141790 }, { "epoch": 0.9059197832947881, "grad_norm": 0.7269392013549805, "learning_rate": 5.738947252699487e-05, "loss": 0.8068, "step": 141800 }, { "epoch": 0.9059836704445268, "grad_norm": 0.710191547870636, "learning_rate": 5.738450990475741e-05, "loss": 0.8417, "step": 141810 }, { "epoch": 0.9060475575942655, "grad_norm": 1.1868984699249268, "learning_rate": 5.7379547208151554e-05, "loss": 0.7552, "step": 141820 }, { "epoch": 0.9061114447440042, "grad_norm": 0.8787739872932434, "learning_rate": 5.737458443722726e-05, "loss": 0.9598, "step": 141830 }, { "epoch": 0.9061753318937429, "grad_norm": 0.5496005415916443, "learning_rate": 5.736962159203453e-05, "loss": 0.801, "step": 141840 }, { "epoch": 0.9062392190434816, "grad_norm": 1.013084053993225, "learning_rate": 5.736465867262333e-05, "loss": 0.9996, "step": 141850 }, { "epoch": 0.9063031061932203, "grad_norm": 0.8402630686759949, "learning_rate": 5.735969567904363e-05, "loss": 0.7736, "step": 141860 }, { "epoch": 0.906366993342959, "grad_norm": 0.7525802850723267, "learning_rate": 5.735473261134545e-05, "loss": 0.6874, "step": 141870 }, { "epoch": 0.9064308804926977, "grad_norm": 0.8910543918609619, "learning_rate": 5.734976946957875e-05, "loss": 0.8928, "step": 141880 }, { "epoch": 0.9064947676424364, "grad_norm": 1.0312716960906982, "learning_rate": 5.7344806253793504e-05, "loss": 0.8406, "step": 141890 }, { "epoch": 0.9065586547921751, "grad_norm": 0.8210422396659851, "learning_rate": 5.733984296403971e-05, "loss": 1.0218, "step": 141900 }, { "epoch": 0.9066225419419138, "grad_norm": 1.0540539026260376, "learning_rate": 5.733487960036735e-05, "loss": 0.9388, "step": 141910 }, { "epoch": 0.9066864290916525, "grad_norm": 1.0182119607925415, "learning_rate": 5.73299161628264e-05, "loss": 0.9436, "step": 141920 }, { "epoch": 0.9067503162413912, "grad_norm": 0.9228383898735046, "learning_rate": 5.732495265146687e-05, "loss": 0.7732, "step": 141930 }, { "epoch": 0.90681420339113, "grad_norm": 0.8174379467964172, "learning_rate": 5.731998906633871e-05, "loss": 1.08, "step": 141940 }, { "epoch": 0.9068780905408687, "grad_norm": 0.807985246181488, "learning_rate": 5.731502540749194e-05, "loss": 0.9834, "step": 141950 }, { "epoch": 0.9069419776906074, "grad_norm": 0.8408271670341492, "learning_rate": 5.7310061674976526e-05, "loss": 0.958, "step": 141960 }, { "epoch": 0.9070058648403461, "grad_norm": 0.7872259616851807, "learning_rate": 5.730509786884247e-05, "loss": 1.0426, "step": 141970 }, { "epoch": 0.9070697519900847, "grad_norm": 0.7922796010971069, "learning_rate": 5.730013398913976e-05, "loss": 1.0874, "step": 141980 }, { "epoch": 0.9071336391398234, "grad_norm": 0.8514977693557739, "learning_rate": 5.729517003591839e-05, "loss": 1.1897, "step": 141990 }, { "epoch": 0.9071975262895621, "grad_norm": 1.1067577600479126, "learning_rate": 5.729020600922833e-05, "loss": 1.1056, "step": 142000 }, { "epoch": 0.9072614134393008, "grad_norm": 1.1628286838531494, "learning_rate": 5.7285241909119606e-05, "loss": 1.0116, "step": 142010 }, { "epoch": 0.9073253005890395, "grad_norm": 1.2236912250518799, "learning_rate": 5.7280277735642184e-05, "loss": 0.7426, "step": 142020 }, { "epoch": 0.9073891877387782, "grad_norm": 1.162598729133606, "learning_rate": 5.727531348884607e-05, "loss": 0.8706, "step": 142030 }, { "epoch": 0.9074530748885169, "grad_norm": 1.4383586645126343, "learning_rate": 5.7270349168781256e-05, "loss": 0.8572, "step": 142040 }, { "epoch": 0.9075169620382556, "grad_norm": 0.8928152322769165, "learning_rate": 5.726538477549774e-05, "loss": 0.8158, "step": 142050 }, { "epoch": 0.9075808491879943, "grad_norm": 0.8349987864494324, "learning_rate": 5.7260420309045507e-05, "loss": 1.1324, "step": 142060 }, { "epoch": 0.907644736337733, "grad_norm": 0.7434611320495605, "learning_rate": 5.725545576947456e-05, "loss": 0.9721, "step": 142070 }, { "epoch": 0.9077086234874717, "grad_norm": 0.7635177373886108, "learning_rate": 5.72504911568349e-05, "loss": 0.8982, "step": 142080 }, { "epoch": 0.9077725106372104, "grad_norm": 0.7675304412841797, "learning_rate": 5.724552647117653e-05, "loss": 0.9434, "step": 142090 }, { "epoch": 0.9078363977869491, "grad_norm": 0.9475330710411072, "learning_rate": 5.724056171254942e-05, "loss": 0.6869, "step": 142100 }, { "epoch": 0.9079002849366878, "grad_norm": 1.1535788774490356, "learning_rate": 5.7235596881003604e-05, "loss": 0.7812, "step": 142110 }, { "epoch": 0.9079641720864265, "grad_norm": 0.9977142214775085, "learning_rate": 5.723063197658907e-05, "loss": 1.0825, "step": 142120 }, { "epoch": 0.9080280592361653, "grad_norm": 0.7173039317131042, "learning_rate": 5.722566699935581e-05, "loss": 0.843, "step": 142130 }, { "epoch": 0.908091946385904, "grad_norm": 1.8091074228286743, "learning_rate": 5.7220701949353825e-05, "loss": 0.8146, "step": 142140 }, { "epoch": 0.9081558335356427, "grad_norm": 1.055882453918457, "learning_rate": 5.7215736826633135e-05, "loss": 0.7627, "step": 142150 }, { "epoch": 0.9082197206853814, "grad_norm": 0.6005893349647522, "learning_rate": 5.721077163124373e-05, "loss": 1.0631, "step": 142160 }, { "epoch": 0.9082836078351201, "grad_norm": 0.9407183527946472, "learning_rate": 5.7205806363235616e-05, "loss": 0.982, "step": 142170 }, { "epoch": 0.9083474949848588, "grad_norm": 1.6951545476913452, "learning_rate": 5.7200841022658804e-05, "loss": 0.8731, "step": 142180 }, { "epoch": 0.9084113821345975, "grad_norm": 1.102827787399292, "learning_rate": 5.719587560956327e-05, "loss": 0.9827, "step": 142190 }, { "epoch": 0.9084752692843362, "grad_norm": 1.1529837846755981, "learning_rate": 5.719091012399907e-05, "loss": 0.8706, "step": 142200 }, { "epoch": 0.9085391564340749, "grad_norm": 0.8350210785865784, "learning_rate": 5.718594456601618e-05, "loss": 0.8377, "step": 142210 }, { "epoch": 0.9086030435838135, "grad_norm": 0.9454380869865417, "learning_rate": 5.71809789356646e-05, "loss": 0.893, "step": 142220 }, { "epoch": 0.9086669307335522, "grad_norm": 0.8189147710800171, "learning_rate": 5.7176013232994354e-05, "loss": 0.9778, "step": 142230 }, { "epoch": 0.9087308178832909, "grad_norm": 0.8861716389656067, "learning_rate": 5.717104745805545e-05, "loss": 1.0042, "step": 142240 }, { "epoch": 0.9087947050330296, "grad_norm": 0.785626232624054, "learning_rate": 5.716608161089789e-05, "loss": 0.8079, "step": 142250 }, { "epoch": 0.9088585921827683, "grad_norm": 1.1369320154190063, "learning_rate": 5.716111569157169e-05, "loss": 0.7916, "step": 142260 }, { "epoch": 0.908922479332507, "grad_norm": 1.1900125741958618, "learning_rate": 5.715614970012686e-05, "loss": 0.9075, "step": 142270 }, { "epoch": 0.9089863664822457, "grad_norm": 1.0034444332122803, "learning_rate": 5.7151183636613425e-05, "loss": 0.9664, "step": 142280 }, { "epoch": 0.9090502536319844, "grad_norm": 0.9154879450798035, "learning_rate": 5.714621750108138e-05, "loss": 0.9571, "step": 142290 }, { "epoch": 0.9091141407817231, "grad_norm": 0.6897664070129395, "learning_rate": 5.714125129358072e-05, "loss": 0.9097, "step": 142300 }, { "epoch": 0.9091780279314619, "grad_norm": 0.6371667385101318, "learning_rate": 5.7136285014161506e-05, "loss": 0.7985, "step": 142310 }, { "epoch": 0.9092419150812006, "grad_norm": 1.1131823062896729, "learning_rate": 5.713131866287371e-05, "loss": 0.9823, "step": 142320 }, { "epoch": 0.9093058022309393, "grad_norm": 1.771092176437378, "learning_rate": 5.712635223976738e-05, "loss": 1.0425, "step": 142330 }, { "epoch": 0.909369689380678, "grad_norm": 1.0553698539733887, "learning_rate": 5.712138574489251e-05, "loss": 0.9531, "step": 142340 }, { "epoch": 0.9094335765304167, "grad_norm": 0.4885224401950836, "learning_rate": 5.711641917829913e-05, "loss": 1.0004, "step": 142350 }, { "epoch": 0.9094974636801554, "grad_norm": 0.8409444093704224, "learning_rate": 5.7111452540037245e-05, "loss": 0.6038, "step": 142360 }, { "epoch": 0.9095613508298941, "grad_norm": 0.7915950417518616, "learning_rate": 5.7106485830156885e-05, "loss": 0.72, "step": 142370 }, { "epoch": 0.9096252379796328, "grad_norm": 0.8521913290023804, "learning_rate": 5.710151904870806e-05, "loss": 0.9703, "step": 142380 }, { "epoch": 0.9096891251293715, "grad_norm": 0.759997546672821, "learning_rate": 5.7096552195740797e-05, "loss": 0.8757, "step": 142390 }, { "epoch": 0.9097530122791102, "grad_norm": 1.0440067052841187, "learning_rate": 5.7091585271305113e-05, "loss": 0.9393, "step": 142400 }, { "epoch": 0.9098168994288489, "grad_norm": 0.5940192341804504, "learning_rate": 5.7086618275451034e-05, "loss": 0.6164, "step": 142410 }, { "epoch": 0.9098807865785876, "grad_norm": 0.9643594622612, "learning_rate": 5.708165120822857e-05, "loss": 0.9354, "step": 142420 }, { "epoch": 0.9099446737283263, "grad_norm": 1.3111646175384521, "learning_rate": 5.707668406968776e-05, "loss": 0.8814, "step": 142430 }, { "epoch": 0.910008560878065, "grad_norm": 0.7643352746963501, "learning_rate": 5.7071716859878624e-05, "loss": 0.7996, "step": 142440 }, { "epoch": 0.9100724480278037, "grad_norm": 0.6600208878517151, "learning_rate": 5.7066749578851163e-05, "loss": 0.9294, "step": 142450 }, { "epoch": 0.9101363351775424, "grad_norm": 0.8577162027359009, "learning_rate": 5.706178222665543e-05, "loss": 0.8142, "step": 142460 }, { "epoch": 0.910200222327281, "grad_norm": 0.6898266077041626, "learning_rate": 5.7056814803341454e-05, "loss": 0.7175, "step": 142470 }, { "epoch": 0.9102641094770197, "grad_norm": 0.6253551840782166, "learning_rate": 5.705184730895924e-05, "loss": 0.8652, "step": 142480 }, { "epoch": 0.9103279966267585, "grad_norm": 1.0927127599716187, "learning_rate": 5.704687974355881e-05, "loss": 0.9197, "step": 142490 }, { "epoch": 0.9103918837764972, "grad_norm": 1.2770670652389526, "learning_rate": 5.7041912107190223e-05, "loss": 0.7483, "step": 142500 }, { "epoch": 0.9104557709262359, "grad_norm": 0.7076462507247925, "learning_rate": 5.703694439990348e-05, "loss": 0.9537, "step": 142510 }, { "epoch": 0.9105196580759746, "grad_norm": 1.3433187007904053, "learning_rate": 5.703197662174863e-05, "loss": 0.8636, "step": 142520 }, { "epoch": 0.9105835452257133, "grad_norm": 1.6730338335037231, "learning_rate": 5.702700877277568e-05, "loss": 0.9131, "step": 142530 }, { "epoch": 0.910647432375452, "grad_norm": 1.130669355392456, "learning_rate": 5.702204085303468e-05, "loss": 0.9168, "step": 142540 }, { "epoch": 0.9107113195251907, "grad_norm": 0.9546806216239929, "learning_rate": 5.7017072862575626e-05, "loss": 0.8348, "step": 142550 }, { "epoch": 0.9107752066749294, "grad_norm": 0.6770340800285339, "learning_rate": 5.701210480144861e-05, "loss": 0.9684, "step": 142560 }, { "epoch": 0.9108390938246681, "grad_norm": 1.0904873609542847, "learning_rate": 5.700713666970361e-05, "loss": 0.8426, "step": 142570 }, { "epoch": 0.9109029809744068, "grad_norm": 0.8043915629386902, "learning_rate": 5.7002168467390694e-05, "loss": 1.1309, "step": 142580 }, { "epoch": 0.9109668681241455, "grad_norm": 0.7632177472114563, "learning_rate": 5.699720019455989e-05, "loss": 0.8359, "step": 142590 }, { "epoch": 0.9110307552738842, "grad_norm": 0.8584951758384705, "learning_rate": 5.699223185126121e-05, "loss": 0.8166, "step": 142600 }, { "epoch": 0.9110946424236229, "grad_norm": 0.704011082649231, "learning_rate": 5.698726343754472e-05, "loss": 0.7772, "step": 142610 }, { "epoch": 0.9111585295733616, "grad_norm": 0.954410970211029, "learning_rate": 5.698229495346044e-05, "loss": 0.9824, "step": 142620 }, { "epoch": 0.9112224167231003, "grad_norm": 1.6050761938095093, "learning_rate": 5.697732639905841e-05, "loss": 0.986, "step": 142630 }, { "epoch": 0.911286303872839, "grad_norm": 0.8036152720451355, "learning_rate": 5.697235777438866e-05, "loss": 1.1253, "step": 142640 }, { "epoch": 0.9113501910225777, "grad_norm": 1.0591254234313965, "learning_rate": 5.6967389079501234e-05, "loss": 0.7433, "step": 142650 }, { "epoch": 0.9114140781723165, "grad_norm": 0.63569176197052, "learning_rate": 5.6962420314446186e-05, "loss": 0.7921, "step": 142660 }, { "epoch": 0.9114779653220552, "grad_norm": 0.6590409278869629, "learning_rate": 5.6957451479273526e-05, "loss": 0.7715, "step": 142670 }, { "epoch": 0.9115418524717939, "grad_norm": 0.8721647262573242, "learning_rate": 5.695248257403332e-05, "loss": 0.9248, "step": 142680 }, { "epoch": 0.9116057396215326, "grad_norm": 0.8576902151107788, "learning_rate": 5.6947513598775605e-05, "loss": 0.8463, "step": 142690 }, { "epoch": 0.9116696267712713, "grad_norm": 1.4896668195724487, "learning_rate": 5.69425445535504e-05, "loss": 0.816, "step": 142700 }, { "epoch": 0.9117335139210099, "grad_norm": 0.9425134062767029, "learning_rate": 5.693757543840779e-05, "loss": 0.9642, "step": 142710 }, { "epoch": 0.9117974010707486, "grad_norm": 0.9548724889755249, "learning_rate": 5.693260625339777e-05, "loss": 0.6938, "step": 142720 }, { "epoch": 0.9118612882204873, "grad_norm": 1.2961103916168213, "learning_rate": 5.692763699857042e-05, "loss": 0.993, "step": 142730 }, { "epoch": 0.911925175370226, "grad_norm": 0.8667069673538208, "learning_rate": 5.692266767397576e-05, "loss": 0.8989, "step": 142740 }, { "epoch": 0.9119890625199647, "grad_norm": 0.678893506526947, "learning_rate": 5.691769827966386e-05, "loss": 0.7032, "step": 142750 }, { "epoch": 0.9120529496697034, "grad_norm": 0.5294567942619324, "learning_rate": 5.6912728815684744e-05, "loss": 0.9135, "step": 142760 }, { "epoch": 0.9121168368194421, "grad_norm": 0.7864146828651428, "learning_rate": 5.690775928208848e-05, "loss": 0.7941, "step": 142770 }, { "epoch": 0.9121807239691808, "grad_norm": 0.7031729817390442, "learning_rate": 5.690278967892511e-05, "loss": 0.8931, "step": 142780 }, { "epoch": 0.9122446111189195, "grad_norm": 0.7443121671676636, "learning_rate": 5.689782000624466e-05, "loss": 0.8238, "step": 142790 }, { "epoch": 0.9123084982686582, "grad_norm": 0.7489047050476074, "learning_rate": 5.68928502640972e-05, "loss": 0.731, "step": 142800 }, { "epoch": 0.9123723854183969, "grad_norm": 1.1270229816436768, "learning_rate": 5.688788045253277e-05, "loss": 0.9834, "step": 142810 }, { "epoch": 0.9124362725681356, "grad_norm": 0.6103757619857788, "learning_rate": 5.688291057160143e-05, "loss": 1.0271, "step": 142820 }, { "epoch": 0.9125001597178743, "grad_norm": 0.7932960987091064, "learning_rate": 5.687794062135322e-05, "loss": 0.7008, "step": 142830 }, { "epoch": 0.912564046867613, "grad_norm": 1.3297314643859863, "learning_rate": 5.687297060183821e-05, "loss": 0.9021, "step": 142840 }, { "epoch": 0.9126279340173518, "grad_norm": 0.9981955289840698, "learning_rate": 5.6868000513106435e-05, "loss": 0.9884, "step": 142850 }, { "epoch": 0.9126918211670905, "grad_norm": 1.4859464168548584, "learning_rate": 5.6863030355207945e-05, "loss": 1.0916, "step": 142860 }, { "epoch": 0.9127557083168292, "grad_norm": 1.1081634759902954, "learning_rate": 5.685806012819281e-05, "loss": 0.7292, "step": 142870 }, { "epoch": 0.9128195954665679, "grad_norm": 0.9023155570030212, "learning_rate": 5.6853089832111076e-05, "loss": 0.8272, "step": 142880 }, { "epoch": 0.9128834826163066, "grad_norm": 0.712682843208313, "learning_rate": 5.6848119467012795e-05, "loss": 1.0256, "step": 142890 }, { "epoch": 0.9129473697660453, "grad_norm": 1.0017153024673462, "learning_rate": 5.684314903294803e-05, "loss": 0.9368, "step": 142900 }, { "epoch": 0.913011256915784, "grad_norm": 1.385031819343567, "learning_rate": 5.6838178529966825e-05, "loss": 0.8932, "step": 142910 }, { "epoch": 0.9130751440655227, "grad_norm": 0.9716565608978271, "learning_rate": 5.683320795811925e-05, "loss": 1.127, "step": 142920 }, { "epoch": 0.9131390312152614, "grad_norm": 2.391300916671753, "learning_rate": 5.6828237317455365e-05, "loss": 1.1424, "step": 142930 }, { "epoch": 0.9132029183650001, "grad_norm": 2.1368186473846436, "learning_rate": 5.682326660802523e-05, "loss": 1.1169, "step": 142940 }, { "epoch": 0.9132668055147387, "grad_norm": 1.1230798959732056, "learning_rate": 5.6818295829878874e-05, "loss": 0.791, "step": 142950 }, { "epoch": 0.9133306926644774, "grad_norm": 0.8756389617919922, "learning_rate": 5.6813324983066404e-05, "loss": 0.7168, "step": 142960 }, { "epoch": 0.9133945798142161, "grad_norm": 4.043021202087402, "learning_rate": 5.680835406763785e-05, "loss": 0.8593, "step": 142970 }, { "epoch": 0.9134584669639548, "grad_norm": 0.9189384579658508, "learning_rate": 5.680338308364328e-05, "loss": 0.7222, "step": 142980 }, { "epoch": 0.9135223541136935, "grad_norm": 0.9267463088035583, "learning_rate": 5.679841203113275e-05, "loss": 0.9252, "step": 142990 }, { "epoch": 0.9135862412634322, "grad_norm": 0.9476075768470764, "learning_rate": 5.6793440910156336e-05, "loss": 0.8154, "step": 143000 }, { "epoch": 0.913650128413171, "grad_norm": 0.7349272966384888, "learning_rate": 5.67884697207641e-05, "loss": 1.0062, "step": 143010 }, { "epoch": 0.9137140155629097, "grad_norm": 0.6401690244674683, "learning_rate": 5.67834984630061e-05, "loss": 0.9801, "step": 143020 }, { "epoch": 0.9137779027126484, "grad_norm": 0.9111157655715942, "learning_rate": 5.677852713693239e-05, "loss": 0.8259, "step": 143030 }, { "epoch": 0.9138417898623871, "grad_norm": 0.5553478598594666, "learning_rate": 5.6773555742593065e-05, "loss": 0.8509, "step": 143040 }, { "epoch": 0.9139056770121258, "grad_norm": 0.6528088450431824, "learning_rate": 5.676858428003815e-05, "loss": 0.6361, "step": 143050 }, { "epoch": 0.9139695641618645, "grad_norm": 0.7725486755371094, "learning_rate": 5.676361274931775e-05, "loss": 0.9128, "step": 143060 }, { "epoch": 0.9140334513116032, "grad_norm": 0.8535653948783875, "learning_rate": 5.67586411504819e-05, "loss": 0.9194, "step": 143070 }, { "epoch": 0.9140973384613419, "grad_norm": 0.4683364927768707, "learning_rate": 5.675366948358072e-05, "loss": 0.8434, "step": 143080 }, { "epoch": 0.9141612256110806, "grad_norm": 1.8194103240966797, "learning_rate": 5.6748697748664225e-05, "loss": 1.1056, "step": 143090 }, { "epoch": 0.9142251127608193, "grad_norm": 0.8043122887611389, "learning_rate": 5.674372594578251e-05, "loss": 0.7171, "step": 143100 }, { "epoch": 0.914288999910558, "grad_norm": 0.7784401774406433, "learning_rate": 5.673875407498563e-05, "loss": 0.8041, "step": 143110 }, { "epoch": 0.9143528870602967, "grad_norm": 1.7115349769592285, "learning_rate": 5.673378213632368e-05, "loss": 0.863, "step": 143120 }, { "epoch": 0.9144167742100354, "grad_norm": 0.8383505940437317, "learning_rate": 5.672881012984672e-05, "loss": 0.8503, "step": 143130 }, { "epoch": 0.9144806613597741, "grad_norm": 0.8597167134284973, "learning_rate": 5.672383805560482e-05, "loss": 0.9283, "step": 143140 }, { "epoch": 0.9145445485095128, "grad_norm": 1.1670582294464111, "learning_rate": 5.6718865913648044e-05, "loss": 0.9148, "step": 143150 }, { "epoch": 0.9146084356592515, "grad_norm": 0.7308951616287231, "learning_rate": 5.671389370402648e-05, "loss": 0.8466, "step": 143160 }, { "epoch": 0.9146723228089902, "grad_norm": 0.7195169925689697, "learning_rate": 5.6708921426790194e-05, "loss": 0.9054, "step": 143170 }, { "epoch": 0.914736209958729, "grad_norm": 0.6677785515785217, "learning_rate": 5.670394908198927e-05, "loss": 0.7878, "step": 143180 }, { "epoch": 0.9148000971084677, "grad_norm": 1.0069258213043213, "learning_rate": 5.669897666967378e-05, "loss": 0.8408, "step": 143190 }, { "epoch": 0.9148639842582063, "grad_norm": 0.9950356483459473, "learning_rate": 5.66940041898938e-05, "loss": 0.9091, "step": 143200 }, { "epoch": 0.914927871407945, "grad_norm": 0.8431347012519836, "learning_rate": 5.6689031642699405e-05, "loss": 0.7076, "step": 143210 }, { "epoch": 0.9149917585576837, "grad_norm": 0.8251869678497314, "learning_rate": 5.668405902814067e-05, "loss": 0.9349, "step": 143220 }, { "epoch": 0.9150556457074224, "grad_norm": 1.2934318780899048, "learning_rate": 5.6679086346267685e-05, "loss": 0.913, "step": 143230 }, { "epoch": 0.9151195328571611, "grad_norm": 0.7144001722335815, "learning_rate": 5.6674113597130515e-05, "loss": 0.9816, "step": 143240 }, { "epoch": 0.9151834200068998, "grad_norm": 0.8660211563110352, "learning_rate": 5.666914078077926e-05, "loss": 0.9166, "step": 143250 }, { "epoch": 0.9152473071566385, "grad_norm": 0.8381525874137878, "learning_rate": 5.6664167897263975e-05, "loss": 0.8673, "step": 143260 }, { "epoch": 0.9153111943063772, "grad_norm": 1.0956501960754395, "learning_rate": 5.6659194946634764e-05, "loss": 0.8773, "step": 143270 }, { "epoch": 0.9153750814561159, "grad_norm": 0.6203900575637817, "learning_rate": 5.6654221928941685e-05, "loss": 0.9247, "step": 143280 }, { "epoch": 0.9154389686058546, "grad_norm": 0.8943018317222595, "learning_rate": 5.664924884423485e-05, "loss": 0.8628, "step": 143290 }, { "epoch": 0.9155028557555933, "grad_norm": 1.1351211071014404, "learning_rate": 5.664427569256432e-05, "loss": 0.7142, "step": 143300 }, { "epoch": 0.915566742905332, "grad_norm": 0.7743847370147705, "learning_rate": 5.663930247398018e-05, "loss": 0.9053, "step": 143310 }, { "epoch": 0.9156306300550707, "grad_norm": 0.7925605177879333, "learning_rate": 5.663432918853253e-05, "loss": 0.7516, "step": 143320 }, { "epoch": 0.9156945172048094, "grad_norm": 0.9708541631698608, "learning_rate": 5.6629355836271435e-05, "loss": 1.1418, "step": 143330 }, { "epoch": 0.9157584043545481, "grad_norm": 1.0878440141677856, "learning_rate": 5.6624382417247004e-05, "loss": 0.8489, "step": 143340 }, { "epoch": 0.9158222915042868, "grad_norm": 0.7541216015815735, "learning_rate": 5.66194089315093e-05, "loss": 1.2666, "step": 143350 }, { "epoch": 0.9158861786540256, "grad_norm": 0.780955970287323, "learning_rate": 5.6614435379108434e-05, "loss": 0.6196, "step": 143360 }, { "epoch": 0.9159500658037643, "grad_norm": 0.8869837522506714, "learning_rate": 5.6609461760094476e-05, "loss": 0.7971, "step": 143370 }, { "epoch": 0.916013952953503, "grad_norm": 0.9061605930328369, "learning_rate": 5.660448807451752e-05, "loss": 1.1207, "step": 143380 }, { "epoch": 0.9160778401032417, "grad_norm": 0.8924505710601807, "learning_rate": 5.659951432242765e-05, "loss": 0.8292, "step": 143390 }, { "epoch": 0.9161417272529804, "grad_norm": 1.1079713106155396, "learning_rate": 5.659454050387496e-05, "loss": 0.7443, "step": 143400 }, { "epoch": 0.9162056144027191, "grad_norm": 1.7120331525802612, "learning_rate": 5.658956661890955e-05, "loss": 0.9726, "step": 143410 }, { "epoch": 0.9162695015524578, "grad_norm": 0.6569525003433228, "learning_rate": 5.6584592667581494e-05, "loss": 1.1294, "step": 143420 }, { "epoch": 0.9163333887021965, "grad_norm": 1.015156626701355, "learning_rate": 5.65796186499409e-05, "loss": 0.9498, "step": 143430 }, { "epoch": 0.9163972758519351, "grad_norm": 0.7955893874168396, "learning_rate": 5.657464456603785e-05, "loss": 0.7562, "step": 143440 }, { "epoch": 0.9164611630016738, "grad_norm": 0.87772136926651, "learning_rate": 5.6569670415922436e-05, "loss": 0.8287, "step": 143450 }, { "epoch": 0.9165250501514125, "grad_norm": 0.7367571592330933, "learning_rate": 5.656469619964477e-05, "loss": 0.9409, "step": 143460 }, { "epoch": 0.9165889373011512, "grad_norm": 0.940991997718811, "learning_rate": 5.6559721917254924e-05, "loss": 1.0162, "step": 143470 }, { "epoch": 0.9166528244508899, "grad_norm": 0.8817263841629028, "learning_rate": 5.655474756880301e-05, "loss": 0.8085, "step": 143480 }, { "epoch": 0.9167167116006286, "grad_norm": 0.982813835144043, "learning_rate": 5.654977315433914e-05, "loss": 0.9593, "step": 143490 }, { "epoch": 0.9167805987503673, "grad_norm": 1.1913138628005981, "learning_rate": 5.6544798673913354e-05, "loss": 0.8467, "step": 143500 }, { "epoch": 0.916844485900106, "grad_norm": 0.7284004092216492, "learning_rate": 5.653982412757579e-05, "loss": 0.8006, "step": 143510 }, { "epoch": 0.9169083730498447, "grad_norm": 1.0794998407363892, "learning_rate": 5.653484951537655e-05, "loss": 0.794, "step": 143520 }, { "epoch": 0.9169722601995834, "grad_norm": 1.2948535680770874, "learning_rate": 5.652987483736572e-05, "loss": 0.7273, "step": 143530 }, { "epoch": 0.9170361473493222, "grad_norm": 0.7950011491775513, "learning_rate": 5.652490009359339e-05, "loss": 0.9643, "step": 143540 }, { "epoch": 0.9171000344990609, "grad_norm": 0.5536199808120728, "learning_rate": 5.651992528410967e-05, "loss": 0.8933, "step": 143550 }, { "epoch": 0.9171639216487996, "grad_norm": 0.880750298500061, "learning_rate": 5.6514950408964685e-05, "loss": 1.0077, "step": 143560 }, { "epoch": 0.9172278087985383, "grad_norm": 1.2838913202285767, "learning_rate": 5.6509975468208484e-05, "loss": 0.7484, "step": 143570 }, { "epoch": 0.917291695948277, "grad_norm": 0.8207671046257019, "learning_rate": 5.650500046189122e-05, "loss": 0.8739, "step": 143580 }, { "epoch": 0.9173555830980157, "grad_norm": 0.9268253445625305, "learning_rate": 5.650002539006296e-05, "loss": 1.0368, "step": 143590 }, { "epoch": 0.9174194702477544, "grad_norm": 0.7340649962425232, "learning_rate": 5.649505025277382e-05, "loss": 0.9109, "step": 143600 }, { "epoch": 0.9174833573974931, "grad_norm": 1.0350323915481567, "learning_rate": 5.649007505007391e-05, "loss": 0.9756, "step": 143610 }, { "epoch": 0.9175472445472318, "grad_norm": 0.784234344959259, "learning_rate": 5.6485099782013326e-05, "loss": 0.7373, "step": 143620 }, { "epoch": 0.9176111316969705, "grad_norm": 0.8511738181114197, "learning_rate": 5.648012444864219e-05, "loss": 0.7554, "step": 143630 }, { "epoch": 0.9176750188467092, "grad_norm": 0.9637295007705688, "learning_rate": 5.647514905001059e-05, "loss": 0.6863, "step": 143640 }, { "epoch": 0.9177389059964479, "grad_norm": 0.6820176243782043, "learning_rate": 5.6470173586168625e-05, "loss": 0.974, "step": 143650 }, { "epoch": 0.9178027931461866, "grad_norm": 0.7366352081298828, "learning_rate": 5.646519805716643e-05, "loss": 0.6999, "step": 143660 }, { "epoch": 0.9178666802959253, "grad_norm": 0.7290107607841492, "learning_rate": 5.646022246305409e-05, "loss": 1.0206, "step": 143670 }, { "epoch": 0.9179305674456639, "grad_norm": 1.0230958461761475, "learning_rate": 5.645524680388172e-05, "loss": 0.8786, "step": 143680 }, { "epoch": 0.9179944545954026, "grad_norm": 1.1091904640197754, "learning_rate": 5.645027107969942e-05, "loss": 0.8807, "step": 143690 }, { "epoch": 0.9180583417451413, "grad_norm": 1.9306013584136963, "learning_rate": 5.644529529055733e-05, "loss": 1.2533, "step": 143700 }, { "epoch": 0.91812222889488, "grad_norm": 1.0572997331619263, "learning_rate": 5.644031943650553e-05, "loss": 0.7035, "step": 143710 }, { "epoch": 0.9181861160446187, "grad_norm": 0.9027218818664551, "learning_rate": 5.643534351759414e-05, "loss": 0.8836, "step": 143720 }, { "epoch": 0.9182500031943575, "grad_norm": 0.7371615767478943, "learning_rate": 5.643036753387328e-05, "loss": 0.6614, "step": 143730 }, { "epoch": 0.9183138903440962, "grad_norm": 1.505537986755371, "learning_rate": 5.642539148539306e-05, "loss": 0.8683, "step": 143740 }, { "epoch": 0.9183777774938349, "grad_norm": 0.8361949920654297, "learning_rate": 5.64204153722036e-05, "loss": 0.9926, "step": 143750 }, { "epoch": 0.9184416646435736, "grad_norm": 1.110982894897461, "learning_rate": 5.641543919435496e-05, "loss": 0.8015, "step": 143760 }, { "epoch": 0.9185055517933123, "grad_norm": 1.32961106300354, "learning_rate": 5.641046295189733e-05, "loss": 0.8, "step": 143770 }, { "epoch": 0.918569438943051, "grad_norm": 0.8517293334007263, "learning_rate": 5.640548664488078e-05, "loss": 0.6818, "step": 143780 }, { "epoch": 0.9186333260927897, "grad_norm": 0.8187686204910278, "learning_rate": 5.6400510273355446e-05, "loss": 0.793, "step": 143790 }, { "epoch": 0.9186972132425284, "grad_norm": 2.4176223278045654, "learning_rate": 5.639553383737143e-05, "loss": 0.9948, "step": 143800 }, { "epoch": 0.9187611003922671, "grad_norm": 0.7014775276184082, "learning_rate": 5.6390557336978855e-05, "loss": 1.1245, "step": 143810 }, { "epoch": 0.9188249875420058, "grad_norm": 1.063392996788025, "learning_rate": 5.638558077222784e-05, "loss": 1.0894, "step": 143820 }, { "epoch": 0.9188888746917445, "grad_norm": 0.49587613344192505, "learning_rate": 5.63806041431685e-05, "loss": 0.6732, "step": 143830 }, { "epoch": 0.9189527618414832, "grad_norm": 0.848645031452179, "learning_rate": 5.637562744985097e-05, "loss": 0.9066, "step": 143840 }, { "epoch": 0.9190166489912219, "grad_norm": 0.8816063404083252, "learning_rate": 5.637065069232534e-05, "loss": 0.9941, "step": 143850 }, { "epoch": 0.9190805361409606, "grad_norm": 1.5030635595321655, "learning_rate": 5.6365673870641755e-05, "loss": 0.7683, "step": 143860 }, { "epoch": 0.9191444232906993, "grad_norm": 1.574723482131958, "learning_rate": 5.6360696984850324e-05, "loss": 1.0703, "step": 143870 }, { "epoch": 0.919208310440438, "grad_norm": 1.3411686420440674, "learning_rate": 5.635572003500117e-05, "loss": 0.9338, "step": 143880 }, { "epoch": 0.9192721975901768, "grad_norm": 1.3527319431304932, "learning_rate": 5.6350743021144416e-05, "loss": 1.2366, "step": 143890 }, { "epoch": 0.9193360847399155, "grad_norm": 0.8805691599845886, "learning_rate": 5.634576594333019e-05, "loss": 0.8299, "step": 143900 }, { "epoch": 0.9193999718896542, "grad_norm": 0.6124188899993896, "learning_rate": 5.634078880160861e-05, "loss": 0.8055, "step": 143910 }, { "epoch": 0.9194638590393928, "grad_norm": 1.0732533931732178, "learning_rate": 5.63358115960298e-05, "loss": 1.1149, "step": 143920 }, { "epoch": 0.9195277461891315, "grad_norm": 0.5931747555732727, "learning_rate": 5.633083432664389e-05, "loss": 1.1673, "step": 143930 }, { "epoch": 0.9195916333388702, "grad_norm": 1.0256335735321045, "learning_rate": 5.632585699350099e-05, "loss": 0.8732, "step": 143940 }, { "epoch": 0.9196555204886089, "grad_norm": 0.9125732779502869, "learning_rate": 5.632087959665124e-05, "loss": 0.756, "step": 143950 }, { "epoch": 0.9197194076383476, "grad_norm": 1.4520151615142822, "learning_rate": 5.6315902136144784e-05, "loss": 0.906, "step": 143960 }, { "epoch": 0.9197832947880863, "grad_norm": 0.8130024671554565, "learning_rate": 5.63109246120317e-05, "loss": 0.7474, "step": 143970 }, { "epoch": 0.919847181937825, "grad_norm": 0.40209460258483887, "learning_rate": 5.630594702436217e-05, "loss": 0.9289, "step": 143980 }, { "epoch": 0.9199110690875637, "grad_norm": 0.8355494737625122, "learning_rate": 5.630096937318629e-05, "loss": 0.8839, "step": 143990 }, { "epoch": 0.9199749562373024, "grad_norm": 1.9247257709503174, "learning_rate": 5.629599165855419e-05, "loss": 0.9603, "step": 144000 }, { "epoch": 0.9200388433870411, "grad_norm": 0.7350730299949646, "learning_rate": 5.629101388051602e-05, "loss": 1.0115, "step": 144010 }, { "epoch": 0.9201027305367798, "grad_norm": 0.9462405443191528, "learning_rate": 5.628603603912188e-05, "loss": 1.123, "step": 144020 }, { "epoch": 0.9201666176865185, "grad_norm": 0.7234540581703186, "learning_rate": 5.628105813442194e-05, "loss": 0.8485, "step": 144030 }, { "epoch": 0.9202305048362572, "grad_norm": 1.3080463409423828, "learning_rate": 5.6276080166466294e-05, "loss": 1.0137, "step": 144040 }, { "epoch": 0.9202943919859959, "grad_norm": 1.0539608001708984, "learning_rate": 5.62711021353051e-05, "loss": 0.6729, "step": 144050 }, { "epoch": 0.9203582791357346, "grad_norm": 0.9095763564109802, "learning_rate": 5.626612404098848e-05, "loss": 0.6638, "step": 144060 }, { "epoch": 0.9204221662854734, "grad_norm": 1.001478672027588, "learning_rate": 5.626114588356657e-05, "loss": 0.9633, "step": 144070 }, { "epoch": 0.9204860534352121, "grad_norm": 0.8986124396324158, "learning_rate": 5.62561676630895e-05, "loss": 0.8985, "step": 144080 }, { "epoch": 0.9205499405849508, "grad_norm": 0.4983496367931366, "learning_rate": 5.6251189379607415e-05, "loss": 0.8668, "step": 144090 }, { "epoch": 0.9206138277346895, "grad_norm": 1.6429502964019775, "learning_rate": 5.6246211033170434e-05, "loss": 0.9987, "step": 144100 }, { "epoch": 0.9206777148844282, "grad_norm": 1.0039671659469604, "learning_rate": 5.624123262382872e-05, "loss": 0.8202, "step": 144110 }, { "epoch": 0.9207416020341669, "grad_norm": 1.0661791563034058, "learning_rate": 5.6236254151632385e-05, "loss": 0.7685, "step": 144120 }, { "epoch": 0.9208054891839056, "grad_norm": 0.973341166973114, "learning_rate": 5.623177347295643e-05, "loss": 0.8032, "step": 144130 }, { "epoch": 0.9208693763336443, "grad_norm": 1.365503191947937, "learning_rate": 5.6226794881474464e-05, "loss": 0.7741, "step": 144140 }, { "epoch": 0.920933263483383, "grad_norm": 0.7111403942108154, "learning_rate": 5.622181622728329e-05, "loss": 0.7476, "step": 144150 }, { "epoch": 0.9209971506331217, "grad_norm": 0.641816258430481, "learning_rate": 5.621683751043304e-05, "loss": 0.9546, "step": 144160 }, { "epoch": 0.9210610377828603, "grad_norm": 0.6329175233840942, "learning_rate": 5.6211858730973856e-05, "loss": 0.9538, "step": 144170 }, { "epoch": 0.921124924932599, "grad_norm": 1.5208319425582886, "learning_rate": 5.620687988895589e-05, "loss": 0.8583, "step": 144180 }, { "epoch": 0.9211888120823377, "grad_norm": 1.1061300039291382, "learning_rate": 5.6201900984429255e-05, "loss": 0.8138, "step": 144190 }, { "epoch": 0.9212526992320764, "grad_norm": 0.8774361610412598, "learning_rate": 5.619692201744413e-05, "loss": 0.7494, "step": 144200 }, { "epoch": 0.9213165863818151, "grad_norm": 1.0457643270492554, "learning_rate": 5.6191942988050626e-05, "loss": 0.8806, "step": 144210 }, { "epoch": 0.9213804735315538, "grad_norm": 0.7023540139198303, "learning_rate": 5.618696389629892e-05, "loss": 0.739, "step": 144220 }, { "epoch": 0.9214443606812925, "grad_norm": 0.671416699886322, "learning_rate": 5.6181984742239117e-05, "loss": 0.6146, "step": 144230 }, { "epoch": 0.9215082478310312, "grad_norm": 0.7087250351905823, "learning_rate": 5.6177005525921376e-05, "loss": 1.0222, "step": 144240 }, { "epoch": 0.92157213498077, "grad_norm": 1.0113321542739868, "learning_rate": 5.617202624739585e-05, "loss": 0.7315, "step": 144250 }, { "epoch": 0.9216360221305087, "grad_norm": 1.1815264225006104, "learning_rate": 5.616704690671267e-05, "loss": 0.8089, "step": 144260 }, { "epoch": 0.9216999092802474, "grad_norm": 0.7599946856498718, "learning_rate": 5.616206750392201e-05, "loss": 0.7554, "step": 144270 }, { "epoch": 0.9217637964299861, "grad_norm": 1.0004082918167114, "learning_rate": 5.6157088039074e-05, "loss": 0.8352, "step": 144280 }, { "epoch": 0.9218276835797248, "grad_norm": 1.208046317100525, "learning_rate": 5.615210851221878e-05, "loss": 0.9545, "step": 144290 }, { "epoch": 0.9218915707294635, "grad_norm": 1.0685365200042725, "learning_rate": 5.61471289234065e-05, "loss": 1.0576, "step": 144300 }, { "epoch": 0.9219554578792022, "grad_norm": 1.0764927864074707, "learning_rate": 5.614214927268733e-05, "loss": 0.946, "step": 144310 }, { "epoch": 0.9220193450289409, "grad_norm": 1.0587629079818726, "learning_rate": 5.613716956011139e-05, "loss": 1.0278, "step": 144320 }, { "epoch": 0.9220832321786796, "grad_norm": 1.0095889568328857, "learning_rate": 5.613218978572884e-05, "loss": 1.0609, "step": 144330 }, { "epoch": 0.9221471193284183, "grad_norm": 0.8525044322013855, "learning_rate": 5.6127209949589845e-05, "loss": 1.2406, "step": 144340 }, { "epoch": 0.922211006478157, "grad_norm": 0.7696043252944946, "learning_rate": 5.612223005174454e-05, "loss": 0.8016, "step": 144350 }, { "epoch": 0.9222748936278957, "grad_norm": 0.8853589296340942, "learning_rate": 5.6117250092243076e-05, "loss": 0.929, "step": 144360 }, { "epoch": 0.9223387807776344, "grad_norm": 1.2267171144485474, "learning_rate": 5.611227007113563e-05, "loss": 0.7504, "step": 144370 }, { "epoch": 0.9224026679273731, "grad_norm": 0.9859304428100586, "learning_rate": 5.6107289988472325e-05, "loss": 0.9157, "step": 144380 }, { "epoch": 0.9224665550771118, "grad_norm": 0.824668288230896, "learning_rate": 5.6102309844303324e-05, "loss": 1.1008, "step": 144390 }, { "epoch": 0.9225304422268505, "grad_norm": 0.9470674991607666, "learning_rate": 5.609732963867879e-05, "loss": 0.852, "step": 144400 }, { "epoch": 0.9225943293765891, "grad_norm": 0.7566165924072266, "learning_rate": 5.609234937164886e-05, "loss": 0.8084, "step": 144410 }, { "epoch": 0.9226582165263278, "grad_norm": 0.963897168636322, "learning_rate": 5.60873690432637e-05, "loss": 1.0114, "step": 144420 }, { "epoch": 0.9227221036760666, "grad_norm": 0.7856702208518982, "learning_rate": 5.608238865357348e-05, "loss": 0.9301, "step": 144430 }, { "epoch": 0.9227859908258053, "grad_norm": 1.138763666152954, "learning_rate": 5.6077408202628334e-05, "loss": 0.7774, "step": 144440 }, { "epoch": 0.922849877975544, "grad_norm": 1.0540574789047241, "learning_rate": 5.607242769047843e-05, "loss": 1.1135, "step": 144450 }, { "epoch": 0.9229137651252827, "grad_norm": 0.8265545964241028, "learning_rate": 5.606744711717393e-05, "loss": 0.8917, "step": 144460 }, { "epoch": 0.9229776522750214, "grad_norm": 0.5889626741409302, "learning_rate": 5.6062466482765e-05, "loss": 0.8798, "step": 144470 }, { "epoch": 0.9230415394247601, "grad_norm": 0.7544617652893066, "learning_rate": 5.6057485787301765e-05, "loss": 0.9106, "step": 144480 }, { "epoch": 0.9231054265744988, "grad_norm": 0.7426086664199829, "learning_rate": 5.6052505030834425e-05, "loss": 0.8295, "step": 144490 }, { "epoch": 0.9231693137242375, "grad_norm": 0.8017638921737671, "learning_rate": 5.6047524213413116e-05, "loss": 0.7987, "step": 144500 }, { "epoch": 0.9232332008739762, "grad_norm": 1.0374419689178467, "learning_rate": 5.604254333508802e-05, "loss": 0.885, "step": 144510 }, { "epoch": 0.9232970880237149, "grad_norm": 1.0828471183776855, "learning_rate": 5.603756239590926e-05, "loss": 0.8636, "step": 144520 }, { "epoch": 0.9233609751734536, "grad_norm": 0.7223926186561584, "learning_rate": 5.603258139592704e-05, "loss": 0.8139, "step": 144530 }, { "epoch": 0.9234248623231923, "grad_norm": 0.8874870538711548, "learning_rate": 5.60276003351915e-05, "loss": 0.7938, "step": 144540 }, { "epoch": 0.923488749472931, "grad_norm": 0.7778692841529846, "learning_rate": 5.6022619213752816e-05, "loss": 0.7878, "step": 144550 }, { "epoch": 0.9235526366226697, "grad_norm": 0.779015302658081, "learning_rate": 5.6017638031661144e-05, "loss": 1.0048, "step": 144560 }, { "epoch": 0.9236165237724084, "grad_norm": 0.6786310076713562, "learning_rate": 5.6012656788966656e-05, "loss": 0.7291, "step": 144570 }, { "epoch": 0.9236804109221471, "grad_norm": 0.8063673377037048, "learning_rate": 5.6007675485719504e-05, "loss": 1.0574, "step": 144580 }, { "epoch": 0.9237442980718858, "grad_norm": 1.407467007637024, "learning_rate": 5.600269412196986e-05, "loss": 0.9079, "step": 144590 }, { "epoch": 0.9238081852216246, "grad_norm": 0.8846006393432617, "learning_rate": 5.59977126977679e-05, "loss": 0.9646, "step": 144600 }, { "epoch": 0.9238720723713633, "grad_norm": 0.824082612991333, "learning_rate": 5.5992731213163785e-05, "loss": 0.9403, "step": 144610 }, { "epoch": 0.923935959521102, "grad_norm": 0.7869988083839417, "learning_rate": 5.598774966820768e-05, "loss": 0.9048, "step": 144620 }, { "epoch": 0.9239998466708407, "grad_norm": 1.9946730136871338, "learning_rate": 5.5982768062949755e-05, "loss": 1.2423, "step": 144630 }, { "epoch": 0.9240637338205794, "grad_norm": 0.8373878598213196, "learning_rate": 5.597778639744018e-05, "loss": 1.0149, "step": 144640 }, { "epoch": 0.924127620970318, "grad_norm": 0.6496232151985168, "learning_rate": 5.5972804671729116e-05, "loss": 0.8685, "step": 144650 }, { "epoch": 0.9241915081200567, "grad_norm": 0.8842912912368774, "learning_rate": 5.596782288586676e-05, "loss": 0.9352, "step": 144660 }, { "epoch": 0.9242553952697954, "grad_norm": 0.9695751667022705, "learning_rate": 5.596284103990326e-05, "loss": 1.1568, "step": 144670 }, { "epoch": 0.9243192824195341, "grad_norm": 1.1602424383163452, "learning_rate": 5.595785913388878e-05, "loss": 0.8338, "step": 144680 }, { "epoch": 0.9243831695692728, "grad_norm": 0.8936864733695984, "learning_rate": 5.595287716787351e-05, "loss": 0.915, "step": 144690 }, { "epoch": 0.9244470567190115, "grad_norm": 1.0186363458633423, "learning_rate": 5.5947895141907624e-05, "loss": 0.9493, "step": 144700 }, { "epoch": 0.9245109438687502, "grad_norm": 0.8145350217819214, "learning_rate": 5.594291305604128e-05, "loss": 0.8706, "step": 144710 }, { "epoch": 0.9245748310184889, "grad_norm": 1.0934886932373047, "learning_rate": 5.5937930910324666e-05, "loss": 0.8845, "step": 144720 }, { "epoch": 0.9246387181682276, "grad_norm": 1.0088940858840942, "learning_rate": 5.593294870480794e-05, "loss": 1.0438, "step": 144730 }, { "epoch": 0.9247026053179663, "grad_norm": 0.632199764251709, "learning_rate": 5.5927966439541304e-05, "loss": 0.7483, "step": 144740 }, { "epoch": 0.924766492467705, "grad_norm": 1.0264745950698853, "learning_rate": 5.5922984114574904e-05, "loss": 1.069, "step": 144750 }, { "epoch": 0.9248303796174437, "grad_norm": 0.7027975916862488, "learning_rate": 5.591800172995894e-05, "loss": 1.0355, "step": 144760 }, { "epoch": 0.9248942667671824, "grad_norm": 1.1865230798721313, "learning_rate": 5.591301928574355e-05, "loss": 0.7578, "step": 144770 }, { "epoch": 0.9249581539169212, "grad_norm": 0.8094105124473572, "learning_rate": 5.5908036781978966e-05, "loss": 0.9229, "step": 144780 }, { "epoch": 0.9250220410666599, "grad_norm": 1.438416600227356, "learning_rate": 5.590305421871534e-05, "loss": 1.0329, "step": 144790 }, { "epoch": 0.9250859282163986, "grad_norm": 1.4989200830459595, "learning_rate": 5.5898071596002855e-05, "loss": 1.2769, "step": 144800 }, { "epoch": 0.9251498153661373, "grad_norm": 1.0811078548431396, "learning_rate": 5.589308891389168e-05, "loss": 0.896, "step": 144810 }, { "epoch": 0.925213702515876, "grad_norm": 0.8900967836380005, "learning_rate": 5.5888106172431995e-05, "loss": 0.8607, "step": 144820 }, { "epoch": 0.9252775896656147, "grad_norm": 0.7876869440078735, "learning_rate": 5.5883123371673995e-05, "loss": 0.9373, "step": 144830 }, { "epoch": 0.9253414768153534, "grad_norm": 1.5476784706115723, "learning_rate": 5.5878140511667855e-05, "loss": 0.9396, "step": 144840 }, { "epoch": 0.9254053639650921, "grad_norm": 0.9162848591804504, "learning_rate": 5.587315759246376e-05, "loss": 0.9302, "step": 144850 }, { "epoch": 0.9254692511148308, "grad_norm": 1.4244049787521362, "learning_rate": 5.586817461411188e-05, "loss": 0.8326, "step": 144860 }, { "epoch": 0.9255331382645695, "grad_norm": 1.2231682538986206, "learning_rate": 5.586319157666241e-05, "loss": 0.6845, "step": 144870 }, { "epoch": 0.9255970254143082, "grad_norm": 0.9394405484199524, "learning_rate": 5.585820848016552e-05, "loss": 0.6608, "step": 144880 }, { "epoch": 0.9256609125640469, "grad_norm": 3.920628786087036, "learning_rate": 5.585322532467141e-05, "loss": 0.9313, "step": 144890 }, { "epoch": 0.9257247997137855, "grad_norm": 0.7618585824966431, "learning_rate": 5.5848242110230245e-05, "loss": 0.8513, "step": 144900 }, { "epoch": 0.9257886868635242, "grad_norm": 1.0813332796096802, "learning_rate": 5.5843258836892234e-05, "loss": 0.7353, "step": 144910 }, { "epoch": 0.9258525740132629, "grad_norm": 1.2577931880950928, "learning_rate": 5.583827550470755e-05, "loss": 0.6659, "step": 144920 }, { "epoch": 0.9259164611630016, "grad_norm": 1.2664730548858643, "learning_rate": 5.583329211372637e-05, "loss": 0.7771, "step": 144930 }, { "epoch": 0.9259803483127403, "grad_norm": 0.9695154428482056, "learning_rate": 5.582830866399888e-05, "loss": 1.2447, "step": 144940 }, { "epoch": 0.926044235462479, "grad_norm": 1.5105254650115967, "learning_rate": 5.5823325155575314e-05, "loss": 0.7378, "step": 144950 }, { "epoch": 0.9261081226122178, "grad_norm": 0.6907379627227783, "learning_rate": 5.5818341588505806e-05, "loss": 1.249, "step": 144960 }, { "epoch": 0.9261720097619565, "grad_norm": 0.8063596487045288, "learning_rate": 5.581335796284057e-05, "loss": 0.8475, "step": 144970 }, { "epoch": 0.9262358969116952, "grad_norm": 1.5274707078933716, "learning_rate": 5.5808374278629795e-05, "loss": 1.2363, "step": 144980 }, { "epoch": 0.9262997840614339, "grad_norm": 0.9278043508529663, "learning_rate": 5.580339053592366e-05, "loss": 0.9631, "step": 144990 }, { "epoch": 0.9263636712111726, "grad_norm": 0.9493452310562134, "learning_rate": 5.579840673477236e-05, "loss": 0.8491, "step": 145000 }, { "epoch": 0.9264275583609113, "grad_norm": 0.5410763621330261, "learning_rate": 5.579342287522609e-05, "loss": 0.9043, "step": 145010 }, { "epoch": 0.92649144551065, "grad_norm": 0.8283954858779907, "learning_rate": 5.578843895733504e-05, "loss": 0.8748, "step": 145020 }, { "epoch": 0.9265553326603887, "grad_norm": 0.9432761073112488, "learning_rate": 5.57834549811494e-05, "loss": 1.3108, "step": 145030 }, { "epoch": 0.9266192198101274, "grad_norm": 0.6497068405151367, "learning_rate": 5.5778470946719366e-05, "loss": 0.8953, "step": 145040 }, { "epoch": 0.9266831069598661, "grad_norm": 0.8885396122932434, "learning_rate": 5.5773486854095134e-05, "loss": 0.7763, "step": 145050 }, { "epoch": 0.9267469941096048, "grad_norm": 0.7947267293930054, "learning_rate": 5.576850270332689e-05, "loss": 0.6814, "step": 145060 }, { "epoch": 0.9268108812593435, "grad_norm": 0.9257674813270569, "learning_rate": 5.576351849446484e-05, "loss": 0.8433, "step": 145070 }, { "epoch": 0.9268747684090822, "grad_norm": 0.6727604866027832, "learning_rate": 5.575853422755917e-05, "loss": 0.8165, "step": 145080 }, { "epoch": 0.9269386555588209, "grad_norm": 0.8554814457893372, "learning_rate": 5.5753549902660076e-05, "loss": 0.9877, "step": 145090 }, { "epoch": 0.9270025427085596, "grad_norm": 1.5152359008789062, "learning_rate": 5.574856551981775e-05, "loss": 0.9839, "step": 145100 }, { "epoch": 0.9270664298582983, "grad_norm": 1.0759176015853882, "learning_rate": 5.5743581079082405e-05, "loss": 0.6801, "step": 145110 }, { "epoch": 0.927130317008037, "grad_norm": 1.2060096263885498, "learning_rate": 5.573859658050423e-05, "loss": 0.7048, "step": 145120 }, { "epoch": 0.9271942041577758, "grad_norm": 2.348177671432495, "learning_rate": 5.5733612024133416e-05, "loss": 1.1837, "step": 145130 }, { "epoch": 0.9272580913075144, "grad_norm": 0.8047994375228882, "learning_rate": 5.572862741002017e-05, "loss": 0.7672, "step": 145140 }, { "epoch": 0.9273219784572531, "grad_norm": 1.255176305770874, "learning_rate": 5.57236427382147e-05, "loss": 0.7974, "step": 145150 }, { "epoch": 0.9273858656069918, "grad_norm": 0.7443729639053345, "learning_rate": 5.571865800876719e-05, "loss": 1.0677, "step": 145160 }, { "epoch": 0.9274497527567305, "grad_norm": 0.9563167691230774, "learning_rate": 5.571367322172785e-05, "loss": 0.7602, "step": 145170 }, { "epoch": 0.9275136399064692, "grad_norm": 1.492719292640686, "learning_rate": 5.5708688377146866e-05, "loss": 0.8196, "step": 145180 }, { "epoch": 0.9275775270562079, "grad_norm": 1.4552744626998901, "learning_rate": 5.570370347507446e-05, "loss": 0.7228, "step": 145190 }, { "epoch": 0.9276414142059466, "grad_norm": 1.105858564376831, "learning_rate": 5.569871851556082e-05, "loss": 0.9919, "step": 145200 }, { "epoch": 0.9277053013556853, "grad_norm": 0.6973342299461365, "learning_rate": 5.5693733498656165e-05, "loss": 0.6737, "step": 145210 }, { "epoch": 0.927769188505424, "grad_norm": 0.8317189812660217, "learning_rate": 5.5688748424410675e-05, "loss": 0.7846, "step": 145220 }, { "epoch": 0.9278330756551627, "grad_norm": 1.7505478858947754, "learning_rate": 5.568376329287458e-05, "loss": 0.8123, "step": 145230 }, { "epoch": 0.9278969628049014, "grad_norm": 1.1510494947433472, "learning_rate": 5.567877810409806e-05, "loss": 0.9568, "step": 145240 }, { "epoch": 0.9279608499546401, "grad_norm": 0.599578857421875, "learning_rate": 5.567379285813135e-05, "loss": 1.1179, "step": 145250 }, { "epoch": 0.9280247371043788, "grad_norm": 0.6046319007873535, "learning_rate": 5.566880755502462e-05, "loss": 0.9872, "step": 145260 }, { "epoch": 0.9280886242541175, "grad_norm": 0.9385390281677246, "learning_rate": 5.5663822194828095e-05, "loss": 0.6484, "step": 145270 }, { "epoch": 0.9281525114038562, "grad_norm": 1.1661081314086914, "learning_rate": 5.565883677759198e-05, "loss": 0.7681, "step": 145280 }, { "epoch": 0.928216398553595, "grad_norm": 0.9337068200111389, "learning_rate": 5.565385130336649e-05, "loss": 1.2074, "step": 145290 }, { "epoch": 0.9282802857033337, "grad_norm": 0.7268047332763672, "learning_rate": 5.564886577220181e-05, "loss": 1.2321, "step": 145300 }, { "epoch": 0.9283441728530724, "grad_norm": 0.8471580743789673, "learning_rate": 5.564388018414818e-05, "loss": 0.7306, "step": 145310 }, { "epoch": 0.9284080600028111, "grad_norm": 1.361351728439331, "learning_rate": 5.563889453925579e-05, "loss": 0.8582, "step": 145320 }, { "epoch": 0.9284719471525498, "grad_norm": 0.8351898789405823, "learning_rate": 5.563390883757485e-05, "loss": 0.9428, "step": 145330 }, { "epoch": 0.9285358343022885, "grad_norm": 0.5664870142936707, "learning_rate": 5.562892307915559e-05, "loss": 0.8726, "step": 145340 }, { "epoch": 0.9285997214520272, "grad_norm": 0.9608287215232849, "learning_rate": 5.56239372640482e-05, "loss": 0.8297, "step": 145350 }, { "epoch": 0.9286636086017659, "grad_norm": 0.5610537528991699, "learning_rate": 5.5618951392302886e-05, "loss": 1.07, "step": 145360 }, { "epoch": 0.9287274957515046, "grad_norm": 1.0210973024368286, "learning_rate": 5.561396546396988e-05, "loss": 0.8373, "step": 145370 }, { "epoch": 0.9287913829012432, "grad_norm": 0.8455613255500793, "learning_rate": 5.560897947909938e-05, "loss": 0.7514, "step": 145380 }, { "epoch": 0.9288552700509819, "grad_norm": 0.7670028805732727, "learning_rate": 5.56039934377416e-05, "loss": 0.7576, "step": 145390 }, { "epoch": 0.9289191572007206, "grad_norm": 0.9094594717025757, "learning_rate": 5.559900733994676e-05, "loss": 0.9407, "step": 145400 }, { "epoch": 0.9289830443504593, "grad_norm": 0.8494235873222351, "learning_rate": 5.559402118576508e-05, "loss": 0.7876, "step": 145410 }, { "epoch": 0.929046931500198, "grad_norm": 1.263748049736023, "learning_rate": 5.558903497524676e-05, "loss": 0.9246, "step": 145420 }, { "epoch": 0.9291108186499367, "grad_norm": 0.7942246794700623, "learning_rate": 5.558404870844201e-05, "loss": 0.8941, "step": 145430 }, { "epoch": 0.9291747057996754, "grad_norm": 1.4969481229782104, "learning_rate": 5.557906238540108e-05, "loss": 1.0584, "step": 145440 }, { "epoch": 0.9292385929494141, "grad_norm": 1.607496738433838, "learning_rate": 5.557407600617416e-05, "loss": 1.0476, "step": 145450 }, { "epoch": 0.9293024800991528, "grad_norm": 0.9495954513549805, "learning_rate": 5.5569089570811464e-05, "loss": 1.1759, "step": 145460 }, { "epoch": 0.9293663672488915, "grad_norm": 1.1366685628890991, "learning_rate": 5.556410307936322e-05, "loss": 1.1819, "step": 145470 }, { "epoch": 0.9294302543986303, "grad_norm": 1.110145926475525, "learning_rate": 5.555911653187964e-05, "loss": 0.8672, "step": 145480 }, { "epoch": 0.929494141548369, "grad_norm": 0.8252993822097778, "learning_rate": 5.5554129928410957e-05, "loss": 0.7987, "step": 145490 }, { "epoch": 0.9295580286981077, "grad_norm": 1.1678286790847778, "learning_rate": 5.554914326900739e-05, "loss": 0.8637, "step": 145500 }, { "epoch": 0.9296219158478464, "grad_norm": 2.4226267337799072, "learning_rate": 5.554415655371913e-05, "loss": 1.0453, "step": 145510 }, { "epoch": 0.9296858029975851, "grad_norm": 1.283092975616455, "learning_rate": 5.553916978259642e-05, "loss": 0.7881, "step": 145520 }, { "epoch": 0.9297496901473238, "grad_norm": 1.1610386371612549, "learning_rate": 5.55341829556895e-05, "loss": 0.7663, "step": 145530 }, { "epoch": 0.9298135772970625, "grad_norm": 1.2389222383499146, "learning_rate": 5.552919607304854e-05, "loss": 0.88, "step": 145540 }, { "epoch": 0.9298774644468012, "grad_norm": 0.8321581482887268, "learning_rate": 5.552420913472381e-05, "loss": 0.7218, "step": 145550 }, { "epoch": 0.9299413515965399, "grad_norm": 1.4073259830474854, "learning_rate": 5.5519222140765514e-05, "loss": 0.9816, "step": 145560 }, { "epoch": 0.9300052387462786, "grad_norm": 0.8335140347480774, "learning_rate": 5.5514235091223877e-05, "loss": 1.096, "step": 145570 }, { "epoch": 0.9300691258960173, "grad_norm": 1.2483246326446533, "learning_rate": 5.5509247986149126e-05, "loss": 0.8831, "step": 145580 }, { "epoch": 0.930133013045756, "grad_norm": 0.7681006193161011, "learning_rate": 5.550426082559147e-05, "loss": 0.7553, "step": 145590 }, { "epoch": 0.9301969001954947, "grad_norm": 0.8035659193992615, "learning_rate": 5.5499273609601154e-05, "loss": 0.8508, "step": 145600 }, { "epoch": 0.9302607873452334, "grad_norm": 1.2194147109985352, "learning_rate": 5.5494286338228384e-05, "loss": 0.9484, "step": 145610 }, { "epoch": 0.930324674494972, "grad_norm": 1.6273916959762573, "learning_rate": 5.54892990115234e-05, "loss": 0.8937, "step": 145620 }, { "epoch": 0.9303885616447107, "grad_norm": 0.48322996497154236, "learning_rate": 5.5484311629536425e-05, "loss": 0.6711, "step": 145630 }, { "epoch": 0.9304524487944494, "grad_norm": 0.7854679822921753, "learning_rate": 5.5479324192317694e-05, "loss": 0.8923, "step": 145640 }, { "epoch": 0.9305163359441881, "grad_norm": 0.9290236830711365, "learning_rate": 5.547433669991743e-05, "loss": 0.8368, "step": 145650 }, { "epoch": 0.9305802230939269, "grad_norm": 1.1597179174423218, "learning_rate": 5.546934915238585e-05, "loss": 0.7749, "step": 145660 }, { "epoch": 0.9306441102436656, "grad_norm": 1.3749240636825562, "learning_rate": 5.54643615497732e-05, "loss": 0.9082, "step": 145670 }, { "epoch": 0.9307079973934043, "grad_norm": 0.7642197608947754, "learning_rate": 5.54593738921297e-05, "loss": 1.0149, "step": 145680 }, { "epoch": 0.930771884543143, "grad_norm": 0.974104642868042, "learning_rate": 5.545438617950558e-05, "loss": 0.635, "step": 145690 }, { "epoch": 0.9308357716928817, "grad_norm": 0.8045002818107605, "learning_rate": 5.544939841195108e-05, "loss": 0.7151, "step": 145700 }, { "epoch": 0.9308996588426204, "grad_norm": 1.036468267440796, "learning_rate": 5.544441058951641e-05, "loss": 0.9897, "step": 145710 }, { "epoch": 0.9309635459923591, "grad_norm": 0.5775062441825867, "learning_rate": 5.5439422712251835e-05, "loss": 0.7542, "step": 145720 }, { "epoch": 0.9310274331420978, "grad_norm": 0.88588947057724, "learning_rate": 5.543443478020754e-05, "loss": 0.7627, "step": 145730 }, { "epoch": 0.9310913202918365, "grad_norm": 1.2807823419570923, "learning_rate": 5.5429446793433814e-05, "loss": 1.1104, "step": 145740 }, { "epoch": 0.9311552074415752, "grad_norm": 1.0863866806030273, "learning_rate": 5.5424458751980844e-05, "loss": 0.6739, "step": 145750 }, { "epoch": 0.9312190945913139, "grad_norm": 1.238873839378357, "learning_rate": 5.5419470655898883e-05, "loss": 0.9149, "step": 145760 }, { "epoch": 0.9312829817410526, "grad_norm": 0.8662456274032593, "learning_rate": 5.541448250523817e-05, "loss": 0.8491, "step": 145770 }, { "epoch": 0.9313468688907913, "grad_norm": 1.59992253780365, "learning_rate": 5.5409494300048935e-05, "loss": 1.2597, "step": 145780 }, { "epoch": 0.93141075604053, "grad_norm": 0.7041088342666626, "learning_rate": 5.540450604038141e-05, "loss": 0.8117, "step": 145790 }, { "epoch": 0.9314746431902687, "grad_norm": 0.9120072722434998, "learning_rate": 5.539951772628583e-05, "loss": 0.8138, "step": 145800 }, { "epoch": 0.9315385303400074, "grad_norm": 0.865118682384491, "learning_rate": 5.539452935781244e-05, "loss": 0.7262, "step": 145810 }, { "epoch": 0.9316024174897461, "grad_norm": 0.5491125583648682, "learning_rate": 5.5389540935011466e-05, "loss": 0.7411, "step": 145820 }, { "epoch": 0.9316663046394849, "grad_norm": 0.8732421398162842, "learning_rate": 5.538455245793316e-05, "loss": 0.8556, "step": 145830 }, { "epoch": 0.9317301917892236, "grad_norm": 0.7927635312080383, "learning_rate": 5.5379563926627745e-05, "loss": 1.0176, "step": 145840 }, { "epoch": 0.9317940789389623, "grad_norm": 1.8838095664978027, "learning_rate": 5.5374575341145476e-05, "loss": 0.8793, "step": 145850 }, { "epoch": 0.931857966088701, "grad_norm": 0.998735785484314, "learning_rate": 5.536958670153658e-05, "loss": 1.2281, "step": 145860 }, { "epoch": 0.9319218532384396, "grad_norm": 0.7903896570205688, "learning_rate": 5.53645980078513e-05, "loss": 1.2174, "step": 145870 }, { "epoch": 0.9319857403881783, "grad_norm": 0.8727949857711792, "learning_rate": 5.535960926013987e-05, "loss": 1.0978, "step": 145880 }, { "epoch": 0.932049627537917, "grad_norm": 1.2408875226974487, "learning_rate": 5.5354620458452546e-05, "loss": 0.937, "step": 145890 }, { "epoch": 0.9321135146876557, "grad_norm": 0.8686769604682922, "learning_rate": 5.5349631602839557e-05, "loss": 0.7722, "step": 145900 }, { "epoch": 0.9321774018373944, "grad_norm": 0.914225697517395, "learning_rate": 5.534464269335116e-05, "loss": 0.7863, "step": 145910 }, { "epoch": 0.9322412889871331, "grad_norm": 1.2484056949615479, "learning_rate": 5.533965373003758e-05, "loss": 0.8891, "step": 145920 }, { "epoch": 0.9323051761368718, "grad_norm": 0.7330338358879089, "learning_rate": 5.533466471294906e-05, "loss": 0.9297, "step": 145930 }, { "epoch": 0.9323690632866105, "grad_norm": 1.5863157510757446, "learning_rate": 5.532967564213586e-05, "loss": 0.926, "step": 145940 }, { "epoch": 0.9324329504363492, "grad_norm": 1.0102980136871338, "learning_rate": 5.532468651764822e-05, "loss": 0.9029, "step": 145950 }, { "epoch": 0.9324968375860879, "grad_norm": 1.0613675117492676, "learning_rate": 5.531969733953637e-05, "loss": 0.8485, "step": 145960 }, { "epoch": 0.9325607247358266, "grad_norm": 0.8373472690582275, "learning_rate": 5.531470810785057e-05, "loss": 0.8047, "step": 145970 }, { "epoch": 0.9326246118855653, "grad_norm": 3.7152907848358154, "learning_rate": 5.5309718822641054e-05, "loss": 0.8467, "step": 145980 }, { "epoch": 0.932688499035304, "grad_norm": 1.0984721183776855, "learning_rate": 5.5304729483958073e-05, "loss": 0.6046, "step": 145990 }, { "epoch": 0.9327523861850427, "grad_norm": 0.7963919043540955, "learning_rate": 5.529974009185189e-05, "loss": 0.9892, "step": 146000 }, { "epoch": 0.9328162733347815, "grad_norm": 1.0803087949752808, "learning_rate": 5.529475064637274e-05, "loss": 0.965, "step": 146010 }, { "epoch": 0.9328801604845202, "grad_norm": 0.7040061950683594, "learning_rate": 5.528976114757086e-05, "loss": 0.9054, "step": 146020 }, { "epoch": 0.9329440476342589, "grad_norm": 1.2263482809066772, "learning_rate": 5.528477159549652e-05, "loss": 0.6155, "step": 146030 }, { "epoch": 0.9330079347839976, "grad_norm": 0.532351016998291, "learning_rate": 5.5279781990199954e-05, "loss": 0.8038, "step": 146040 }, { "epoch": 0.9330718219337363, "grad_norm": 0.8017789125442505, "learning_rate": 5.527479233173142e-05, "loss": 0.8907, "step": 146050 }, { "epoch": 0.933135709083475, "grad_norm": 3.3494420051574707, "learning_rate": 5.5269802620141155e-05, "loss": 1.1866, "step": 146060 }, { "epoch": 0.9331995962332137, "grad_norm": 0.9478211402893066, "learning_rate": 5.526481285547943e-05, "loss": 0.8094, "step": 146070 }, { "epoch": 0.9332634833829524, "grad_norm": 0.8863970637321472, "learning_rate": 5.525982303779648e-05, "loss": 0.7409, "step": 146080 }, { "epoch": 0.9333273705326911, "grad_norm": 1.6237622499465942, "learning_rate": 5.525483316714256e-05, "loss": 0.9142, "step": 146090 }, { "epoch": 0.9333912576824298, "grad_norm": 0.8216589689254761, "learning_rate": 5.524984324356792e-05, "loss": 0.7625, "step": 146100 }, { "epoch": 0.9334551448321684, "grad_norm": 2.1370699405670166, "learning_rate": 5.524485326712282e-05, "loss": 0.9642, "step": 146110 }, { "epoch": 0.9335190319819071, "grad_norm": 1.2563636302947998, "learning_rate": 5.5239863237857516e-05, "loss": 1.0334, "step": 146120 }, { "epoch": 0.9335829191316458, "grad_norm": 0.9968759417533875, "learning_rate": 5.5234873155822256e-05, "loss": 1.0448, "step": 146130 }, { "epoch": 0.9336468062813845, "grad_norm": 1.1890705823898315, "learning_rate": 5.5229883021067286e-05, "loss": 0.9123, "step": 146140 }, { "epoch": 0.9337106934311232, "grad_norm": 0.9829585552215576, "learning_rate": 5.522489283364286e-05, "loss": 0.8136, "step": 146150 }, { "epoch": 0.9337745805808619, "grad_norm": 1.1096529960632324, "learning_rate": 5.521990259359925e-05, "loss": 1.0181, "step": 146160 }, { "epoch": 0.9338384677306006, "grad_norm": 0.9074599146842957, "learning_rate": 5.521491230098671e-05, "loss": 0.8634, "step": 146170 }, { "epoch": 0.9339023548803393, "grad_norm": 0.8308917284011841, "learning_rate": 5.520992195585549e-05, "loss": 0.7422, "step": 146180 }, { "epoch": 0.933966242030078, "grad_norm": 0.6945448517799377, "learning_rate": 5.5204931558255857e-05, "loss": 0.7908, "step": 146190 }, { "epoch": 0.9340301291798168, "grad_norm": 1.085972547531128, "learning_rate": 5.519994110823805e-05, "loss": 1.0072, "step": 146200 }, { "epoch": 0.9340940163295555, "grad_norm": 1.0287305116653442, "learning_rate": 5.519495060585235e-05, "loss": 1.133, "step": 146210 }, { "epoch": 0.9341579034792942, "grad_norm": 0.679003894329071, "learning_rate": 5.5189960051148995e-05, "loss": 0.726, "step": 146220 }, { "epoch": 0.9342217906290329, "grad_norm": 1.126184105873108, "learning_rate": 5.5184969444178246e-05, "loss": 1.1845, "step": 146230 }, { "epoch": 0.9342856777787716, "grad_norm": 0.5712942481040955, "learning_rate": 5.517997878499037e-05, "loss": 0.7351, "step": 146240 }, { "epoch": 0.9343495649285103, "grad_norm": 0.9215618371963501, "learning_rate": 5.517498807363564e-05, "loss": 0.9922, "step": 146250 }, { "epoch": 0.934413452078249, "grad_norm": 1.0021543502807617, "learning_rate": 5.516999731016429e-05, "loss": 0.9386, "step": 146260 }, { "epoch": 0.9344773392279877, "grad_norm": 0.5854945778846741, "learning_rate": 5.516500649462659e-05, "loss": 0.6703, "step": 146270 }, { "epoch": 0.9345412263777264, "grad_norm": 0.7984985709190369, "learning_rate": 5.5160015627072824e-05, "loss": 0.8576, "step": 146280 }, { "epoch": 0.9346051135274651, "grad_norm": 2.2100253105163574, "learning_rate": 5.5155024707553226e-05, "loss": 1.2271, "step": 146290 }, { "epoch": 0.9346690006772038, "grad_norm": 0.6737677454948425, "learning_rate": 5.5150033736118065e-05, "loss": 0.617, "step": 146300 }, { "epoch": 0.9347328878269425, "grad_norm": 0.48655664920806885, "learning_rate": 5.514504271281762e-05, "loss": 0.6696, "step": 146310 }, { "epoch": 0.9347967749766812, "grad_norm": 2.0812628269195557, "learning_rate": 5.514005163770214e-05, "loss": 1.1076, "step": 146320 }, { "epoch": 0.9348606621264199, "grad_norm": 0.6728872060775757, "learning_rate": 5.513506051082189e-05, "loss": 0.9832, "step": 146330 }, { "epoch": 0.9349245492761586, "grad_norm": 1.2701008319854736, "learning_rate": 5.513006933222714e-05, "loss": 0.9593, "step": 146340 }, { "epoch": 0.9349884364258972, "grad_norm": 1.0993520021438599, "learning_rate": 5.5125078101968155e-05, "loss": 0.846, "step": 146350 }, { "epoch": 0.935052323575636, "grad_norm": 0.947177529335022, "learning_rate": 5.5120086820095195e-05, "loss": 0.9143, "step": 146360 }, { "epoch": 0.9351162107253747, "grad_norm": 0.9848998785018921, "learning_rate": 5.511509548665854e-05, "loss": 0.8256, "step": 146370 }, { "epoch": 0.9351800978751134, "grad_norm": 1.219247579574585, "learning_rate": 5.511010410170844e-05, "loss": 0.7707, "step": 146380 }, { "epoch": 0.9352439850248521, "grad_norm": 0.7544047832489014, "learning_rate": 5.510511266529518e-05, "loss": 0.9864, "step": 146390 }, { "epoch": 0.9353078721745908, "grad_norm": 0.7642074227333069, "learning_rate": 5.510012117746901e-05, "loss": 0.7728, "step": 146400 }, { "epoch": 0.9353717593243295, "grad_norm": 0.9915320873260498, "learning_rate": 5.509512963828021e-05, "loss": 1.2914, "step": 146410 }, { "epoch": 0.9354356464740682, "grad_norm": 0.9371116757392883, "learning_rate": 5.509013804777904e-05, "loss": 0.8414, "step": 146420 }, { "epoch": 0.9354995336238069, "grad_norm": 0.4904581904411316, "learning_rate": 5.508514640601579e-05, "loss": 0.9459, "step": 146430 }, { "epoch": 0.9355634207735456, "grad_norm": 0.9960023760795593, "learning_rate": 5.508015471304071e-05, "loss": 0.8509, "step": 146440 }, { "epoch": 0.9356273079232843, "grad_norm": 0.8758112788200378, "learning_rate": 5.507516296890407e-05, "loss": 0.8133, "step": 146450 }, { "epoch": 0.935691195073023, "grad_norm": 0.8281605243682861, "learning_rate": 5.507017117365616e-05, "loss": 0.9745, "step": 146460 }, { "epoch": 0.9357550822227617, "grad_norm": 0.9094333052635193, "learning_rate": 5.5065179327347224e-05, "loss": 0.9971, "step": 146470 }, { "epoch": 0.9358189693725004, "grad_norm": 1.2272045612335205, "learning_rate": 5.5060187430027565e-05, "loss": 0.8394, "step": 146480 }, { "epoch": 0.9358828565222391, "grad_norm": 1.1439415216445923, "learning_rate": 5.505519548174745e-05, "loss": 0.9516, "step": 146490 }, { "epoch": 0.9359467436719778, "grad_norm": 0.7094035744667053, "learning_rate": 5.5050203482557115e-05, "loss": 1.0136, "step": 146500 }, { "epoch": 0.9360106308217165, "grad_norm": 0.6839352250099182, "learning_rate": 5.5045211432506884e-05, "loss": 0.7337, "step": 146510 }, { "epoch": 0.9360745179714552, "grad_norm": 2.7014598846435547, "learning_rate": 5.504021933164699e-05, "loss": 1.0982, "step": 146520 }, { "epoch": 0.936138405121194, "grad_norm": 1.0742672681808472, "learning_rate": 5.503522718002774e-05, "loss": 0.9552, "step": 146530 }, { "epoch": 0.9362022922709327, "grad_norm": 0.7901598811149597, "learning_rate": 5.5030234977699394e-05, "loss": 0.8024, "step": 146540 }, { "epoch": 0.9362661794206714, "grad_norm": 0.9857130646705627, "learning_rate": 5.502524272471223e-05, "loss": 1.0354, "step": 146550 }, { "epoch": 0.9363300665704101, "grad_norm": 0.9056035280227661, "learning_rate": 5.502025042111654e-05, "loss": 0.9316, "step": 146560 }, { "epoch": 0.9363939537201488, "grad_norm": 0.8060906529426575, "learning_rate": 5.501525806696257e-05, "loss": 0.9312, "step": 146570 }, { "epoch": 0.9364578408698875, "grad_norm": 1.0485197305679321, "learning_rate": 5.5010265662300606e-05, "loss": 0.8028, "step": 146580 }, { "epoch": 0.9365217280196262, "grad_norm": 0.9534648656845093, "learning_rate": 5.500527320718094e-05, "loss": 0.6943, "step": 146590 }, { "epoch": 0.9365856151693648, "grad_norm": 0.6805192828178406, "learning_rate": 5.500028070165385e-05, "loss": 0.8013, "step": 146600 }, { "epoch": 0.9366495023191035, "grad_norm": 0.996015191078186, "learning_rate": 5.49952881457696e-05, "loss": 0.7835, "step": 146610 }, { "epoch": 0.9367133894688422, "grad_norm": 1.0089763402938843, "learning_rate": 5.4990295539578474e-05, "loss": 0.7471, "step": 146620 }, { "epoch": 0.9367772766185809, "grad_norm": 1.1047331094741821, "learning_rate": 5.498530288313075e-05, "loss": 0.8188, "step": 146630 }, { "epoch": 0.9368411637683196, "grad_norm": 1.0767524242401123, "learning_rate": 5.4980310176476726e-05, "loss": 0.7166, "step": 146640 }, { "epoch": 0.9369050509180583, "grad_norm": 1.266998291015625, "learning_rate": 5.497531741966666e-05, "loss": 0.8517, "step": 146650 }, { "epoch": 0.936968938067797, "grad_norm": 0.7340264320373535, "learning_rate": 5.497032461275085e-05, "loss": 0.8263, "step": 146660 }, { "epoch": 0.9370328252175357, "grad_norm": 0.8520843386650085, "learning_rate": 5.496533175577957e-05, "loss": 1.0392, "step": 146670 }, { "epoch": 0.9370967123672744, "grad_norm": 1.1523327827453613, "learning_rate": 5.4960338848803084e-05, "loss": 0.8068, "step": 146680 }, { "epoch": 0.9371605995170131, "grad_norm": 1.1300572156906128, "learning_rate": 5.4955345891871716e-05, "loss": 0.7149, "step": 146690 }, { "epoch": 0.9372244866667518, "grad_norm": 1.2153987884521484, "learning_rate": 5.495035288503573e-05, "loss": 0.8757, "step": 146700 }, { "epoch": 0.9372883738164905, "grad_norm": 0.7240068912506104, "learning_rate": 5.49453598283454e-05, "loss": 0.7754, "step": 146710 }, { "epoch": 0.9373522609662293, "grad_norm": 0.8426817059516907, "learning_rate": 5.494036672185102e-05, "loss": 0.8532, "step": 146720 }, { "epoch": 0.937416148115968, "grad_norm": 0.8055009245872498, "learning_rate": 5.4935373565602864e-05, "loss": 0.8759, "step": 146730 }, { "epoch": 0.9374800352657067, "grad_norm": 1.1292035579681396, "learning_rate": 5.4930380359651244e-05, "loss": 0.9746, "step": 146740 }, { "epoch": 0.9375439224154454, "grad_norm": 0.588070809841156, "learning_rate": 5.492538710404642e-05, "loss": 0.8524, "step": 146750 }, { "epoch": 0.9376078095651841, "grad_norm": 0.8825819492340088, "learning_rate": 5.492039379883869e-05, "loss": 0.8543, "step": 146760 }, { "epoch": 0.9376716967149228, "grad_norm": 0.8504408001899719, "learning_rate": 5.491540044407833e-05, "loss": 0.8576, "step": 146770 }, { "epoch": 0.9377355838646615, "grad_norm": 0.845874011516571, "learning_rate": 5.491040703981564e-05, "loss": 0.6989, "step": 146780 }, { "epoch": 0.9377994710144002, "grad_norm": 0.8666792511940002, "learning_rate": 5.4905413586100904e-05, "loss": 0.8939, "step": 146790 }, { "epoch": 0.9378633581641389, "grad_norm": 1.1524525880813599, "learning_rate": 5.4900420082984416e-05, "loss": 1.1092, "step": 146800 }, { "epoch": 0.9379272453138776, "grad_norm": 0.5927907824516296, "learning_rate": 5.489542653051646e-05, "loss": 0.6445, "step": 146810 }, { "epoch": 0.9379911324636163, "grad_norm": 0.8610501885414124, "learning_rate": 5.4890432928747306e-05, "loss": 0.5961, "step": 146820 }, { "epoch": 0.938055019613355, "grad_norm": 0.6616340279579163, "learning_rate": 5.488543927772727e-05, "loss": 0.952, "step": 146830 }, { "epoch": 0.9381189067630936, "grad_norm": 0.5525166988372803, "learning_rate": 5.488044557750662e-05, "loss": 1.0917, "step": 146840 }, { "epoch": 0.9381827939128323, "grad_norm": 1.3952975273132324, "learning_rate": 5.487545182813568e-05, "loss": 0.979, "step": 146850 }, { "epoch": 0.938246681062571, "grad_norm": 0.9401289224624634, "learning_rate": 5.4870458029664714e-05, "loss": 0.8715, "step": 146860 }, { "epoch": 0.9383105682123097, "grad_norm": 1.1923482418060303, "learning_rate": 5.486546418214402e-05, "loss": 1.0171, "step": 146870 }, { "epoch": 0.9383744553620484, "grad_norm": 1.33669912815094, "learning_rate": 5.486047028562391e-05, "loss": 0.7589, "step": 146880 }, { "epoch": 0.9384383425117871, "grad_norm": 0.8979326486587524, "learning_rate": 5.4855476340154647e-05, "loss": 0.6521, "step": 146890 }, { "epoch": 0.9385022296615259, "grad_norm": 0.5857362747192383, "learning_rate": 5.4850482345786534e-05, "loss": 0.6615, "step": 146900 }, { "epoch": 0.9385661168112646, "grad_norm": 1.1938517093658447, "learning_rate": 5.484548830256987e-05, "loss": 0.7288, "step": 146910 }, { "epoch": 0.9386300039610033, "grad_norm": 0.7541248202323914, "learning_rate": 5.484049421055495e-05, "loss": 0.613, "step": 146920 }, { "epoch": 0.938693891110742, "grad_norm": 1.009811282157898, "learning_rate": 5.483550006979206e-05, "loss": 0.7539, "step": 146930 }, { "epoch": 0.9387577782604807, "grad_norm": 1.518933653831482, "learning_rate": 5.4830505880331496e-05, "loss": 0.7572, "step": 146940 }, { "epoch": 0.9388216654102194, "grad_norm": 1.1620988845825195, "learning_rate": 5.482551164222357e-05, "loss": 0.8322, "step": 146950 }, { "epoch": 0.9388855525599581, "grad_norm": 0.6771840453147888, "learning_rate": 5.482051735551856e-05, "loss": 0.8801, "step": 146960 }, { "epoch": 0.9389494397096968, "grad_norm": 1.18392813205719, "learning_rate": 5.481552302026678e-05, "loss": 0.6552, "step": 146970 }, { "epoch": 0.9390133268594355, "grad_norm": 0.7388846278190613, "learning_rate": 5.481052863651851e-05, "loss": 0.8432, "step": 146980 }, { "epoch": 0.9390772140091742, "grad_norm": 0.739513099193573, "learning_rate": 5.480553420432405e-05, "loss": 0.732, "step": 146990 }, { "epoch": 0.9391411011589129, "grad_norm": 1.6718555688858032, "learning_rate": 5.4800539723733714e-05, "loss": 0.7453, "step": 147000 }, { "epoch": 0.9392049883086516, "grad_norm": 1.3321524858474731, "learning_rate": 5.479554519479778e-05, "loss": 0.9719, "step": 147010 }, { "epoch": 0.9392688754583903, "grad_norm": 0.7319386601448059, "learning_rate": 5.479055061756656e-05, "loss": 0.7486, "step": 147020 }, { "epoch": 0.939332762608129, "grad_norm": 1.211982250213623, "learning_rate": 5.478555599209035e-05, "loss": 0.8766, "step": 147030 }, { "epoch": 0.9393966497578677, "grad_norm": 1.0847975015640259, "learning_rate": 5.478056131841947e-05, "loss": 0.9276, "step": 147040 }, { "epoch": 0.9394605369076064, "grad_norm": 1.0367181301116943, "learning_rate": 5.477556659660418e-05, "loss": 0.7712, "step": 147050 }, { "epoch": 0.9395244240573452, "grad_norm": 1.7131720781326294, "learning_rate": 5.4770571826694806e-05, "loss": 0.9039, "step": 147060 }, { "epoch": 0.9395883112070839, "grad_norm": 0.8213444948196411, "learning_rate": 5.4765577008741644e-05, "loss": 0.7636, "step": 147070 }, { "epoch": 0.9396521983568225, "grad_norm": 0.7397197484970093, "learning_rate": 5.4760582142795006e-05, "loss": 0.8527, "step": 147080 }, { "epoch": 0.9397160855065612, "grad_norm": 0.7857769131660461, "learning_rate": 5.475558722890518e-05, "loss": 0.8647, "step": 147090 }, { "epoch": 0.9397799726562999, "grad_norm": 0.7695388197898865, "learning_rate": 5.4750592267122494e-05, "loss": 0.8865, "step": 147100 }, { "epoch": 0.9398438598060386, "grad_norm": 0.926115870475769, "learning_rate": 5.4745597257497215e-05, "loss": 1.1938, "step": 147110 }, { "epoch": 0.9399077469557773, "grad_norm": 0.6379870772361755, "learning_rate": 5.474060220007967e-05, "loss": 0.7275, "step": 147120 }, { "epoch": 0.939971634105516, "grad_norm": 0.6762766242027283, "learning_rate": 5.473560709492016e-05, "loss": 1.1361, "step": 147130 }, { "epoch": 0.9400355212552547, "grad_norm": 0.9891712069511414, "learning_rate": 5.4730611942069e-05, "loss": 0.7849, "step": 147140 }, { "epoch": 0.9400994084049934, "grad_norm": 1.2105982303619385, "learning_rate": 5.472561674157647e-05, "loss": 0.9796, "step": 147150 }, { "epoch": 0.9401632955547321, "grad_norm": 0.9586398601531982, "learning_rate": 5.47206214934929e-05, "loss": 0.9349, "step": 147160 }, { "epoch": 0.9402271827044708, "grad_norm": 1.1456085443496704, "learning_rate": 5.471562619786858e-05, "loss": 0.9046, "step": 147170 }, { "epoch": 0.9402910698542095, "grad_norm": 0.9945286512374878, "learning_rate": 5.471063085475383e-05, "loss": 0.8429, "step": 147180 }, { "epoch": 0.9403549570039482, "grad_norm": 0.9011921882629395, "learning_rate": 5.4705635464198954e-05, "loss": 0.8381, "step": 147190 }, { "epoch": 0.9404188441536869, "grad_norm": 0.6775206327438354, "learning_rate": 5.4700640026254246e-05, "loss": 0.9295, "step": 147200 }, { "epoch": 0.9404827313034256, "grad_norm": 0.9664705991744995, "learning_rate": 5.469564454097004e-05, "loss": 1.06, "step": 147210 }, { "epoch": 0.9405466184531643, "grad_norm": 2.8239731788635254, "learning_rate": 5.469064900839662e-05, "loss": 0.8501, "step": 147220 }, { "epoch": 0.940610505602903, "grad_norm": 1.0002690553665161, "learning_rate": 5.4685653428584314e-05, "loss": 0.9444, "step": 147230 }, { "epoch": 0.9406743927526418, "grad_norm": 0.9000080227851868, "learning_rate": 5.468065780158343e-05, "loss": 0.912, "step": 147240 }, { "epoch": 0.9407382799023805, "grad_norm": 1.004228115081787, "learning_rate": 5.467566212744427e-05, "loss": 0.8445, "step": 147250 }, { "epoch": 0.9408021670521192, "grad_norm": 0.896317720413208, "learning_rate": 5.467066640621714e-05, "loss": 1.1256, "step": 147260 }, { "epoch": 0.9408660542018579, "grad_norm": 1.0601049661636353, "learning_rate": 5.466567063795237e-05, "loss": 0.944, "step": 147270 }, { "epoch": 0.9409299413515966, "grad_norm": 1.2464497089385986, "learning_rate": 5.4660674822700264e-05, "loss": 0.7599, "step": 147280 }, { "epoch": 0.9409938285013353, "grad_norm": 0.9628870487213135, "learning_rate": 5.4655678960511116e-05, "loss": 0.9559, "step": 147290 }, { "epoch": 0.941057715651074, "grad_norm": 0.6807940602302551, "learning_rate": 5.465068305143526e-05, "loss": 0.9153, "step": 147300 }, { "epoch": 0.9411216028008127, "grad_norm": 0.7045243382453918, "learning_rate": 5.4645687095523004e-05, "loss": 0.6033, "step": 147310 }, { "epoch": 0.9411854899505514, "grad_norm": 1.0960919857025146, "learning_rate": 5.464069109282465e-05, "loss": 0.9625, "step": 147320 }, { "epoch": 0.94124937710029, "grad_norm": 0.8039271235466003, "learning_rate": 5.4635695043390526e-05, "loss": 1.0624, "step": 147330 }, { "epoch": 0.9413132642500287, "grad_norm": 0.8753572106361389, "learning_rate": 5.463069894727094e-05, "loss": 0.7895, "step": 147340 }, { "epoch": 0.9413771513997674, "grad_norm": 0.9138633608818054, "learning_rate": 5.462570280451622e-05, "loss": 1.1042, "step": 147350 }, { "epoch": 0.9414410385495061, "grad_norm": 0.7882958054542542, "learning_rate": 5.4620706615176645e-05, "loss": 0.759, "step": 147360 }, { "epoch": 0.9415049256992448, "grad_norm": 0.7644445896148682, "learning_rate": 5.4615710379302574e-05, "loss": 0.6677, "step": 147370 }, { "epoch": 0.9415688128489835, "grad_norm": 0.8131184577941895, "learning_rate": 5.461071409694432e-05, "loss": 0.8138, "step": 147380 }, { "epoch": 0.9416326999987222, "grad_norm": 1.2795737981796265, "learning_rate": 5.460571776815216e-05, "loss": 0.9862, "step": 147390 }, { "epoch": 0.9416965871484609, "grad_norm": 0.9743437767028809, "learning_rate": 5.460072139297646e-05, "loss": 0.9635, "step": 147400 }, { "epoch": 0.9417604742981996, "grad_norm": 0.8721929788589478, "learning_rate": 5.459572497146751e-05, "loss": 0.9526, "step": 147410 }, { "epoch": 0.9418243614479384, "grad_norm": 0.8396299481391907, "learning_rate": 5.459072850367563e-05, "loss": 0.9158, "step": 147420 }, { "epoch": 0.9418882485976771, "grad_norm": 0.7873830795288086, "learning_rate": 5.4585731989651144e-05, "loss": 0.8138, "step": 147430 }, { "epoch": 0.9419521357474158, "grad_norm": 1.116898775100708, "learning_rate": 5.458073542944436e-05, "loss": 0.9122, "step": 147440 }, { "epoch": 0.9420160228971545, "grad_norm": 0.5282606482505798, "learning_rate": 5.4575738823105626e-05, "loss": 0.7972, "step": 147450 }, { "epoch": 0.9420799100468932, "grad_norm": 0.8728241920471191, "learning_rate": 5.457074217068523e-05, "loss": 0.6552, "step": 147460 }, { "epoch": 0.9421437971966319, "grad_norm": 0.9961313009262085, "learning_rate": 5.456574547223351e-05, "loss": 0.896, "step": 147470 }, { "epoch": 0.9422076843463706, "grad_norm": 0.6567425727844238, "learning_rate": 5.456074872780078e-05, "loss": 0.6517, "step": 147480 }, { "epoch": 0.9422715714961093, "grad_norm": 1.023400068283081, "learning_rate": 5.455575193743737e-05, "loss": 1.0409, "step": 147490 }, { "epoch": 0.942335458645848, "grad_norm": 1.032383918762207, "learning_rate": 5.455075510119359e-05, "loss": 1.1724, "step": 147500 }, { "epoch": 0.9423993457955867, "grad_norm": 1.1703791618347168, "learning_rate": 5.454575821911978e-05, "loss": 0.9594, "step": 147510 }, { "epoch": 0.9424632329453254, "grad_norm": 0.9019293785095215, "learning_rate": 5.454076129126624e-05, "loss": 0.8071, "step": 147520 }, { "epoch": 0.9425271200950641, "grad_norm": 0.9911213517189026, "learning_rate": 5.4535764317683314e-05, "loss": 0.856, "step": 147530 }, { "epoch": 0.9425910072448028, "grad_norm": 1.0215911865234375, "learning_rate": 5.4530767298421315e-05, "loss": 0.9103, "step": 147540 }, { "epoch": 0.9426548943945415, "grad_norm": 1.2681025266647339, "learning_rate": 5.452577023353057e-05, "loss": 0.9712, "step": 147550 }, { "epoch": 0.9427187815442802, "grad_norm": 1.3751388788223267, "learning_rate": 5.4520773123061406e-05, "loss": 1.0203, "step": 147560 }, { "epoch": 0.9427826686940188, "grad_norm": 0.6695000529289246, "learning_rate": 5.4515775967064145e-05, "loss": 0.9345, "step": 147570 }, { "epoch": 0.9428465558437575, "grad_norm": 0.8419767618179321, "learning_rate": 5.4510778765589096e-05, "loss": 0.8482, "step": 147580 }, { "epoch": 0.9429104429934962, "grad_norm": 0.9437578320503235, "learning_rate": 5.4505781518686626e-05, "loss": 0.9328, "step": 147590 }, { "epoch": 0.942974330143235, "grad_norm": 0.9840037226676941, "learning_rate": 5.450078422640703e-05, "loss": 1.0883, "step": 147600 }, { "epoch": 0.9430382172929737, "grad_norm": 0.8432072401046753, "learning_rate": 5.449578688880064e-05, "loss": 0.7759, "step": 147610 }, { "epoch": 0.9431021044427124, "grad_norm": 0.9873720407485962, "learning_rate": 5.44907895059178e-05, "loss": 0.7869, "step": 147620 }, { "epoch": 0.9431659915924511, "grad_norm": 1.06051504611969, "learning_rate": 5.44857920778088e-05, "loss": 0.6161, "step": 147630 }, { "epoch": 0.9432298787421898, "grad_norm": 0.6839306950569153, "learning_rate": 5.448079460452401e-05, "loss": 0.8623, "step": 147640 }, { "epoch": 0.9432937658919285, "grad_norm": 0.9109451174736023, "learning_rate": 5.4475797086113736e-05, "loss": 0.7093, "step": 147650 }, { "epoch": 0.9433576530416672, "grad_norm": 0.6764558553695679, "learning_rate": 5.447079952262831e-05, "loss": 0.715, "step": 147660 }, { "epoch": 0.9434215401914059, "grad_norm": 1.947048306465149, "learning_rate": 5.446580191411808e-05, "loss": 1.3239, "step": 147670 }, { "epoch": 0.9434854273411446, "grad_norm": 0.9166133403778076, "learning_rate": 5.446080426063335e-05, "loss": 1.124, "step": 147680 }, { "epoch": 0.9435493144908833, "grad_norm": 0.7818967700004578, "learning_rate": 5.4455806562224466e-05, "loss": 0.9505, "step": 147690 }, { "epoch": 0.943613201640622, "grad_norm": 1.0316641330718994, "learning_rate": 5.445080881894174e-05, "loss": 0.9351, "step": 147700 }, { "epoch": 0.9436770887903607, "grad_norm": 0.9473081231117249, "learning_rate": 5.444581103083553e-05, "loss": 0.6593, "step": 147710 }, { "epoch": 0.9437409759400994, "grad_norm": 2.7547168731689453, "learning_rate": 5.4440813197956165e-05, "loss": 0.8583, "step": 147720 }, { "epoch": 0.9438048630898381, "grad_norm": 1.0668847560882568, "learning_rate": 5.443581532035396e-05, "loss": 0.8896, "step": 147730 }, { "epoch": 0.9438687502395768, "grad_norm": 0.618463933467865, "learning_rate": 5.443081739807926e-05, "loss": 0.7973, "step": 147740 }, { "epoch": 0.9439326373893155, "grad_norm": 0.8581937551498413, "learning_rate": 5.442581943118239e-05, "loss": 0.6358, "step": 147750 }, { "epoch": 0.9439965245390542, "grad_norm": 0.7288326621055603, "learning_rate": 5.44208214197137e-05, "loss": 0.8207, "step": 147760 }, { "epoch": 0.944060411688793, "grad_norm": 1.2387948036193848, "learning_rate": 5.4415823363723515e-05, "loss": 1.1969, "step": 147770 }, { "epoch": 0.9441242988385317, "grad_norm": 0.8606761693954468, "learning_rate": 5.441082526326217e-05, "loss": 0.7147, "step": 147780 }, { "epoch": 0.9441881859882704, "grad_norm": 0.8213832378387451, "learning_rate": 5.4405827118379984e-05, "loss": 0.8981, "step": 147790 }, { "epoch": 0.9442520731380091, "grad_norm": 1.2024768590927124, "learning_rate": 5.440082892912731e-05, "loss": 0.7657, "step": 147800 }, { "epoch": 0.9443159602877477, "grad_norm": 1.0111289024353027, "learning_rate": 5.439583069555448e-05, "loss": 0.9585, "step": 147810 }, { "epoch": 0.9443798474374864, "grad_norm": 0.8769100904464722, "learning_rate": 5.439083241771185e-05, "loss": 0.8346, "step": 147820 }, { "epoch": 0.9444437345872251, "grad_norm": 2.532590389251709, "learning_rate": 5.438583409564972e-05, "loss": 0.7687, "step": 147830 }, { "epoch": 0.9445076217369638, "grad_norm": 0.7972337007522583, "learning_rate": 5.4380835729418454e-05, "loss": 0.9489, "step": 147840 }, { "epoch": 0.9445715088867025, "grad_norm": 1.492598295211792, "learning_rate": 5.437583731906838e-05, "loss": 0.9452, "step": 147850 }, { "epoch": 0.9446353960364412, "grad_norm": 0.8178605437278748, "learning_rate": 5.4370838864649845e-05, "loss": 0.8965, "step": 147860 }, { "epoch": 0.9446992831861799, "grad_norm": 0.7244489789009094, "learning_rate": 5.436584036621317e-05, "loss": 0.7104, "step": 147870 }, { "epoch": 0.9447631703359186, "grad_norm": 1.0198410749435425, "learning_rate": 5.4360841823808715e-05, "loss": 1.0496, "step": 147880 }, { "epoch": 0.9448270574856573, "grad_norm": 0.7587177157402039, "learning_rate": 5.435584323748679e-05, "loss": 0.9377, "step": 147890 }, { "epoch": 0.944890944635396, "grad_norm": 0.8727468252182007, "learning_rate": 5.4350844607297776e-05, "loss": 0.712, "step": 147900 }, { "epoch": 0.9449548317851347, "grad_norm": 0.5229664444923401, "learning_rate": 5.4345845933291984e-05, "loss": 0.6849, "step": 147910 }, { "epoch": 0.9450187189348734, "grad_norm": 0.65986567735672, "learning_rate": 5.4340847215519776e-05, "loss": 0.9049, "step": 147920 }, { "epoch": 0.9450826060846121, "grad_norm": 1.3852014541625977, "learning_rate": 5.4335848454031466e-05, "loss": 0.7575, "step": 147930 }, { "epoch": 0.9451464932343508, "grad_norm": 1.0181684494018555, "learning_rate": 5.433084964887742e-05, "loss": 0.879, "step": 147940 }, { "epoch": 0.9452103803840896, "grad_norm": 2.153229236602783, "learning_rate": 5.432585080010797e-05, "loss": 0.8431, "step": 147950 }, { "epoch": 0.9452742675338283, "grad_norm": 0.9575214385986328, "learning_rate": 5.432085190777346e-05, "loss": 0.659, "step": 147960 }, { "epoch": 0.945338154683567, "grad_norm": 0.5660369396209717, "learning_rate": 5.431585297192423e-05, "loss": 0.967, "step": 147970 }, { "epoch": 0.9454020418333057, "grad_norm": 1.3022630214691162, "learning_rate": 5.431085399261063e-05, "loss": 0.8355, "step": 147980 }, { "epoch": 0.9454659289830444, "grad_norm": 1.3143341541290283, "learning_rate": 5.4305854969883006e-05, "loss": 0.8243, "step": 147990 }, { "epoch": 0.9455298161327831, "grad_norm": 1.7055933475494385, "learning_rate": 5.4300855903791694e-05, "loss": 1.1814, "step": 148000 }, { "epoch": 0.9455937032825218, "grad_norm": 0.5489552021026611, "learning_rate": 5.429585679438705e-05, "loss": 0.761, "step": 148010 }, { "epoch": 0.9456575904322605, "grad_norm": 1.1143733263015747, "learning_rate": 5.429085764171939e-05, "loss": 1.1932, "step": 148020 }, { "epoch": 0.9457214775819992, "grad_norm": 1.6179147958755493, "learning_rate": 5.42858584458391e-05, "loss": 1.4239, "step": 148030 }, { "epoch": 0.9457853647317379, "grad_norm": 0.9910181164741516, "learning_rate": 5.4280859206796506e-05, "loss": 0.8706, "step": 148040 }, { "epoch": 0.9458492518814765, "grad_norm": 1.4709466695785522, "learning_rate": 5.4275859924641936e-05, "loss": 0.6839, "step": 148050 }, { "epoch": 0.9459131390312152, "grad_norm": 0.8617550730705261, "learning_rate": 5.4270860599425775e-05, "loss": 1.1046, "step": 148060 }, { "epoch": 0.9459770261809539, "grad_norm": 1.1911267042160034, "learning_rate": 5.426586123119835e-05, "loss": 0.8552, "step": 148070 }, { "epoch": 0.9460409133306926, "grad_norm": 0.9309574365615845, "learning_rate": 5.426086182001001e-05, "loss": 0.8093, "step": 148080 }, { "epoch": 0.9461048004804313, "grad_norm": 0.9242094159126282, "learning_rate": 5.425586236591112e-05, "loss": 0.8593, "step": 148090 }, { "epoch": 0.94616868763017, "grad_norm": 0.7709175944328308, "learning_rate": 5.4250862868951994e-05, "loss": 0.9407, "step": 148100 }, { "epoch": 0.9462325747799087, "grad_norm": 2.212761878967285, "learning_rate": 5.424586332918301e-05, "loss": 0.8686, "step": 148110 }, { "epoch": 0.9462964619296474, "grad_norm": 0.9792400598526001, "learning_rate": 5.424086374665451e-05, "loss": 0.8417, "step": 148120 }, { "epoch": 0.9463603490793862, "grad_norm": 0.8189619183540344, "learning_rate": 5.423586412141685e-05, "loss": 0.7651, "step": 148130 }, { "epoch": 0.9464242362291249, "grad_norm": 0.6825985312461853, "learning_rate": 5.423086445352036e-05, "loss": 0.6932, "step": 148140 }, { "epoch": 0.9464881233788636, "grad_norm": 1.7145543098449707, "learning_rate": 5.422586474301541e-05, "loss": 0.789, "step": 148150 }, { "epoch": 0.9465520105286023, "grad_norm": 0.9677301645278931, "learning_rate": 5.4220864989952345e-05, "loss": 0.9292, "step": 148160 }, { "epoch": 0.946615897678341, "grad_norm": 0.8834055662155151, "learning_rate": 5.421586519438152e-05, "loss": 0.7546, "step": 148170 }, { "epoch": 0.9466797848280797, "grad_norm": 1.360954999923706, "learning_rate": 5.421086535635328e-05, "loss": 0.9588, "step": 148180 }, { "epoch": 0.9467436719778184, "grad_norm": 0.9436541795730591, "learning_rate": 5.4205865475918e-05, "loss": 0.7112, "step": 148190 }, { "epoch": 0.9468075591275571, "grad_norm": 1.135512351989746, "learning_rate": 5.420086555312599e-05, "loss": 1.0807, "step": 148200 }, { "epoch": 0.9468714462772958, "grad_norm": NaN, "learning_rate": 5.419636558643983e-05, "loss": 0.9952, "step": 148210 }, { "epoch": 0.9469353334270345, "grad_norm": 0.8383281230926514, "learning_rate": 5.4191365583308814e-05, "loss": 0.9675, "step": 148220 }, { "epoch": 0.9469992205767732, "grad_norm": 0.6859557032585144, "learning_rate": 5.418636553796713e-05, "loss": 0.9006, "step": 148230 }, { "epoch": 0.9470631077265119, "grad_norm": 0.9620136022567749, "learning_rate": 5.4181365450465125e-05, "loss": 0.8653, "step": 148240 }, { "epoch": 0.9471269948762506, "grad_norm": 0.5614147186279297, "learning_rate": 5.417636532085315e-05, "loss": 0.7507, "step": 148250 }, { "epoch": 0.9471908820259893, "grad_norm": 1.3918198347091675, "learning_rate": 5.417136514918156e-05, "loss": 0.9124, "step": 148260 }, { "epoch": 0.947254769175728, "grad_norm": 0.8057130575180054, "learning_rate": 5.416636493550071e-05, "loss": 0.8625, "step": 148270 }, { "epoch": 0.9473186563254667, "grad_norm": 1.2097066640853882, "learning_rate": 5.4161364679860974e-05, "loss": 0.9413, "step": 148280 }, { "epoch": 0.9473825434752055, "grad_norm": 0.8656195402145386, "learning_rate": 5.415636438231269e-05, "loss": 0.851, "step": 148290 }, { "epoch": 0.947446430624944, "grad_norm": 0.9024816751480103, "learning_rate": 5.4151364042906216e-05, "loss": 0.6835, "step": 148300 }, { "epoch": 0.9475103177746828, "grad_norm": 0.9172950983047485, "learning_rate": 5.414636366169191e-05, "loss": 0.9436, "step": 148310 }, { "epoch": 0.9475742049244215, "grad_norm": 1.2229052782058716, "learning_rate": 5.4141363238720144e-05, "loss": 0.7715, "step": 148320 }, { "epoch": 0.9476380920741602, "grad_norm": 0.7805074453353882, "learning_rate": 5.4136362774041274e-05, "loss": 0.7959, "step": 148330 }, { "epoch": 0.9477019792238989, "grad_norm": 1.2638076543807983, "learning_rate": 5.4131362267705635e-05, "loss": 0.8026, "step": 148340 }, { "epoch": 0.9477658663736376, "grad_norm": 0.8353046774864197, "learning_rate": 5.412636171976362e-05, "loss": 0.8111, "step": 148350 }, { "epoch": 0.9478297535233763, "grad_norm": 0.7682856917381287, "learning_rate": 5.4121361130265556e-05, "loss": 1.0184, "step": 148360 }, { "epoch": 0.947893640673115, "grad_norm": 1.0108745098114014, "learning_rate": 5.411636049926183e-05, "loss": 0.9613, "step": 148370 }, { "epoch": 0.9479575278228537, "grad_norm": 1.050826907157898, "learning_rate": 5.4111359826802785e-05, "loss": 0.742, "step": 148380 }, { "epoch": 0.9480214149725924, "grad_norm": 0.868506133556366, "learning_rate": 5.41063591129388e-05, "loss": 1.0165, "step": 148390 }, { "epoch": 0.9480853021223311, "grad_norm": 0.8240692019462585, "learning_rate": 5.410135835772023e-05, "loss": 0.9583, "step": 148400 }, { "epoch": 0.9481491892720698, "grad_norm": 0.7590421438217163, "learning_rate": 5.409635756119742e-05, "loss": 0.9101, "step": 148410 }, { "epoch": 0.9482130764218085, "grad_norm": 0.9311391711235046, "learning_rate": 5.409135672342076e-05, "loss": 0.9968, "step": 148420 }, { "epoch": 0.9482769635715472, "grad_norm": 0.7667366862297058, "learning_rate": 5.408635584444058e-05, "loss": 0.7345, "step": 148430 }, { "epoch": 0.9483408507212859, "grad_norm": 2.864366292953491, "learning_rate": 5.408135492430728e-05, "loss": 0.9094, "step": 148440 }, { "epoch": 0.9484047378710246, "grad_norm": 1.2095937728881836, "learning_rate": 5.407635396307119e-05, "loss": 0.9626, "step": 148450 }, { "epoch": 0.9484686250207633, "grad_norm": 1.3204469680786133, "learning_rate": 5.4071352960782697e-05, "loss": 0.9588, "step": 148460 }, { "epoch": 0.948532512170502, "grad_norm": 0.5598198771476746, "learning_rate": 5.406635191749215e-05, "loss": 0.8083, "step": 148470 }, { "epoch": 0.9485963993202408, "grad_norm": 1.0732495784759521, "learning_rate": 5.406135083324993e-05, "loss": 0.8825, "step": 148480 }, { "epoch": 0.9486602864699795, "grad_norm": 0.9504900574684143, "learning_rate": 5.405634970810639e-05, "loss": 1.174, "step": 148490 }, { "epoch": 0.9487241736197182, "grad_norm": 1.0843398571014404, "learning_rate": 5.40513485421119e-05, "loss": 1.0179, "step": 148500 }, { "epoch": 0.9487880607694569, "grad_norm": 1.0680909156799316, "learning_rate": 5.404634733531683e-05, "loss": 1.0718, "step": 148510 }, { "epoch": 0.9488519479191956, "grad_norm": 0.8766992092132568, "learning_rate": 5.404134608777154e-05, "loss": 0.8991, "step": 148520 }, { "epoch": 0.9489158350689343, "grad_norm": 0.7806427478790283, "learning_rate": 5.4036344799526396e-05, "loss": 1.0212, "step": 148530 }, { "epoch": 0.9489797222186729, "grad_norm": 0.9401262998580933, "learning_rate": 5.4031343470631756e-05, "loss": 0.8057, "step": 148540 }, { "epoch": 0.9490436093684116, "grad_norm": 0.99031001329422, "learning_rate": 5.402634210113801e-05, "loss": 0.8273, "step": 148550 }, { "epoch": 0.9491074965181503, "grad_norm": 0.5672215223312378, "learning_rate": 5.402134069109551e-05, "loss": 1.0363, "step": 148560 }, { "epoch": 0.949171383667889, "grad_norm": 0.7566940784454346, "learning_rate": 5.401633924055464e-05, "loss": 0.8976, "step": 148570 }, { "epoch": 0.9492352708176277, "grad_norm": 1.022157907485962, "learning_rate": 5.401133774956576e-05, "loss": 0.9522, "step": 148580 }, { "epoch": 0.9492991579673664, "grad_norm": 0.8552069664001465, "learning_rate": 5.400633621817923e-05, "loss": 0.7844, "step": 148590 }, { "epoch": 0.9493630451171051, "grad_norm": 1.0440733432769775, "learning_rate": 5.4001334646445436e-05, "loss": 0.6886, "step": 148600 }, { "epoch": 0.9494269322668438, "grad_norm": 1.094836711883545, "learning_rate": 5.399633303441474e-05, "loss": 1.0184, "step": 148610 }, { "epoch": 0.9494908194165825, "grad_norm": 0.8054597973823547, "learning_rate": 5.399133138213751e-05, "loss": 0.9208, "step": 148620 }, { "epoch": 0.9495547065663212, "grad_norm": 0.6301440000534058, "learning_rate": 5.398632968966412e-05, "loss": 0.8177, "step": 148630 }, { "epoch": 0.9496185937160599, "grad_norm": 1.095146894454956, "learning_rate": 5.398182813211199e-05, "loss": 1.1332, "step": 148640 }, { "epoch": 0.9496824808657987, "grad_norm": 1.182285189628601, "learning_rate": 5.3976826363404665e-05, "loss": 0.8807, "step": 148650 }, { "epoch": 0.9497463680155374, "grad_norm": 1.0031092166900635, "learning_rate": 5.397182455464725e-05, "loss": 1.1409, "step": 148660 }, { "epoch": 0.9498102551652761, "grad_norm": 0.9576486945152283, "learning_rate": 5.396682270589015e-05, "loss": 0.9945, "step": 148670 }, { "epoch": 0.9498741423150148, "grad_norm": 1.1300849914550781, "learning_rate": 5.396182081718369e-05, "loss": 0.9599, "step": 148680 }, { "epoch": 0.9499380294647535, "grad_norm": 1.3322839736938477, "learning_rate": 5.395681888857829e-05, "loss": 0.8257, "step": 148690 }, { "epoch": 0.9500019166144922, "grad_norm": 1.616481065750122, "learning_rate": 5.3951816920124285e-05, "loss": 0.9125, "step": 148700 }, { "epoch": 0.9500658037642309, "grad_norm": 0.7100057601928711, "learning_rate": 5.394681491187207e-05, "loss": 1.3379, "step": 148710 }, { "epoch": 0.9501296909139696, "grad_norm": 0.8348981738090515, "learning_rate": 5.394181286387202e-05, "loss": 0.9037, "step": 148720 }, { "epoch": 0.9501935780637083, "grad_norm": 1.0880135297775269, "learning_rate": 5.3936810776174497e-05, "loss": 0.8901, "step": 148730 }, { "epoch": 0.950257465213447, "grad_norm": 1.474798560142517, "learning_rate": 5.3931808648829887e-05, "loss": 1.0315, "step": 148740 }, { "epoch": 0.9503213523631857, "grad_norm": 1.2487114667892456, "learning_rate": 5.392680648188856e-05, "loss": 0.9557, "step": 148750 }, { "epoch": 0.9503852395129244, "grad_norm": 0.5806934833526611, "learning_rate": 5.392180427540089e-05, "loss": 0.8193, "step": 148760 }, { "epoch": 0.9504491266626631, "grad_norm": 0.8657655715942383, "learning_rate": 5.391680202941727e-05, "loss": 0.9947, "step": 148770 }, { "epoch": 0.9505130138124017, "grad_norm": 1.0239652395248413, "learning_rate": 5.3911799743988054e-05, "loss": 0.9196, "step": 148780 }, { "epoch": 0.9505769009621404, "grad_norm": 0.862191379070282, "learning_rate": 5.390679741916365e-05, "loss": 1.0819, "step": 148790 }, { "epoch": 0.9506407881118791, "grad_norm": 0.7813977599143982, "learning_rate": 5.39017950549944e-05, "loss": 0.8585, "step": 148800 }, { "epoch": 0.9507046752616178, "grad_norm": 0.829288125038147, "learning_rate": 5.389679265153069e-05, "loss": 0.9753, "step": 148810 }, { "epoch": 0.9507685624113565, "grad_norm": 1.4242595434188843, "learning_rate": 5.389179020882291e-05, "loss": 0.5942, "step": 148820 }, { "epoch": 0.9508324495610952, "grad_norm": 0.8533827066421509, "learning_rate": 5.388678772692144e-05, "loss": 1.042, "step": 148830 }, { "epoch": 0.950896336710834, "grad_norm": 0.6416082978248596, "learning_rate": 5.388178520587666e-05, "loss": 1.0584, "step": 148840 }, { "epoch": 0.9509602238605727, "grad_norm": 1.0949831008911133, "learning_rate": 5.387678264573893e-05, "loss": 0.8983, "step": 148850 }, { "epoch": 0.9510241110103114, "grad_norm": 1.0142227411270142, "learning_rate": 5.3871780046558664e-05, "loss": 0.7403, "step": 148860 }, { "epoch": 0.9510879981600501, "grad_norm": 0.9484434127807617, "learning_rate": 5.3866777408386217e-05, "loss": 0.9695, "step": 148870 }, { "epoch": 0.9511518853097888, "grad_norm": 0.765370786190033, "learning_rate": 5.386177473127197e-05, "loss": 0.7995, "step": 148880 }, { "epoch": 0.9512157724595275, "grad_norm": 0.7197050452232361, "learning_rate": 5.385677201526631e-05, "loss": 0.8397, "step": 148890 }, { "epoch": 0.9512796596092662, "grad_norm": 0.9084450602531433, "learning_rate": 5.385176926041963e-05, "loss": 1.113, "step": 148900 }, { "epoch": 0.9513435467590049, "grad_norm": 1.0009987354278564, "learning_rate": 5.3846766466782294e-05, "loss": 0.9134, "step": 148910 }, { "epoch": 0.9514074339087436, "grad_norm": 0.9077139496803284, "learning_rate": 5.3841763634404695e-05, "loss": 1.0847, "step": 148920 }, { "epoch": 0.9514713210584823, "grad_norm": 0.6592074632644653, "learning_rate": 5.383676076333721e-05, "loss": 0.7027, "step": 148930 }, { "epoch": 0.951535208208221, "grad_norm": 0.9967330694198608, "learning_rate": 5.383175785363023e-05, "loss": 0.8229, "step": 148940 }, { "epoch": 0.9515990953579597, "grad_norm": 0.9206305146217346, "learning_rate": 5.382675490533413e-05, "loss": 0.8789, "step": 148950 }, { "epoch": 0.9516629825076984, "grad_norm": 2.410933017730713, "learning_rate": 5.38217519184993e-05, "loss": 0.8032, "step": 148960 }, { "epoch": 0.9517268696574371, "grad_norm": 1.0102331638336182, "learning_rate": 5.381674889317612e-05, "loss": 1.1499, "step": 148970 }, { "epoch": 0.9517907568071758, "grad_norm": 1.321588397026062, "learning_rate": 5.3811745829414975e-05, "loss": 0.9381, "step": 148980 }, { "epoch": 0.9518546439569145, "grad_norm": 0.7851718664169312, "learning_rate": 5.3806742727266245e-05, "loss": 0.807, "step": 148990 }, { "epoch": 0.9519185311066533, "grad_norm": 0.8672306537628174, "learning_rate": 5.380173958678033e-05, "loss": 0.8848, "step": 149000 }, { "epoch": 0.951982418256392, "grad_norm": 1.5692130327224731, "learning_rate": 5.379673640800761e-05, "loss": 0.6902, "step": 149010 }, { "epoch": 0.9520463054061307, "grad_norm": 0.9752570986747742, "learning_rate": 5.379173319099845e-05, "loss": 0.9114, "step": 149020 }, { "epoch": 0.9521101925558693, "grad_norm": 0.89276123046875, "learning_rate": 5.378672993580329e-05, "loss": 0.7358, "step": 149030 }, { "epoch": 0.952174079705608, "grad_norm": 0.9652552604675293, "learning_rate": 5.378172664247246e-05, "loss": 0.8794, "step": 149040 }, { "epoch": 0.9522379668553467, "grad_norm": 0.7110236287117004, "learning_rate": 5.377672331105639e-05, "loss": 0.9261, "step": 149050 }, { "epoch": 0.9523018540050854, "grad_norm": 0.5412469506263733, "learning_rate": 5.3771719941605434e-05, "loss": 0.8893, "step": 149060 }, { "epoch": 0.9523657411548241, "grad_norm": 0.5765590071678162, "learning_rate": 5.3766716534170004e-05, "loss": 0.7548, "step": 149070 }, { "epoch": 0.9524296283045628, "grad_norm": 0.9300665855407715, "learning_rate": 5.376171308880047e-05, "loss": 0.982, "step": 149080 }, { "epoch": 0.9524935154543015, "grad_norm": 0.7088594436645508, "learning_rate": 5.375670960554724e-05, "loss": 0.7038, "step": 149090 }, { "epoch": 0.9525574026040402, "grad_norm": 0.7242599129676819, "learning_rate": 5.375170608446068e-05, "loss": 0.8099, "step": 149100 }, { "epoch": 0.9526212897537789, "grad_norm": 0.8361330628395081, "learning_rate": 5.3746702525591205e-05, "loss": 1.1563, "step": 149110 }, { "epoch": 0.9526851769035176, "grad_norm": 1.0023174285888672, "learning_rate": 5.3741698928989194e-05, "loss": 0.8939, "step": 149120 }, { "epoch": 0.9527490640532563, "grad_norm": 0.6952134966850281, "learning_rate": 5.373669529470504e-05, "loss": 0.9099, "step": 149130 }, { "epoch": 0.952812951202995, "grad_norm": 0.7857193946838379, "learning_rate": 5.373169162278913e-05, "loss": 0.8369, "step": 149140 }, { "epoch": 0.9528768383527337, "grad_norm": 0.8915547728538513, "learning_rate": 5.372668791329185e-05, "loss": 1.0329, "step": 149150 }, { "epoch": 0.9529407255024724, "grad_norm": 0.9852863550186157, "learning_rate": 5.372168416626361e-05, "loss": 1.06, "step": 149160 }, { "epoch": 0.9530046126522111, "grad_norm": 0.7365124225616455, "learning_rate": 5.371668038175478e-05, "loss": 0.7519, "step": 149170 }, { "epoch": 0.9530684998019499, "grad_norm": 0.8659708499908447, "learning_rate": 5.371167655981576e-05, "loss": 0.8632, "step": 149180 }, { "epoch": 0.9531323869516886, "grad_norm": 0.8980143070220947, "learning_rate": 5.3706672700496954e-05, "loss": 0.7586, "step": 149190 }, { "epoch": 0.9531962741014273, "grad_norm": 0.9168791174888611, "learning_rate": 5.370166880384875e-05, "loss": 0.9606, "step": 149200 }, { "epoch": 0.953260161251166, "grad_norm": 3.515974760055542, "learning_rate": 5.369666486992153e-05, "loss": 0.9327, "step": 149210 }, { "epoch": 0.9533240484009047, "grad_norm": 0.5968050360679626, "learning_rate": 5.3691660898765705e-05, "loss": 0.7153, "step": 149220 }, { "epoch": 0.9533879355506434, "grad_norm": 0.4954209625720978, "learning_rate": 5.3686656890431665e-05, "loss": 0.7143, "step": 149230 }, { "epoch": 0.9534518227003821, "grad_norm": 0.938412070274353, "learning_rate": 5.3681652844969785e-05, "loss": 0.7915, "step": 149240 }, { "epoch": 0.9535157098501208, "grad_norm": 0.7088356614112854, "learning_rate": 5.3676648762430495e-05, "loss": 0.8184, "step": 149250 }, { "epoch": 0.9535795969998595, "grad_norm": 1.936963677406311, "learning_rate": 5.367164464286416e-05, "loss": 0.7562, "step": 149260 }, { "epoch": 0.9536434841495981, "grad_norm": 0.9665189385414124, "learning_rate": 5.366664048632118e-05, "loss": 0.9453, "step": 149270 }, { "epoch": 0.9537073712993368, "grad_norm": 1.063223123550415, "learning_rate": 5.366163629285198e-05, "loss": 0.8095, "step": 149280 }, { "epoch": 0.9537712584490755, "grad_norm": 1.0628734827041626, "learning_rate": 5.365663206250693e-05, "loss": 1.1281, "step": 149290 }, { "epoch": 0.9538351455988142, "grad_norm": 0.9469806551933289, "learning_rate": 5.365162779533641e-05, "loss": 1.0392, "step": 149300 }, { "epoch": 0.9538990327485529, "grad_norm": 0.8327413201332092, "learning_rate": 5.3646623491390855e-05, "loss": 0.5963, "step": 149310 }, { "epoch": 0.9539629198982916, "grad_norm": 0.7886581420898438, "learning_rate": 5.3641619150720646e-05, "loss": 0.8496, "step": 149320 }, { "epoch": 0.9540268070480303, "grad_norm": 0.7735728621482849, "learning_rate": 5.363661477337618e-05, "loss": 0.7483, "step": 149330 }, { "epoch": 0.954090694197769, "grad_norm": 0.7688968777656555, "learning_rate": 5.363161035940785e-05, "loss": 0.9939, "step": 149340 }, { "epoch": 0.9541545813475077, "grad_norm": 1.1434403657913208, "learning_rate": 5.362660590886607e-05, "loss": 1.1379, "step": 149350 }, { "epoch": 0.9542184684972465, "grad_norm": 1.1655575037002563, "learning_rate": 5.362160142180123e-05, "loss": 0.9408, "step": 149360 }, { "epoch": 0.9542823556469852, "grad_norm": 0.7211439609527588, "learning_rate": 5.361659689826373e-05, "loss": 0.6937, "step": 149370 }, { "epoch": 0.9543462427967239, "grad_norm": 0.9510606527328491, "learning_rate": 5.361159233830396e-05, "loss": 1.044, "step": 149380 }, { "epoch": 0.9544101299464626, "grad_norm": 0.7282498478889465, "learning_rate": 5.360658774197235e-05, "loss": 0.7549, "step": 149390 }, { "epoch": 0.9544740170962013, "grad_norm": 1.0271804332733154, "learning_rate": 5.3601583109319264e-05, "loss": 1.0737, "step": 149400 }, { "epoch": 0.95453790424594, "grad_norm": 0.7570605278015137, "learning_rate": 5.359657844039514e-05, "loss": 0.6157, "step": 149410 }, { "epoch": 0.9546017913956787, "grad_norm": 0.8889819979667664, "learning_rate": 5.3591573735250344e-05, "loss": 1.0411, "step": 149420 }, { "epoch": 0.9546656785454174, "grad_norm": 1.0955427885055542, "learning_rate": 5.358656899393529e-05, "loss": 1.0119, "step": 149430 }, { "epoch": 0.9547295656951561, "grad_norm": 0.8745180368423462, "learning_rate": 5.35815642165004e-05, "loss": 0.9069, "step": 149440 }, { "epoch": 0.9547934528448948, "grad_norm": 0.9385198354721069, "learning_rate": 5.357655940299605e-05, "loss": 0.8872, "step": 149450 }, { "epoch": 0.9548573399946335, "grad_norm": 0.8840739130973816, "learning_rate": 5.357155455347265e-05, "loss": 0.6832, "step": 149460 }, { "epoch": 0.9549212271443722, "grad_norm": 0.6228633522987366, "learning_rate": 5.3566549667980614e-05, "loss": 0.927, "step": 149470 }, { "epoch": 0.9549851142941109, "grad_norm": 1.233453631401062, "learning_rate": 5.356154474657033e-05, "loss": 1.0322, "step": 149480 }, { "epoch": 0.9550490014438496, "grad_norm": 1.1719436645507812, "learning_rate": 5.355653978929222e-05, "loss": 0.9269, "step": 149490 }, { "epoch": 0.9551128885935883, "grad_norm": 0.7223755717277527, "learning_rate": 5.3551534796196656e-05, "loss": 0.8008, "step": 149500 }, { "epoch": 0.9551767757433269, "grad_norm": 0.946700930595398, "learning_rate": 5.3546529767334085e-05, "loss": 0.7767, "step": 149510 }, { "epoch": 0.9552406628930656, "grad_norm": 1.2065627574920654, "learning_rate": 5.3541524702754886e-05, "loss": 1.0349, "step": 149520 }, { "epoch": 0.9553045500428043, "grad_norm": 1.362046718597412, "learning_rate": 5.353651960250946e-05, "loss": 0.6447, "step": 149530 }, { "epoch": 0.955368437192543, "grad_norm": 1.2870509624481201, "learning_rate": 5.353151446664824e-05, "loss": 0.7011, "step": 149540 }, { "epoch": 0.9554323243422818, "grad_norm": 1.1624693870544434, "learning_rate": 5.352650929522159e-05, "loss": 1.0363, "step": 149550 }, { "epoch": 0.9554962114920205, "grad_norm": 1.077800989151001, "learning_rate": 5.352150408827996e-05, "loss": 0.9021, "step": 149560 }, { "epoch": 0.9555600986417592, "grad_norm": 1.4235363006591797, "learning_rate": 5.351649884587373e-05, "loss": 0.9034, "step": 149570 }, { "epoch": 0.9556239857914979, "grad_norm": 0.7878531217575073, "learning_rate": 5.351149356805332e-05, "loss": 1.0202, "step": 149580 }, { "epoch": 0.9556878729412366, "grad_norm": 0.9151926636695862, "learning_rate": 5.3506488254869124e-05, "loss": 0.7615, "step": 149590 }, { "epoch": 0.9557517600909753, "grad_norm": 0.8599022626876831, "learning_rate": 5.3501482906371556e-05, "loss": 0.9448, "step": 149600 }, { "epoch": 0.955815647240714, "grad_norm": 0.8533304929733276, "learning_rate": 5.349647752261103e-05, "loss": 1.0606, "step": 149610 }, { "epoch": 0.9558795343904527, "grad_norm": 1.0226484537124634, "learning_rate": 5.3491472103637955e-05, "loss": 0.9859, "step": 149620 }, { "epoch": 0.9559434215401914, "grad_norm": 0.7206386923789978, "learning_rate": 5.3486466649502733e-05, "loss": 0.9154, "step": 149630 }, { "epoch": 0.9560073086899301, "grad_norm": 1.4392954111099243, "learning_rate": 5.3481461160255773e-05, "loss": 0.74, "step": 149640 }, { "epoch": 0.9560711958396688, "grad_norm": 1.0331584215164185, "learning_rate": 5.3476455635947484e-05, "loss": 0.8146, "step": 149650 }, { "epoch": 0.9561350829894075, "grad_norm": 0.8725119829177856, "learning_rate": 5.3471450076628294e-05, "loss": 0.8874, "step": 149660 }, { "epoch": 0.9561989701391462, "grad_norm": 0.8222038149833679, "learning_rate": 5.346644448234859e-05, "loss": 0.8343, "step": 149670 }, { "epoch": 0.9562628572888849, "grad_norm": 1.1900924444198608, "learning_rate": 5.3461438853158784e-05, "loss": 0.8374, "step": 149680 }, { "epoch": 0.9563267444386236, "grad_norm": 0.8432425260543823, "learning_rate": 5.34564331891093e-05, "loss": 1.0343, "step": 149690 }, { "epoch": 0.9563906315883623, "grad_norm": 1.0364454984664917, "learning_rate": 5.3451427490250535e-05, "loss": 0.9026, "step": 149700 }, { "epoch": 0.9564545187381011, "grad_norm": 0.8370749950408936, "learning_rate": 5.344642175663292e-05, "loss": 0.8584, "step": 149710 }, { "epoch": 0.9565184058878398, "grad_norm": 0.720355212688446, "learning_rate": 5.3441415988306856e-05, "loss": 0.7958, "step": 149720 }, { "epoch": 0.9565822930375785, "grad_norm": 0.9760454297065735, "learning_rate": 5.343641018532275e-05, "loss": 0.653, "step": 149730 }, { "epoch": 0.9566461801873172, "grad_norm": 0.9241121411323547, "learning_rate": 5.3431404347731015e-05, "loss": 0.9755, "step": 149740 }, { "epoch": 0.9567100673370558, "grad_norm": 1.0608195066452026, "learning_rate": 5.3426398475582086e-05, "loss": 0.9359, "step": 149750 }, { "epoch": 0.9567739544867945, "grad_norm": 0.5845559239387512, "learning_rate": 5.3421392568926363e-05, "loss": 0.5841, "step": 149760 }, { "epoch": 0.9568378416365332, "grad_norm": 1.0789985656738281, "learning_rate": 5.341638662781424e-05, "loss": 0.8384, "step": 149770 }, { "epoch": 0.9569017287862719, "grad_norm": 1.286792278289795, "learning_rate": 5.341138065229616e-05, "loss": 0.7882, "step": 149780 }, { "epoch": 0.9569656159360106, "grad_norm": 0.814147412776947, "learning_rate": 5.3406374642422516e-05, "loss": 0.8117, "step": 149790 }, { "epoch": 0.9570295030857493, "grad_norm": 0.7748686671257019, "learning_rate": 5.340136859824374e-05, "loss": 0.7822, "step": 149800 }, { "epoch": 0.957093390235488, "grad_norm": 0.9128410220146179, "learning_rate": 5.339636251981024e-05, "loss": 0.8351, "step": 149810 }, { "epoch": 0.9571572773852267, "grad_norm": 1.1304187774658203, "learning_rate": 5.339135640717242e-05, "loss": 0.66, "step": 149820 }, { "epoch": 0.9572211645349654, "grad_norm": 3.093017816543579, "learning_rate": 5.3386350260380724e-05, "loss": 0.9673, "step": 149830 }, { "epoch": 0.9572850516847041, "grad_norm": 0.8403214812278748, "learning_rate": 5.338134407948554e-05, "loss": 0.8767, "step": 149840 }, { "epoch": 0.9573489388344428, "grad_norm": 0.7375771403312683, "learning_rate": 5.33763378645373e-05, "loss": 1.0156, "step": 149850 }, { "epoch": 0.9574128259841815, "grad_norm": 1.0833170413970947, "learning_rate": 5.3371331615586405e-05, "loss": 0.7993, "step": 149860 }, { "epoch": 0.9574767131339202, "grad_norm": 0.7025789618492126, "learning_rate": 5.336632533268329e-05, "loss": 0.8754, "step": 149870 }, { "epoch": 0.957540600283659, "grad_norm": 1.240759253501892, "learning_rate": 5.336131901587836e-05, "loss": 1.0206, "step": 149880 }, { "epoch": 0.9576044874333977, "grad_norm": 0.9655282497406006, "learning_rate": 5.335631266522205e-05, "loss": 0.8602, "step": 149890 }, { "epoch": 0.9576683745831364, "grad_norm": 0.8315982818603516, "learning_rate": 5.335130628076478e-05, "loss": 0.6983, "step": 149900 }, { "epoch": 0.9577322617328751, "grad_norm": 0.5027674436569214, "learning_rate": 5.334629986255694e-05, "loss": 0.8194, "step": 149910 }, { "epoch": 0.9577961488826138, "grad_norm": 1.5360819101333618, "learning_rate": 5.3341293410648964e-05, "loss": 0.8028, "step": 149920 }, { "epoch": 0.9578600360323525, "grad_norm": 0.6608107686042786, "learning_rate": 5.333628692509128e-05, "loss": 0.8155, "step": 149930 }, { "epoch": 0.9579239231820912, "grad_norm": 0.8952988982200623, "learning_rate": 5.33312804059343e-05, "loss": 0.7765, "step": 149940 }, { "epoch": 0.9579878103318299, "grad_norm": 0.556867241859436, "learning_rate": 5.3326273853228435e-05, "loss": 0.9709, "step": 149950 }, { "epoch": 0.9580516974815686, "grad_norm": 1.2768900394439697, "learning_rate": 5.332126726702413e-05, "loss": 0.8193, "step": 149960 }, { "epoch": 0.9581155846313073, "grad_norm": 1.526533603668213, "learning_rate": 5.3316260647371785e-05, "loss": 0.84, "step": 149970 }, { "epoch": 0.958179471781046, "grad_norm": 0.7335947155952454, "learning_rate": 5.3311253994321816e-05, "loss": 0.7598, "step": 149980 }, { "epoch": 0.9582433589307847, "grad_norm": 0.5931270718574524, "learning_rate": 5.3306247307924676e-05, "loss": 0.9405, "step": 149990 }, { "epoch": 0.9583072460805233, "grad_norm": 0.8060052394866943, "learning_rate": 5.330124058823074e-05, "loss": 0.8914, "step": 150000 }, { "epoch": 0.958371133230262, "grad_norm": 1.0445197820663452, "learning_rate": 5.3296233835290485e-05, "loss": 1.0033, "step": 150010 }, { "epoch": 0.9584350203800007, "grad_norm": 1.0918605327606201, "learning_rate": 5.329122704915428e-05, "loss": 0.914, "step": 150020 }, { "epoch": 0.9584989075297394, "grad_norm": 1.1090441942214966, "learning_rate": 5.328622022987257e-05, "loss": 1.0159, "step": 150030 }, { "epoch": 0.9585627946794781, "grad_norm": 0.9032556414604187, "learning_rate": 5.328121337749579e-05, "loss": 0.7714, "step": 150040 }, { "epoch": 0.9586266818292168, "grad_norm": 0.678882896900177, "learning_rate": 5.3276206492074344e-05, "loss": 0.8174, "step": 150050 }, { "epoch": 0.9586905689789555, "grad_norm": 1.054736852645874, "learning_rate": 5.327119957365867e-05, "loss": 0.7512, "step": 150060 }, { "epoch": 0.9587544561286943, "grad_norm": 0.6916370987892151, "learning_rate": 5.326619262229918e-05, "loss": 1.21, "step": 150070 }, { "epoch": 0.958818343278433, "grad_norm": 1.1509549617767334, "learning_rate": 5.326118563804632e-05, "loss": 0.9652, "step": 150080 }, { "epoch": 0.9588822304281717, "grad_norm": 0.5059418082237244, "learning_rate": 5.325617862095049e-05, "loss": 0.8353, "step": 150090 }, { "epoch": 0.9589461175779104, "grad_norm": 1.0998530387878418, "learning_rate": 5.325117157106212e-05, "loss": 1.0926, "step": 150100 }, { "epoch": 0.9590100047276491, "grad_norm": 0.799642026424408, "learning_rate": 5.324616448843165e-05, "loss": 0.8287, "step": 150110 }, { "epoch": 0.9590738918773878, "grad_norm": 0.6795291304588318, "learning_rate": 5.3241157373109485e-05, "loss": 0.8267, "step": 150120 }, { "epoch": 0.9591377790271265, "grad_norm": 0.9579530954360962, "learning_rate": 5.323615022514607e-05, "loss": 0.8384, "step": 150130 }, { "epoch": 0.9592016661768652, "grad_norm": 1.0019688606262207, "learning_rate": 5.3231143044591816e-05, "loss": 0.7948, "step": 150140 }, { "epoch": 0.9592655533266039, "grad_norm": 1.0456329584121704, "learning_rate": 5.322613583149715e-05, "loss": 1.0149, "step": 150150 }, { "epoch": 0.9593294404763426, "grad_norm": 1.1081980466842651, "learning_rate": 5.322112858591252e-05, "loss": 1.0692, "step": 150160 }, { "epoch": 0.9593933276260813, "grad_norm": 1.2669342756271362, "learning_rate": 5.3216121307888336e-05, "loss": 0.8101, "step": 150170 }, { "epoch": 0.95945721477582, "grad_norm": 1.001755714416504, "learning_rate": 5.3211113997475016e-05, "loss": 0.6696, "step": 150180 }, { "epoch": 0.9595211019255587, "grad_norm": 0.6659078001976013, "learning_rate": 5.320610665472301e-05, "loss": 0.713, "step": 150190 }, { "epoch": 0.9595849890752974, "grad_norm": 0.7698034048080444, "learning_rate": 5.320109927968273e-05, "loss": 0.8432, "step": 150200 }, { "epoch": 0.9596488762250361, "grad_norm": 0.7013608813285828, "learning_rate": 5.319609187240462e-05, "loss": 1.1162, "step": 150210 }, { "epoch": 0.9597127633747748, "grad_norm": 0.7664918899536133, "learning_rate": 5.319108443293909e-05, "loss": 0.9773, "step": 150220 }, { "epoch": 0.9597766505245136, "grad_norm": 1.7179349660873413, "learning_rate": 5.3186076961336584e-05, "loss": 0.8648, "step": 150230 }, { "epoch": 0.9598405376742521, "grad_norm": 0.5642333030700684, "learning_rate": 5.318106945764752e-05, "loss": 0.8806, "step": 150240 }, { "epoch": 0.9599044248239909, "grad_norm": 1.004900336265564, "learning_rate": 5.317606192192235e-05, "loss": 0.8323, "step": 150250 }, { "epoch": 0.9599683119737296, "grad_norm": 0.6977973580360413, "learning_rate": 5.317105435421148e-05, "loss": 1.0794, "step": 150260 }, { "epoch": 0.9600321991234683, "grad_norm": 0.8252426385879517, "learning_rate": 5.316604675456535e-05, "loss": 0.7651, "step": 150270 }, { "epoch": 0.960096086273207, "grad_norm": 1.0018861293792725, "learning_rate": 5.316103912303438e-05, "loss": 0.9188, "step": 150280 }, { "epoch": 0.9601599734229457, "grad_norm": 0.829059362411499, "learning_rate": 5.3156031459669035e-05, "loss": 0.756, "step": 150290 }, { "epoch": 0.9602238605726844, "grad_norm": 1.1105847358703613, "learning_rate": 5.31510237645197e-05, "loss": 0.7177, "step": 150300 }, { "epoch": 0.9602877477224231, "grad_norm": 0.7792565822601318, "learning_rate": 5.314601603763684e-05, "loss": 1.1403, "step": 150310 }, { "epoch": 0.9603516348721618, "grad_norm": 0.5869541764259338, "learning_rate": 5.314100827907087e-05, "loss": 0.8808, "step": 150320 }, { "epoch": 0.9604155220219005, "grad_norm": 0.8240609169006348, "learning_rate": 5.313600048887224e-05, "loss": 0.8968, "step": 150330 }, { "epoch": 0.9604794091716392, "grad_norm": 0.8057375550270081, "learning_rate": 5.313099266709136e-05, "loss": 0.856, "step": 150340 }, { "epoch": 0.9605432963213779, "grad_norm": 0.7334375977516174, "learning_rate": 5.312598481377869e-05, "loss": 1.004, "step": 150350 }, { "epoch": 0.9606071834711166, "grad_norm": 0.95655757188797, "learning_rate": 5.3120976928984635e-05, "loss": 0.9142, "step": 150360 }, { "epoch": 0.9606710706208553, "grad_norm": 0.6239562630653381, "learning_rate": 5.311596901275965e-05, "loss": 0.9041, "step": 150370 }, { "epoch": 0.960734957770594, "grad_norm": 2.0877952575683594, "learning_rate": 5.3110961065154154e-05, "loss": 1.0505, "step": 150380 }, { "epoch": 0.9607988449203327, "grad_norm": 1.8619037866592407, "learning_rate": 5.31059530862186e-05, "loss": 0.8509, "step": 150390 }, { "epoch": 0.9608627320700714, "grad_norm": 1.0404013395309448, "learning_rate": 5.310094507600338e-05, "loss": 0.9482, "step": 150400 }, { "epoch": 0.9609266192198102, "grad_norm": 0.5970574617385864, "learning_rate": 5.3095937034558994e-05, "loss": 1.0259, "step": 150410 }, { "epoch": 0.9609905063695489, "grad_norm": 0.9825915098190308, "learning_rate": 5.309092896193584e-05, "loss": 1.0407, "step": 150420 }, { "epoch": 0.9610543935192876, "grad_norm": 0.7270873188972473, "learning_rate": 5.308592085818435e-05, "loss": 0.7591, "step": 150430 }, { "epoch": 0.9611182806690263, "grad_norm": 1.053653597831726, "learning_rate": 5.308091272335497e-05, "loss": 0.8987, "step": 150440 }, { "epoch": 0.961182167818765, "grad_norm": 0.8554880023002625, "learning_rate": 5.307590455749812e-05, "loss": 0.7091, "step": 150450 }, { "epoch": 0.9612460549685037, "grad_norm": 0.5443610548973083, "learning_rate": 5.307089636066427e-05, "loss": 0.9389, "step": 150460 }, { "epoch": 0.9613099421182424, "grad_norm": 0.6648747324943542, "learning_rate": 5.306588813290383e-05, "loss": 1.0063, "step": 150470 }, { "epoch": 0.961373829267981, "grad_norm": 1.133631944656372, "learning_rate": 5.306087987426725e-05, "loss": 0.8192, "step": 150480 }, { "epoch": 0.9614377164177197, "grad_norm": 1.77051842212677, "learning_rate": 5.305587158480496e-05, "loss": 0.7231, "step": 150490 }, { "epoch": 0.9615016035674584, "grad_norm": 1.1798468828201294, "learning_rate": 5.3050863264567396e-05, "loss": 0.6667, "step": 150500 }, { "epoch": 0.9615654907171971, "grad_norm": 1.529371976852417, "learning_rate": 5.3045854913605e-05, "loss": 0.6694, "step": 150510 }, { "epoch": 0.9616293778669358, "grad_norm": 0.8843643665313721, "learning_rate": 5.30408465319682e-05, "loss": 0.9087, "step": 150520 }, { "epoch": 0.9616932650166745, "grad_norm": 0.660963773727417, "learning_rate": 5.303583811970746e-05, "loss": 0.7277, "step": 150530 }, { "epoch": 0.9617571521664132, "grad_norm": 1.6010621786117554, "learning_rate": 5.3030829676873196e-05, "loss": 0.89, "step": 150540 }, { "epoch": 0.9618210393161519, "grad_norm": 1.0877952575683594, "learning_rate": 5.3025821203515855e-05, "loss": 0.7486, "step": 150550 }, { "epoch": 0.9618849264658906, "grad_norm": 1.9828286170959473, "learning_rate": 5.302081269968587e-05, "loss": 0.8163, "step": 150560 }, { "epoch": 0.9619488136156293, "grad_norm": 0.7645601630210876, "learning_rate": 5.301580416543369e-05, "loss": 0.7069, "step": 150570 }, { "epoch": 0.962012700765368, "grad_norm": 0.8405600190162659, "learning_rate": 5.301079560080976e-05, "loss": 0.859, "step": 150580 }, { "epoch": 0.9620765879151068, "grad_norm": 1.2271150350570679, "learning_rate": 5.3005787005864515e-05, "loss": 0.7004, "step": 150590 }, { "epoch": 0.9621404750648455, "grad_norm": 1.4471722841262817, "learning_rate": 5.3000778380648396e-05, "loss": 0.8751, "step": 150600 }, { "epoch": 0.9622043622145842, "grad_norm": 0.9273978471755981, "learning_rate": 5.299576972521183e-05, "loss": 0.9883, "step": 150610 }, { "epoch": 0.9622682493643229, "grad_norm": 0.882642924785614, "learning_rate": 5.299076103960528e-05, "loss": 0.8676, "step": 150620 }, { "epoch": 0.9623321365140616, "grad_norm": 3.830977201461792, "learning_rate": 5.298575232387918e-05, "loss": 0.8193, "step": 150630 }, { "epoch": 0.9623960236638003, "grad_norm": 1.230293869972229, "learning_rate": 5.298074357808397e-05, "loss": 0.8701, "step": 150640 }, { "epoch": 0.962459910813539, "grad_norm": 1.0408953428268433, "learning_rate": 5.29757348022701e-05, "loss": 1.0132, "step": 150650 }, { "epoch": 0.9625237979632777, "grad_norm": 1.4850291013717651, "learning_rate": 5.297072599648799e-05, "loss": 0.747, "step": 150660 }, { "epoch": 0.9625876851130164, "grad_norm": 1.36034095287323, "learning_rate": 5.296571716078811e-05, "loss": 0.8511, "step": 150670 }, { "epoch": 0.9626515722627551, "grad_norm": 0.6303446292877197, "learning_rate": 5.29607082952209e-05, "loss": 0.8755, "step": 150680 }, { "epoch": 0.9627154594124938, "grad_norm": 1.7142499685287476, "learning_rate": 5.2955699399836776e-05, "loss": 0.8461, "step": 150690 }, { "epoch": 0.9627793465622325, "grad_norm": 0.9409696459770203, "learning_rate": 5.2950690474686215e-05, "loss": 0.8414, "step": 150700 }, { "epoch": 0.9628432337119712, "grad_norm": 1.2119961977005005, "learning_rate": 5.2945681519819646e-05, "loss": 0.7946, "step": 150710 }, { "epoch": 0.9629071208617099, "grad_norm": 0.9473667740821838, "learning_rate": 5.2940672535287516e-05, "loss": 0.8334, "step": 150720 }, { "epoch": 0.9629710080114485, "grad_norm": 0.7992385625839233, "learning_rate": 5.2935663521140274e-05, "loss": 0.8921, "step": 150730 }, { "epoch": 0.9630348951611872, "grad_norm": 1.3898727893829346, "learning_rate": 5.293065447742835e-05, "loss": 0.6968, "step": 150740 }, { "epoch": 0.9630987823109259, "grad_norm": 0.7555790543556213, "learning_rate": 5.292564540420221e-05, "loss": 0.9248, "step": 150750 }, { "epoch": 0.9631626694606646, "grad_norm": 1.3342205286026, "learning_rate": 5.292063630151228e-05, "loss": 0.6645, "step": 150760 }, { "epoch": 0.9632265566104034, "grad_norm": 1.6979509592056274, "learning_rate": 5.2915627169409035e-05, "loss": 1.0116, "step": 150770 }, { "epoch": 0.9632904437601421, "grad_norm": 0.8660208582878113, "learning_rate": 5.291061800794288e-05, "loss": 0.8999, "step": 150780 }, { "epoch": 0.9633543309098808, "grad_norm": 1.2357333898544312, "learning_rate": 5.29056088171643e-05, "loss": 0.6997, "step": 150790 }, { "epoch": 0.9634182180596195, "grad_norm": 0.9448620676994324, "learning_rate": 5.290059959712371e-05, "loss": 0.7693, "step": 150800 }, { "epoch": 0.9634821052093582, "grad_norm": 0.7834548354148865, "learning_rate": 5.289559034787158e-05, "loss": 0.7257, "step": 150810 }, { "epoch": 0.9635459923590969, "grad_norm": 0.8070199489593506, "learning_rate": 5.2890581069458355e-05, "loss": 0.7485, "step": 150820 }, { "epoch": 0.9636098795088356, "grad_norm": 1.6272724866867065, "learning_rate": 5.288557176193447e-05, "loss": 1.0041, "step": 150830 }, { "epoch": 0.9636737666585743, "grad_norm": 0.8953685164451599, "learning_rate": 5.288056242535039e-05, "loss": 0.9897, "step": 150840 }, { "epoch": 0.963737653808313, "grad_norm": 0.6054562330245972, "learning_rate": 5.2875553059756545e-05, "loss": 0.8495, "step": 150850 }, { "epoch": 0.9638015409580517, "grad_norm": 1.346152663230896, "learning_rate": 5.28705436652034e-05, "loss": 0.9919, "step": 150860 }, { "epoch": 0.9638654281077904, "grad_norm": 1.1231837272644043, "learning_rate": 5.286553424174139e-05, "loss": 0.8157, "step": 150870 }, { "epoch": 0.9639293152575291, "grad_norm": 3.220287561416626, "learning_rate": 5.286052478942097e-05, "loss": 0.8534, "step": 150880 }, { "epoch": 0.9639932024072678, "grad_norm": 0.7934685945510864, "learning_rate": 5.28555153082926e-05, "loss": 0.8603, "step": 150890 }, { "epoch": 0.9640570895570065, "grad_norm": 0.9011366963386536, "learning_rate": 5.2850505798406716e-05, "loss": 0.846, "step": 150900 }, { "epoch": 0.9641209767067452, "grad_norm": 0.7348203659057617, "learning_rate": 5.2845496259813773e-05, "loss": 1.0849, "step": 150910 }, { "epoch": 0.9641848638564839, "grad_norm": 1.1324224472045898, "learning_rate": 5.284098765057728e-05, "loss": 1.0193, "step": 150920 }, { "epoch": 0.9642487510062226, "grad_norm": 1.1051008701324463, "learning_rate": 5.283597805757992e-05, "loss": 1.0603, "step": 150930 }, { "epoch": 0.9643126381559614, "grad_norm": 1.0409125089645386, "learning_rate": 5.28309684360218e-05, "loss": 1.0198, "step": 150940 }, { "epoch": 0.9643765253057001, "grad_norm": 1.0346943140029907, "learning_rate": 5.282595878595338e-05, "loss": 0.9523, "step": 150950 }, { "epoch": 0.9644404124554388, "grad_norm": 0.9301409125328064, "learning_rate": 5.282094910742511e-05, "loss": 0.8966, "step": 150960 }, { "epoch": 0.9645042996051774, "grad_norm": 1.0205413103103638, "learning_rate": 5.281593940048745e-05, "loss": 0.8136, "step": 150970 }, { "epoch": 0.9645681867549161, "grad_norm": 0.4575611650943756, "learning_rate": 5.2810929665190836e-05, "loss": 0.9879, "step": 150980 }, { "epoch": 0.9646320739046548, "grad_norm": 1.4380875825881958, "learning_rate": 5.280591990158572e-05, "loss": 0.8518, "step": 150990 }, { "epoch": 0.9646959610543935, "grad_norm": 1.1791918277740479, "learning_rate": 5.280091010972258e-05, "loss": 1.0687, "step": 151000 }, { "epoch": 0.9647598482041322, "grad_norm": 1.3037655353546143, "learning_rate": 5.279590028965185e-05, "loss": 1.2233, "step": 151010 }, { "epoch": 0.9648237353538709, "grad_norm": 1.227778673171997, "learning_rate": 5.2790890441423965e-05, "loss": 0.9386, "step": 151020 }, { "epoch": 0.9648876225036096, "grad_norm": 0.6218796968460083, "learning_rate": 5.2785880565089416e-05, "loss": 1.1117, "step": 151030 }, { "epoch": 0.9649515096533483, "grad_norm": 1.0044218301773071, "learning_rate": 5.2780870660698634e-05, "loss": 0.8787, "step": 151040 }, { "epoch": 0.965015396803087, "grad_norm": 0.7901304960250854, "learning_rate": 5.2775860728302084e-05, "loss": 0.8695, "step": 151050 }, { "epoch": 0.9650792839528257, "grad_norm": 1.453648328781128, "learning_rate": 5.277085076795021e-05, "loss": 0.8621, "step": 151060 }, { "epoch": 0.9651431711025644, "grad_norm": 0.5584386587142944, "learning_rate": 5.2765840779693474e-05, "loss": 0.8665, "step": 151070 }, { "epoch": 0.9652070582523031, "grad_norm": 0.5596110224723816, "learning_rate": 5.2760830763582326e-05, "loss": 0.8572, "step": 151080 }, { "epoch": 0.9652709454020418, "grad_norm": 1.8040897846221924, "learning_rate": 5.275582071966723e-05, "loss": 0.885, "step": 151090 }, { "epoch": 0.9653348325517805, "grad_norm": 0.8948487043380737, "learning_rate": 5.275081064799864e-05, "loss": 1.2298, "step": 151100 }, { "epoch": 0.9653987197015192, "grad_norm": 0.6678011417388916, "learning_rate": 5.2745800548626986e-05, "loss": 0.9738, "step": 151110 }, { "epoch": 0.965462606851258, "grad_norm": 0.5756567716598511, "learning_rate": 5.274079042160278e-05, "loss": 0.7736, "step": 151120 }, { "epoch": 0.9655264940009967, "grad_norm": 2.063096046447754, "learning_rate": 5.273578026697642e-05, "loss": 0.7518, "step": 151130 }, { "epoch": 0.9655903811507354, "grad_norm": 0.6849361658096313, "learning_rate": 5.2730770084798384e-05, "loss": 0.7166, "step": 151140 }, { "epoch": 0.9656542683004741, "grad_norm": 1.2403055429458618, "learning_rate": 5.272575987511914e-05, "loss": 0.9516, "step": 151150 }, { "epoch": 0.9657181554502128, "grad_norm": 1.0753352642059326, "learning_rate": 5.272074963798913e-05, "loss": 0.6363, "step": 151160 }, { "epoch": 0.9657820425999515, "grad_norm": 0.6305554509162903, "learning_rate": 5.271573937345882e-05, "loss": 0.74, "step": 151170 }, { "epoch": 0.9658459297496902, "grad_norm": 0.7894411087036133, "learning_rate": 5.271072908157866e-05, "loss": 0.7682, "step": 151180 }, { "epoch": 0.9659098168994289, "grad_norm": 0.7625752687454224, "learning_rate": 5.270571876239911e-05, "loss": 0.7678, "step": 151190 }, { "epoch": 0.9659737040491676, "grad_norm": 0.777441143989563, "learning_rate": 5.270070841597062e-05, "loss": 0.939, "step": 151200 }, { "epoch": 0.9660375911989062, "grad_norm": 0.7669252157211304, "learning_rate": 5.269569804234369e-05, "loss": 1.0347, "step": 151210 }, { "epoch": 0.9661014783486449, "grad_norm": 1.6018074750900269, "learning_rate": 5.2691188682866444e-05, "loss": 0.9971, "step": 151220 }, { "epoch": 0.9661653654983836, "grad_norm": 1.066701889038086, "learning_rate": 5.268617825770142e-05, "loss": 0.8429, "step": 151230 }, { "epoch": 0.9662292526481223, "grad_norm": 1.0202710628509521, "learning_rate": 5.268116780548426e-05, "loss": 0.8482, "step": 151240 }, { "epoch": 0.966293139797861, "grad_norm": 0.9673142433166504, "learning_rate": 5.267615732626542e-05, "loss": 0.9325, "step": 151250 }, { "epoch": 0.9663570269475997, "grad_norm": 0.8143606781959534, "learning_rate": 5.2671146820095365e-05, "loss": 0.8128, "step": 151260 }, { "epoch": 0.9664209140973384, "grad_norm": 1.2554757595062256, "learning_rate": 5.266613628702456e-05, "loss": 0.8891, "step": 151270 }, { "epoch": 0.9664848012470771, "grad_norm": 1.1865863800048828, "learning_rate": 5.2661125727103434e-05, "loss": 0.9087, "step": 151280 }, { "epoch": 0.9665486883968158, "grad_norm": 0.8015139102935791, "learning_rate": 5.265611514038248e-05, "loss": 0.9303, "step": 151290 }, { "epoch": 0.9666125755465546, "grad_norm": 0.9343031048774719, "learning_rate": 5.2651104526912145e-05, "loss": 1.0272, "step": 151300 }, { "epoch": 0.9666764626962933, "grad_norm": 0.8166914582252502, "learning_rate": 5.26460938867429e-05, "loss": 1.2548, "step": 151310 }, { "epoch": 0.966740349846032, "grad_norm": 0.9215566515922546, "learning_rate": 5.264108321992518e-05, "loss": 0.8869, "step": 151320 }, { "epoch": 0.9668042369957707, "grad_norm": 1.8220895528793335, "learning_rate": 5.2636072526509486e-05, "loss": 1.0963, "step": 151330 }, { "epoch": 0.9668681241455094, "grad_norm": 1.234108805656433, "learning_rate": 5.2631061806546255e-05, "loss": 0.9705, "step": 151340 }, { "epoch": 0.9669320112952481, "grad_norm": 1.0234565734863281, "learning_rate": 5.2626051060085956e-05, "loss": 0.7106, "step": 151350 }, { "epoch": 0.9669958984449868, "grad_norm": 0.9547996520996094, "learning_rate": 5.262104028717906e-05, "loss": 0.9427, "step": 151360 }, { "epoch": 0.9670597855947255, "grad_norm": 0.819320559501648, "learning_rate": 5.261602948787601e-05, "loss": 0.8402, "step": 151370 }, { "epoch": 0.9671236727444642, "grad_norm": 1.10805082321167, "learning_rate": 5.261101866222728e-05, "loss": 0.9087, "step": 151380 }, { "epoch": 0.9671875598942029, "grad_norm": 0.9255377650260925, "learning_rate": 5.260600781028334e-05, "loss": 0.898, "step": 151390 }, { "epoch": 0.9672514470439416, "grad_norm": 0.827276885509491, "learning_rate": 5.2600996932094634e-05, "loss": 0.8182, "step": 151400 }, { "epoch": 0.9673153341936803, "grad_norm": 1.3312925100326538, "learning_rate": 5.259598602771165e-05, "loss": 0.8368, "step": 151410 }, { "epoch": 0.967379221343419, "grad_norm": 2.995258092880249, "learning_rate": 5.2590975097184844e-05, "loss": 0.7184, "step": 151420 }, { "epoch": 0.9674431084931577, "grad_norm": 0.8039790391921997, "learning_rate": 5.258596414056467e-05, "loss": 0.6866, "step": 151430 }, { "epoch": 0.9675069956428964, "grad_norm": 1.1383510828018188, "learning_rate": 5.25809531579016e-05, "loss": 0.6508, "step": 151440 }, { "epoch": 0.967570882792635, "grad_norm": 0.8737443089485168, "learning_rate": 5.25759421492461e-05, "loss": 0.8217, "step": 151450 }, { "epoch": 0.9676347699423737, "grad_norm": 0.9597667455673218, "learning_rate": 5.257093111464865e-05, "loss": 0.8019, "step": 151460 }, { "epoch": 0.9676986570921124, "grad_norm": 0.9724913239479065, "learning_rate": 5.256592005415968e-05, "loss": 1.0306, "step": 151470 }, { "epoch": 0.9677625442418512, "grad_norm": 0.9813938736915588, "learning_rate": 5.256090896782968e-05, "loss": 1.2299, "step": 151480 }, { "epoch": 0.9678264313915899, "grad_norm": 0.5077504515647888, "learning_rate": 5.2555897855709114e-05, "loss": 0.8514, "step": 151490 }, { "epoch": 0.9678903185413286, "grad_norm": 0.8173817992210388, "learning_rate": 5.2550886717848436e-05, "loss": 1.0456, "step": 151500 }, { "epoch": 0.9679542056910673, "grad_norm": 0.8337730765342712, "learning_rate": 5.254587555429813e-05, "loss": 0.9812, "step": 151510 }, { "epoch": 0.968018092840806, "grad_norm": 0.9300865530967712, "learning_rate": 5.254086436510866e-05, "loss": 0.696, "step": 151520 }, { "epoch": 0.9680819799905447, "grad_norm": 0.712742805480957, "learning_rate": 5.253585315033047e-05, "loss": 0.8063, "step": 151530 }, { "epoch": 0.9681458671402834, "grad_norm": 0.685977578163147, "learning_rate": 5.253084191001406e-05, "loss": 0.9119, "step": 151540 }, { "epoch": 0.9682097542900221, "grad_norm": 1.3182839155197144, "learning_rate": 5.2525830644209885e-05, "loss": 0.8322, "step": 151550 }, { "epoch": 0.9682736414397608, "grad_norm": 1.8923838138580322, "learning_rate": 5.25208193529684e-05, "loss": 0.8107, "step": 151560 }, { "epoch": 0.9683375285894995, "grad_norm": 0.7532184720039368, "learning_rate": 5.251580803634008e-05, "loss": 0.6918, "step": 151570 }, { "epoch": 0.9684014157392382, "grad_norm": 0.9623368382453918, "learning_rate": 5.2510796694375406e-05, "loss": 0.8558, "step": 151580 }, { "epoch": 0.9684653028889769, "grad_norm": 0.7997425198554993, "learning_rate": 5.2505785327124836e-05, "loss": 0.9148, "step": 151590 }, { "epoch": 0.9685291900387156, "grad_norm": 0.9608021378517151, "learning_rate": 5.250077393463884e-05, "loss": 1.0758, "step": 151600 }, { "epoch": 0.9685930771884543, "grad_norm": 1.5573434829711914, "learning_rate": 5.2495762516967886e-05, "loss": 0.8347, "step": 151610 }, { "epoch": 0.968656964338193, "grad_norm": 2.805736541748047, "learning_rate": 5.2490751074162446e-05, "loss": 0.8639, "step": 151620 }, { "epoch": 0.9687208514879317, "grad_norm": 0.5936098098754883, "learning_rate": 5.2485739606272985e-05, "loss": 0.8002, "step": 151630 }, { "epoch": 0.9687847386376705, "grad_norm": 0.6356973648071289, "learning_rate": 5.248072811334997e-05, "loss": 0.8728, "step": 151640 }, { "epoch": 0.9688486257874092, "grad_norm": 1.3857841491699219, "learning_rate": 5.2475716595443894e-05, "loss": 1.0488, "step": 151650 }, { "epoch": 0.9689125129371479, "grad_norm": 1.0003827810287476, "learning_rate": 5.24707050526052e-05, "loss": 0.8302, "step": 151660 }, { "epoch": 0.9689764000868866, "grad_norm": 0.884917140007019, "learning_rate": 5.246569348488436e-05, "loss": 0.734, "step": 151670 }, { "epoch": 0.9690402872366253, "grad_norm": 0.648077130317688, "learning_rate": 5.246068189233186e-05, "loss": 1.0681, "step": 151680 }, { "epoch": 0.969104174386364, "grad_norm": 0.6451058387756348, "learning_rate": 5.245567027499816e-05, "loss": 0.9954, "step": 151690 }, { "epoch": 0.9691680615361026, "grad_norm": 1.31290602684021, "learning_rate": 5.2450658632933736e-05, "loss": 0.9458, "step": 151700 }, { "epoch": 0.9692319486858413, "grad_norm": 0.7243294715881348, "learning_rate": 5.244564696618907e-05, "loss": 0.8819, "step": 151710 }, { "epoch": 0.96929583583558, "grad_norm": 1.8024976253509521, "learning_rate": 5.244063527481462e-05, "loss": 0.7875, "step": 151720 }, { "epoch": 0.9693597229853187, "grad_norm": 1.2114737033843994, "learning_rate": 5.243562355886086e-05, "loss": 0.7601, "step": 151730 }, { "epoch": 0.9694236101350574, "grad_norm": 1.0889559984207153, "learning_rate": 5.243061181837826e-05, "loss": 0.9074, "step": 151740 }, { "epoch": 0.9694874972847961, "grad_norm": 0.7654042840003967, "learning_rate": 5.24256000534173e-05, "loss": 0.7559, "step": 151750 }, { "epoch": 0.9695513844345348, "grad_norm": 0.9789912700653076, "learning_rate": 5.242058826402846e-05, "loss": 0.8855, "step": 151760 }, { "epoch": 0.9696152715842735, "grad_norm": 2.210740089416504, "learning_rate": 5.241557645026219e-05, "loss": 0.9904, "step": 151770 }, { "epoch": 0.9696791587340122, "grad_norm": 0.6800025105476379, "learning_rate": 5.241056461216898e-05, "loss": 0.9026, "step": 151780 }, { "epoch": 0.9697430458837509, "grad_norm": 1.204058289527893, "learning_rate": 5.240555274979929e-05, "loss": 1.0395, "step": 151790 }, { "epoch": 0.9698069330334896, "grad_norm": 0.8177311420440674, "learning_rate": 5.240054086320361e-05, "loss": 1.1084, "step": 151800 }, { "epoch": 0.9698708201832283, "grad_norm": 0.714911699295044, "learning_rate": 5.239552895243241e-05, "loss": 0.8985, "step": 151810 }, { "epoch": 0.969934707332967, "grad_norm": 2.143303155899048, "learning_rate": 5.239051701753614e-05, "loss": 1.1244, "step": 151820 }, { "epoch": 0.9699985944827058, "grad_norm": 0.977552592754364, "learning_rate": 5.2385505058565324e-05, "loss": 1.0981, "step": 151830 }, { "epoch": 0.9700624816324445, "grad_norm": 2.233025312423706, "learning_rate": 5.2380493075570394e-05, "loss": 1.1075, "step": 151840 }, { "epoch": 0.9701263687821832, "grad_norm": 1.9697365760803223, "learning_rate": 5.237548106860183e-05, "loss": 0.8237, "step": 151850 }, { "epoch": 0.9701902559319219, "grad_norm": 3.052886486053467, "learning_rate": 5.237046903771012e-05, "loss": 0.762, "step": 151860 }, { "epoch": 0.9702541430816606, "grad_norm": 2.318892002105713, "learning_rate": 5.236545698294575e-05, "loss": 0.7751, "step": 151870 }, { "epoch": 0.9703180302313993, "grad_norm": 0.842376708984375, "learning_rate": 5.2360444904359176e-05, "loss": 0.8483, "step": 151880 }, { "epoch": 0.970381917381138, "grad_norm": 1.4306411743164062, "learning_rate": 5.235543280200088e-05, "loss": 0.8895, "step": 151890 }, { "epoch": 0.9704458045308767, "grad_norm": 4.7012939453125, "learning_rate": 5.235042067592133e-05, "loss": 1.0352, "step": 151900 }, { "epoch": 0.9705096916806154, "grad_norm": 0.7660112977027893, "learning_rate": 5.234540852617102e-05, "loss": 0.6482, "step": 151910 }, { "epoch": 0.9705735788303541, "grad_norm": 1.0199682712554932, "learning_rate": 5.234039635280041e-05, "loss": 0.9154, "step": 151920 }, { "epoch": 0.9706374659800928, "grad_norm": 0.9796050190925598, "learning_rate": 5.233538415585999e-05, "loss": 0.9538, "step": 151930 }, { "epoch": 0.9707013531298314, "grad_norm": 0.7912867069244385, "learning_rate": 5.233037193540023e-05, "loss": 0.7974, "step": 151940 }, { "epoch": 0.9707652402795701, "grad_norm": 0.8988333344459534, "learning_rate": 5.2325359691471606e-05, "loss": 0.8347, "step": 151950 }, { "epoch": 0.9708291274293088, "grad_norm": 0.6798145174980164, "learning_rate": 5.2320347424124606e-05, "loss": 0.8257, "step": 151960 }, { "epoch": 0.9708930145790475, "grad_norm": 3.4491665363311768, "learning_rate": 5.2315335133409694e-05, "loss": 0.7976, "step": 151970 }, { "epoch": 0.9709569017287862, "grad_norm": 0.5991364121437073, "learning_rate": 5.2310322819377355e-05, "loss": 0.9797, "step": 151980 }, { "epoch": 0.9710207888785249, "grad_norm": 0.987819492816925, "learning_rate": 5.2305310482078064e-05, "loss": 0.9553, "step": 151990 }, { "epoch": 0.9710846760282636, "grad_norm": 0.7812177538871765, "learning_rate": 5.230029812156232e-05, "loss": 0.7681, "step": 152000 }, { "epoch": 0.9711485631780024, "grad_norm": 0.9155138731002808, "learning_rate": 5.229528573788055e-05, "loss": 0.7462, "step": 152010 }, { "epoch": 0.9712124503277411, "grad_norm": 1.2678287029266357, "learning_rate": 5.229027333108328e-05, "loss": 1.0377, "step": 152020 }, { "epoch": 0.9712763374774798, "grad_norm": 0.624622106552124, "learning_rate": 5.228526090122099e-05, "loss": 0.9255, "step": 152030 }, { "epoch": 0.9713402246272185, "grad_norm": 1.6265813112258911, "learning_rate": 5.228024844834414e-05, "loss": 0.7908, "step": 152040 }, { "epoch": 0.9714041117769572, "grad_norm": 0.7967225313186646, "learning_rate": 5.227523597250321e-05, "loss": 0.9871, "step": 152050 }, { "epoch": 0.9714679989266959, "grad_norm": 1.2453161478042603, "learning_rate": 5.227022347374868e-05, "loss": 0.876, "step": 152060 }, { "epoch": 0.9715318860764346, "grad_norm": 1.3441709280014038, "learning_rate": 5.226521095213105e-05, "loss": 0.7978, "step": 152070 }, { "epoch": 0.9715957732261733, "grad_norm": 1.4316071271896362, "learning_rate": 5.2260198407700775e-05, "loss": 0.7867, "step": 152080 }, { "epoch": 0.971659660375912, "grad_norm": 2.101490020751953, "learning_rate": 5.225518584050835e-05, "loss": 0.9115, "step": 152090 }, { "epoch": 0.9717235475256507, "grad_norm": 0.7814741730690002, "learning_rate": 5.225017325060425e-05, "loss": 0.8807, "step": 152100 }, { "epoch": 0.9717874346753894, "grad_norm": 0.9148108959197998, "learning_rate": 5.224516063803897e-05, "loss": 0.876, "step": 152110 }, { "epoch": 0.9718513218251281, "grad_norm": 0.9834555983543396, "learning_rate": 5.2240148002862964e-05, "loss": 0.8846, "step": 152120 }, { "epoch": 0.9719152089748668, "grad_norm": 0.8105610013008118, "learning_rate": 5.223513534512674e-05, "loss": 0.8168, "step": 152130 }, { "epoch": 0.9719790961246055, "grad_norm": 1.4332830905914307, "learning_rate": 5.223012266488076e-05, "loss": 0.8217, "step": 152140 }, { "epoch": 0.9720429832743442, "grad_norm": 0.8803772330284119, "learning_rate": 5.222510996217554e-05, "loss": 0.8183, "step": 152150 }, { "epoch": 0.972106870424083, "grad_norm": 1.188647985458374, "learning_rate": 5.222009723706151e-05, "loss": 0.9345, "step": 152160 }, { "epoch": 0.9721707575738217, "grad_norm": 0.9444614052772522, "learning_rate": 5.2215084489589194e-05, "loss": 0.9522, "step": 152170 }, { "epoch": 0.9722346447235602, "grad_norm": 1.4221155643463135, "learning_rate": 5.2210071719809064e-05, "loss": 0.977, "step": 152180 }, { "epoch": 0.972298531873299, "grad_norm": 1.2089207172393799, "learning_rate": 5.220505892777159e-05, "loss": 0.8424, "step": 152190 }, { "epoch": 0.9723624190230377, "grad_norm": 0.8503243327140808, "learning_rate": 5.220004611352727e-05, "loss": 0.96, "step": 152200 }, { "epoch": 0.9724263061727764, "grad_norm": 0.7801720499992371, "learning_rate": 5.219503327712656e-05, "loss": 0.7537, "step": 152210 }, { "epoch": 0.9724901933225151, "grad_norm": 0.8834246397018433, "learning_rate": 5.219002041861999e-05, "loss": 0.8585, "step": 152220 }, { "epoch": 0.9725540804722538, "grad_norm": 0.9630089998245239, "learning_rate": 5.218500753805802e-05, "loss": 0.7415, "step": 152230 }, { "epoch": 0.9726179676219925, "grad_norm": 0.6178570985794067, "learning_rate": 5.217999463549113e-05, "loss": 0.8743, "step": 152240 }, { "epoch": 0.9726818547717312, "grad_norm": 1.1636070013046265, "learning_rate": 5.217498171096982e-05, "loss": 0.8063, "step": 152250 }, { "epoch": 0.9727457419214699, "grad_norm": 0.6922101378440857, "learning_rate": 5.216996876454454e-05, "loss": 0.806, "step": 152260 }, { "epoch": 0.9728096290712086, "grad_norm": 0.6797016859054565, "learning_rate": 5.2164955796265814e-05, "loss": 0.8855, "step": 152270 }, { "epoch": 0.9728735162209473, "grad_norm": 0.6983970403671265, "learning_rate": 5.21599428061841e-05, "loss": 0.8559, "step": 152280 }, { "epoch": 0.972937403370686, "grad_norm": 1.0314277410507202, "learning_rate": 5.2154929794349894e-05, "loss": 0.6021, "step": 152290 }, { "epoch": 0.9730012905204247, "grad_norm": 0.8680412173271179, "learning_rate": 5.214991676081369e-05, "loss": 0.7651, "step": 152300 }, { "epoch": 0.9730651776701634, "grad_norm": 0.8231766819953918, "learning_rate": 5.214490370562596e-05, "loss": 0.7858, "step": 152310 }, { "epoch": 0.9731290648199021, "grad_norm": 0.8090435266494751, "learning_rate": 5.2139890628837183e-05, "loss": 1.0899, "step": 152320 }, { "epoch": 0.9731929519696408, "grad_norm": 1.0858497619628906, "learning_rate": 5.213487753049787e-05, "loss": 0.8816, "step": 152330 }, { "epoch": 0.9732568391193795, "grad_norm": 0.9355524778366089, "learning_rate": 5.212986441065849e-05, "loss": 1.0293, "step": 152340 }, { "epoch": 0.9733207262691183, "grad_norm": 1.1359403133392334, "learning_rate": 5.2124851269369534e-05, "loss": 0.919, "step": 152350 }, { "epoch": 0.973384613418857, "grad_norm": 1.9253411293029785, "learning_rate": 5.211983810668148e-05, "loss": 1.0969, "step": 152360 }, { "epoch": 0.9734485005685957, "grad_norm": 0.8547667264938354, "learning_rate": 5.2114824922644824e-05, "loss": 0.6838, "step": 152370 }, { "epoch": 0.9735123877183344, "grad_norm": 1.0332297086715698, "learning_rate": 5.210981171731005e-05, "loss": 0.8215, "step": 152380 }, { "epoch": 0.9735762748680731, "grad_norm": 1.0458303689956665, "learning_rate": 5.210479849072765e-05, "loss": 0.7492, "step": 152390 }, { "epoch": 0.9736401620178118, "grad_norm": 0.9344658851623535, "learning_rate": 5.209978524294811e-05, "loss": 0.741, "step": 152400 }, { "epoch": 0.9737040491675505, "grad_norm": 1.4250115156173706, "learning_rate": 5.209477197402192e-05, "loss": 0.6877, "step": 152410 }, { "epoch": 0.9737679363172892, "grad_norm": 0.9061084985733032, "learning_rate": 5.208975868399956e-05, "loss": 1.0584, "step": 152420 }, { "epoch": 0.9738318234670278, "grad_norm": 0.806816041469574, "learning_rate": 5.208474537293152e-05, "loss": 0.6887, "step": 152430 }, { "epoch": 0.9738957106167665, "grad_norm": 0.933032751083374, "learning_rate": 5.207973204086829e-05, "loss": 0.8562, "step": 152440 }, { "epoch": 0.9739595977665052, "grad_norm": 1.0426222085952759, "learning_rate": 5.207471868786036e-05, "loss": 0.871, "step": 152450 }, { "epoch": 0.9740234849162439, "grad_norm": 1.1309046745300293, "learning_rate": 5.206970531395822e-05, "loss": 0.7954, "step": 152460 }, { "epoch": 0.9740873720659826, "grad_norm": 0.8131570816040039, "learning_rate": 5.2064691919212364e-05, "loss": 0.91, "step": 152470 }, { "epoch": 0.9741512592157213, "grad_norm": 0.9664103388786316, "learning_rate": 5.205967850367326e-05, "loss": 1.0082, "step": 152480 }, { "epoch": 0.97421514636546, "grad_norm": 1.055430293083191, "learning_rate": 5.205466506739143e-05, "loss": 0.9536, "step": 152490 }, { "epoch": 0.9742790335151987, "grad_norm": 0.8127159476280212, "learning_rate": 5.2049651610417326e-05, "loss": 0.7859, "step": 152500 }, { "epoch": 0.9743429206649374, "grad_norm": 0.7035512924194336, "learning_rate": 5.204463813280147e-05, "loss": 0.9262, "step": 152510 }, { "epoch": 0.9744068078146761, "grad_norm": 0.881603479385376, "learning_rate": 5.203962463459433e-05, "loss": 0.7748, "step": 152520 }, { "epoch": 0.9744706949644149, "grad_norm": 0.8797648549079895, "learning_rate": 5.203461111584641e-05, "loss": 1.044, "step": 152530 }, { "epoch": 0.9745345821141536, "grad_norm": 0.8272404670715332, "learning_rate": 5.202959757660819e-05, "loss": 0.6735, "step": 152540 }, { "epoch": 0.9745984692638923, "grad_norm": 0.9870911240577698, "learning_rate": 5.202458401693017e-05, "loss": 0.811, "step": 152550 }, { "epoch": 0.974662356413631, "grad_norm": 0.838467001914978, "learning_rate": 5.2019570436862844e-05, "loss": 1.024, "step": 152560 }, { "epoch": 0.9747262435633697, "grad_norm": 0.5068958401679993, "learning_rate": 5.2014556836456685e-05, "loss": 1.1225, "step": 152570 }, { "epoch": 0.9747901307131084, "grad_norm": 0.6512907147407532, "learning_rate": 5.2009543215762204e-05, "loss": 0.8789, "step": 152580 }, { "epoch": 0.9748540178628471, "grad_norm": 0.9875491857528687, "learning_rate": 5.200452957482988e-05, "loss": 0.7917, "step": 152590 }, { "epoch": 0.9749179050125858, "grad_norm": 0.5896627902984619, "learning_rate": 5.199951591371022e-05, "loss": 1.141, "step": 152600 }, { "epoch": 0.9749817921623245, "grad_norm": 0.9969107508659363, "learning_rate": 5.199450223245369e-05, "loss": 0.8511, "step": 152610 }, { "epoch": 0.9750456793120632, "grad_norm": 0.7358691096305847, "learning_rate": 5.1989488531110794e-05, "loss": 0.7029, "step": 152620 }, { "epoch": 0.9751095664618019, "grad_norm": 0.5284615159034729, "learning_rate": 5.198447480973204e-05, "loss": 0.7927, "step": 152630 }, { "epoch": 0.9751734536115406, "grad_norm": 1.0184814929962158, "learning_rate": 5.1979461068367904e-05, "loss": 0.9482, "step": 152640 }, { "epoch": 0.9752373407612793, "grad_norm": 1.5319088697433472, "learning_rate": 5.197444730706889e-05, "loss": 0.871, "step": 152650 }, { "epoch": 0.975301227911018, "grad_norm": 1.3232473134994507, "learning_rate": 5.196943352588548e-05, "loss": 0.8189, "step": 152660 }, { "epoch": 0.9753651150607566, "grad_norm": 0.698314368724823, "learning_rate": 5.196441972486816e-05, "loss": 0.8088, "step": 152670 }, { "epoch": 0.9754290022104953, "grad_norm": 0.8359566926956177, "learning_rate": 5.1959405904067446e-05, "loss": 0.8434, "step": 152680 }, { "epoch": 0.975492889360234, "grad_norm": 0.5400557518005371, "learning_rate": 5.195439206353381e-05, "loss": 0.8659, "step": 152690 }, { "epoch": 0.9755567765099727, "grad_norm": 1.6195552349090576, "learning_rate": 5.1949378203317764e-05, "loss": 0.6596, "step": 152700 }, { "epoch": 0.9756206636597115, "grad_norm": 1.1681946516036987, "learning_rate": 5.1944364323469785e-05, "loss": 0.8251, "step": 152710 }, { "epoch": 0.9756845508094502, "grad_norm": 1.1966689825057983, "learning_rate": 5.1939350424040376e-05, "loss": 0.9019, "step": 152720 }, { "epoch": 0.9757484379591889, "grad_norm": 0.7698038220405579, "learning_rate": 5.193433650508004e-05, "loss": 0.6619, "step": 152730 }, { "epoch": 0.9758123251089276, "grad_norm": 1.1396318674087524, "learning_rate": 5.192932256663925e-05, "loss": 0.9543, "step": 152740 }, { "epoch": 0.9758762122586663, "grad_norm": 0.9183258414268494, "learning_rate": 5.1924308608768524e-05, "loss": 1.0159, "step": 152750 }, { "epoch": 0.975940099408405, "grad_norm": 0.7935616374015808, "learning_rate": 5.1919294631518336e-05, "loss": 0.9497, "step": 152760 }, { "epoch": 0.9760039865581437, "grad_norm": 0.8395205140113831, "learning_rate": 5.1914280634939195e-05, "loss": 0.9478, "step": 152770 }, { "epoch": 0.9760678737078824, "grad_norm": 1.5380463600158691, "learning_rate": 5.190926661908159e-05, "loss": 0.9063, "step": 152780 }, { "epoch": 0.9761317608576211, "grad_norm": 0.8509006500244141, "learning_rate": 5.190425258399601e-05, "loss": 1.1259, "step": 152790 }, { "epoch": 0.9761956480073598, "grad_norm": 1.152294397354126, "learning_rate": 5.189923852973297e-05, "loss": 0.8142, "step": 152800 }, { "epoch": 0.9762595351570985, "grad_norm": 0.7129946947097778, "learning_rate": 5.1894224456342965e-05, "loss": 0.9805, "step": 152810 }, { "epoch": 0.9763234223068372, "grad_norm": 0.9357526302337646, "learning_rate": 5.188921036387646e-05, "loss": 0.9792, "step": 152820 }, { "epoch": 0.9763873094565759, "grad_norm": 0.7643981575965881, "learning_rate": 5.1884196252383986e-05, "loss": 0.7641, "step": 152830 }, { "epoch": 0.9764511966063146, "grad_norm": 0.860305666923523, "learning_rate": 5.187918212191603e-05, "loss": 0.8608, "step": 152840 }, { "epoch": 0.9765150837560533, "grad_norm": 1.20055091381073, "learning_rate": 5.187416797252307e-05, "loss": 0.7898, "step": 152850 }, { "epoch": 0.976578970905792, "grad_norm": 1.0174932479858398, "learning_rate": 5.186915380425562e-05, "loss": 0.7676, "step": 152860 }, { "epoch": 0.9766428580555307, "grad_norm": 0.8433327674865723, "learning_rate": 5.1864139617164174e-05, "loss": 1.0166, "step": 152870 }, { "epoch": 0.9767067452052695, "grad_norm": 0.8074188828468323, "learning_rate": 5.185912541129924e-05, "loss": 1.202, "step": 152880 }, { "epoch": 0.9767706323550082, "grad_norm": 0.7383306622505188, "learning_rate": 5.1854111186711295e-05, "loss": 0.8643, "step": 152890 }, { "epoch": 0.9768345195047469, "grad_norm": 0.704338014125824, "learning_rate": 5.184909694345084e-05, "loss": 0.7977, "step": 152900 }, { "epoch": 0.9768984066544855, "grad_norm": 0.6669245362281799, "learning_rate": 5.1844082681568386e-05, "loss": 0.8619, "step": 152910 }, { "epoch": 0.9769622938042242, "grad_norm": 0.9143712520599365, "learning_rate": 5.183906840111442e-05, "loss": 0.9948, "step": 152920 }, { "epoch": 0.9770261809539629, "grad_norm": 1.7431244850158691, "learning_rate": 5.1834054102139454e-05, "loss": 0.6948, "step": 152930 }, { "epoch": 0.9770900681037016, "grad_norm": 0.8700932264328003, "learning_rate": 5.182903978469398e-05, "loss": 0.8935, "step": 152940 }, { "epoch": 0.9771539552534403, "grad_norm": 0.7274859547615051, "learning_rate": 5.182402544882847e-05, "loss": 0.7769, "step": 152950 }, { "epoch": 0.977217842403179, "grad_norm": 0.48052549362182617, "learning_rate": 5.181901109459347e-05, "loss": 0.7875, "step": 152960 }, { "epoch": 0.9772817295529177, "grad_norm": 1.1585367918014526, "learning_rate": 5.181399672203946e-05, "loss": 0.7967, "step": 152970 }, { "epoch": 0.9773456167026564, "grad_norm": 1.8060321807861328, "learning_rate": 5.1808982331216915e-05, "loss": 0.8072, "step": 152980 }, { "epoch": 0.9774095038523951, "grad_norm": 0.683174729347229, "learning_rate": 5.1803967922176354e-05, "loss": 0.8027, "step": 152990 }, { "epoch": 0.9774733910021338, "grad_norm": 0.8326396346092224, "learning_rate": 5.1798953494968285e-05, "loss": 0.6853, "step": 153000 }, { "epoch": 0.9775372781518725, "grad_norm": 0.6510669589042664, "learning_rate": 5.179393904964319e-05, "loss": 0.9173, "step": 153010 }, { "epoch": 0.9776011653016112, "grad_norm": 0.894503653049469, "learning_rate": 5.1788924586251575e-05, "loss": 0.9928, "step": 153020 }, { "epoch": 0.9776650524513499, "grad_norm": 0.8102232217788696, "learning_rate": 5.178391010484395e-05, "loss": 1.1457, "step": 153030 }, { "epoch": 0.9777289396010886, "grad_norm": 0.8255794644355774, "learning_rate": 5.17788956054708e-05, "loss": 0.7739, "step": 153040 }, { "epoch": 0.9777928267508273, "grad_norm": 0.968258261680603, "learning_rate": 5.177388108818263e-05, "loss": 0.8573, "step": 153050 }, { "epoch": 0.977856713900566, "grad_norm": 0.6829890608787537, "learning_rate": 5.176886655302994e-05, "loss": 0.8869, "step": 153060 }, { "epoch": 0.9779206010503048, "grad_norm": 3.4975321292877197, "learning_rate": 5.176385200006324e-05, "loss": 1.072, "step": 153070 }, { "epoch": 0.9779844882000435, "grad_norm": 1.045479416847229, "learning_rate": 5.1758837429333026e-05, "loss": 0.6734, "step": 153080 }, { "epoch": 0.9780483753497822, "grad_norm": 1.0568046569824219, "learning_rate": 5.1753822840889796e-05, "loss": 0.7752, "step": 153090 }, { "epoch": 0.9781122624995209, "grad_norm": 0.843722403049469, "learning_rate": 5.174880823478405e-05, "loss": 0.9224, "step": 153100 }, { "epoch": 0.9781761496492596, "grad_norm": 1.006016731262207, "learning_rate": 5.174379361106629e-05, "loss": 0.8705, "step": 153110 }, { "epoch": 0.9782400367989983, "grad_norm": 2.0581963062286377, "learning_rate": 5.173877896978703e-05, "loss": 0.8711, "step": 153120 }, { "epoch": 0.978303923948737, "grad_norm": 0.788337230682373, "learning_rate": 5.173376431099676e-05, "loss": 0.7674, "step": 153130 }, { "epoch": 0.9783678110984757, "grad_norm": 1.791744351387024, "learning_rate": 5.172874963474598e-05, "loss": 0.9112, "step": 153140 }, { "epoch": 0.9784316982482143, "grad_norm": 0.7716138362884521, "learning_rate": 5.17237349410852e-05, "loss": 0.7965, "step": 153150 }, { "epoch": 0.978495585397953, "grad_norm": 0.616436779499054, "learning_rate": 5.171872023006491e-05, "loss": 1.0199, "step": 153160 }, { "epoch": 0.9785594725476917, "grad_norm": 0.8398680686950684, "learning_rate": 5.171370550173562e-05, "loss": 1.0967, "step": 153170 }, { "epoch": 0.9786233596974304, "grad_norm": 0.9748437404632568, "learning_rate": 5.170869075614784e-05, "loss": 0.9849, "step": 153180 }, { "epoch": 0.9786872468471691, "grad_norm": 0.8368244171142578, "learning_rate": 5.1703675993352064e-05, "loss": 0.8339, "step": 153190 }, { "epoch": 0.9787511339969078, "grad_norm": 0.9211305975914001, "learning_rate": 5.169866121339879e-05, "loss": 0.7099, "step": 153200 }, { "epoch": 0.9788150211466465, "grad_norm": 1.1104360818862915, "learning_rate": 5.169364641633855e-05, "loss": 0.8684, "step": 153210 }, { "epoch": 0.9788789082963852, "grad_norm": 0.6493495106697083, "learning_rate": 5.1688631602221794e-05, "loss": 1.0052, "step": 153220 }, { "epoch": 0.978942795446124, "grad_norm": 1.0354483127593994, "learning_rate": 5.168361677109908e-05, "loss": 0.9606, "step": 153230 }, { "epoch": 0.9790066825958627, "grad_norm": 0.6334801912307739, "learning_rate": 5.1678601923020876e-05, "loss": 1.0056, "step": 153240 }, { "epoch": 0.9790705697456014, "grad_norm": 1.2471381425857544, "learning_rate": 5.16735870580377e-05, "loss": 0.9594, "step": 153250 }, { "epoch": 0.9791344568953401, "grad_norm": 1.8686987161636353, "learning_rate": 5.166857217620006e-05, "loss": 0.8422, "step": 153260 }, { "epoch": 0.9791983440450788, "grad_norm": 0.7281016707420349, "learning_rate": 5.1663557277558447e-05, "loss": 1.0227, "step": 153270 }, { "epoch": 0.9792622311948175, "grad_norm": 1.3672218322753906, "learning_rate": 5.1658542362163385e-05, "loss": 1.2125, "step": 153280 }, { "epoch": 0.9793261183445562, "grad_norm": 0.6560665369033813, "learning_rate": 5.165352743006536e-05, "loss": 0.7462, "step": 153290 }, { "epoch": 0.9793900054942949, "grad_norm": 0.8147808313369751, "learning_rate": 5.164851248131488e-05, "loss": 0.8436, "step": 153300 }, { "epoch": 0.9794538926440336, "grad_norm": 1.1131194829940796, "learning_rate": 5.1643497515962455e-05, "loss": 0.8414, "step": 153310 }, { "epoch": 0.9795177797937723, "grad_norm": 0.8697034120559692, "learning_rate": 5.16384825340586e-05, "loss": 0.8524, "step": 153320 }, { "epoch": 0.979581666943511, "grad_norm": 0.9882239699363708, "learning_rate": 5.163346753565379e-05, "loss": 0.7327, "step": 153330 }, { "epoch": 0.9796455540932497, "grad_norm": 0.7860538959503174, "learning_rate": 5.162845252079855e-05, "loss": 0.7041, "step": 153340 }, { "epoch": 0.9797094412429884, "grad_norm": 1.2516241073608398, "learning_rate": 5.16234374895434e-05, "loss": 1.284, "step": 153350 }, { "epoch": 0.9797733283927271, "grad_norm": 1.390798568725586, "learning_rate": 5.161842244193882e-05, "loss": 1.0494, "step": 153360 }, { "epoch": 0.9798372155424658, "grad_norm": 0.6051182746887207, "learning_rate": 5.1613407378035326e-05, "loss": 0.8672, "step": 153370 }, { "epoch": 0.9799011026922045, "grad_norm": 1.087631344795227, "learning_rate": 5.1608392297883426e-05, "loss": 1.1912, "step": 153380 }, { "epoch": 0.9799649898419432, "grad_norm": 0.9267514944076538, "learning_rate": 5.160337720153362e-05, "loss": 0.8654, "step": 153390 }, { "epoch": 0.9800288769916818, "grad_norm": 1.898051142692566, "learning_rate": 5.1598362089036424e-05, "loss": 0.8454, "step": 153400 }, { "epoch": 0.9800927641414205, "grad_norm": 0.8334454894065857, "learning_rate": 5.1593346960442336e-05, "loss": 0.9431, "step": 153410 }, { "epoch": 0.9801566512911593, "grad_norm": 1.1599924564361572, "learning_rate": 5.158833181580186e-05, "loss": 0.7981, "step": 153420 }, { "epoch": 0.980220538440898, "grad_norm": 0.7810460925102234, "learning_rate": 5.1583316655165506e-05, "loss": 1.0807, "step": 153430 }, { "epoch": 0.9802844255906367, "grad_norm": 1.1010318994522095, "learning_rate": 5.157830147858379e-05, "loss": 0.7419, "step": 153440 }, { "epoch": 0.9803483127403754, "grad_norm": 0.6888182163238525, "learning_rate": 5.1573286286107216e-05, "loss": 0.785, "step": 153450 }, { "epoch": 0.9804121998901141, "grad_norm": 0.6871199011802673, "learning_rate": 5.15682710777863e-05, "loss": 0.8662, "step": 153460 }, { "epoch": 0.9804760870398528, "grad_norm": 0.7507506012916565, "learning_rate": 5.156325585367152e-05, "loss": 0.8132, "step": 153470 }, { "epoch": 0.9805399741895915, "grad_norm": 0.8823074698448181, "learning_rate": 5.1558240613813416e-05, "loss": 0.8198, "step": 153480 }, { "epoch": 0.9806038613393302, "grad_norm": 0.6161057949066162, "learning_rate": 5.155322535826246e-05, "loss": 0.8405, "step": 153490 }, { "epoch": 0.9806677484890689, "grad_norm": 0.7086304426193237, "learning_rate": 5.1548210087069196e-05, "loss": 0.7153, "step": 153500 }, { "epoch": 0.9807316356388076, "grad_norm": 1.631806492805481, "learning_rate": 5.154319480028411e-05, "loss": 0.7625, "step": 153510 }, { "epoch": 0.9807955227885463, "grad_norm": 0.7055345177650452, "learning_rate": 5.153817949795772e-05, "loss": 0.8304, "step": 153520 }, { "epoch": 0.980859409938285, "grad_norm": 1.2235612869262695, "learning_rate": 5.153316418014053e-05, "loss": 0.9013, "step": 153530 }, { "epoch": 0.9809232970880237, "grad_norm": 0.678455114364624, "learning_rate": 5.152814884688305e-05, "loss": 0.7831, "step": 153540 }, { "epoch": 0.9809871842377624, "grad_norm": 1.0608277320861816, "learning_rate": 5.152313349823579e-05, "loss": 0.7629, "step": 153550 }, { "epoch": 0.9810510713875011, "grad_norm": 0.8170053958892822, "learning_rate": 5.151811813424926e-05, "loss": 0.6033, "step": 153560 }, { "epoch": 0.9811149585372398, "grad_norm": 0.7763380408287048, "learning_rate": 5.151310275497396e-05, "loss": 0.8555, "step": 153570 }, { "epoch": 0.9811788456869786, "grad_norm": 1.2432817220687866, "learning_rate": 5.150808736046042e-05, "loss": 0.7298, "step": 153580 }, { "epoch": 0.9812427328367173, "grad_norm": 0.9610840082168579, "learning_rate": 5.150307195075912e-05, "loss": 0.9684, "step": 153590 }, { "epoch": 0.981306619986456, "grad_norm": 0.9366688132286072, "learning_rate": 5.149805652592059e-05, "loss": 0.7935, "step": 153600 }, { "epoch": 0.9813705071361947, "grad_norm": 0.8396714329719543, "learning_rate": 5.1493041085995334e-05, "loss": 1.1084, "step": 153610 }, { "epoch": 0.9814343942859334, "grad_norm": 0.9239795207977295, "learning_rate": 5.148802563103387e-05, "loss": 0.6051, "step": 153620 }, { "epoch": 0.9814982814356721, "grad_norm": 0.866423487663269, "learning_rate": 5.1483010161086695e-05, "loss": 1.1068, "step": 153630 }, { "epoch": 0.9815621685854107, "grad_norm": 1.2361878156661987, "learning_rate": 5.147799467620432e-05, "loss": 1.2309, "step": 153640 }, { "epoch": 0.9816260557351494, "grad_norm": 0.9184995889663696, "learning_rate": 5.147297917643728e-05, "loss": 0.9739, "step": 153650 }, { "epoch": 0.9816899428848881, "grad_norm": 0.8369823098182678, "learning_rate": 5.146796366183604e-05, "loss": 0.8687, "step": 153660 }, { "epoch": 0.9817538300346268, "grad_norm": 0.8505387902259827, "learning_rate": 5.146294813245115e-05, "loss": 0.8085, "step": 153670 }, { "epoch": 0.9818177171843655, "grad_norm": 1.1725592613220215, "learning_rate": 5.14579325883331e-05, "loss": 1.0974, "step": 153680 }, { "epoch": 0.9818816043341042, "grad_norm": 1.1303421258926392, "learning_rate": 5.145291702953241e-05, "loss": 0.8779, "step": 153690 }, { "epoch": 0.9819454914838429, "grad_norm": 1.3215134143829346, "learning_rate": 5.144790145609961e-05, "loss": 0.7935, "step": 153700 }, { "epoch": 0.9820093786335816, "grad_norm": 0.9500746130943298, "learning_rate": 5.1442885868085166e-05, "loss": 1.2262, "step": 153710 }, { "epoch": 0.9820732657833203, "grad_norm": 0.8446482419967651, "learning_rate": 5.143787026553962e-05, "loss": 0.7815, "step": 153720 }, { "epoch": 0.982137152933059, "grad_norm": 1.9297406673431396, "learning_rate": 5.143285464851347e-05, "loss": 0.8834, "step": 153730 }, { "epoch": 0.9822010400827977, "grad_norm": 1.5801446437835693, "learning_rate": 5.1427839017057234e-05, "loss": 0.8194, "step": 153740 }, { "epoch": 0.9822649272325364, "grad_norm": 0.723784327507019, "learning_rate": 5.142282337122142e-05, "loss": 0.9339, "step": 153750 }, { "epoch": 0.9823288143822752, "grad_norm": 0.9597413539886475, "learning_rate": 5.141780771105655e-05, "loss": 1.4497, "step": 153760 }, { "epoch": 0.9823927015320139, "grad_norm": 1.1143347024917603, "learning_rate": 5.1412792036613136e-05, "loss": 1.099, "step": 153770 }, { "epoch": 0.9824565886817526, "grad_norm": 0.9566546082496643, "learning_rate": 5.1407776347941674e-05, "loss": 1.1144, "step": 153780 }, { "epoch": 0.9825204758314913, "grad_norm": 1.0242767333984375, "learning_rate": 5.1402760645092696e-05, "loss": 0.8559, "step": 153790 }, { "epoch": 0.98258436298123, "grad_norm": 1.4230691194534302, "learning_rate": 5.13977449281167e-05, "loss": 1.1065, "step": 153800 }, { "epoch": 0.9826482501309687, "grad_norm": 0.743841826915741, "learning_rate": 5.139272919706421e-05, "loss": 0.8184, "step": 153810 }, { "epoch": 0.9827121372807074, "grad_norm": 1.586731195449829, "learning_rate": 5.138771345198572e-05, "loss": 0.8454, "step": 153820 }, { "epoch": 0.9827760244304461, "grad_norm": 0.9233693480491638, "learning_rate": 5.138269769293176e-05, "loss": 1.0317, "step": 153830 }, { "epoch": 0.9828399115801848, "grad_norm": 0.9364616274833679, "learning_rate": 5.137768191995284e-05, "loss": 0.9424, "step": 153840 }, { "epoch": 0.9829037987299235, "grad_norm": 0.9385852217674255, "learning_rate": 5.137266613309947e-05, "loss": 1.1349, "step": 153850 }, { "epoch": 0.9829676858796622, "grad_norm": 0.8022464513778687, "learning_rate": 5.1367650332422155e-05, "loss": 0.8832, "step": 153860 }, { "epoch": 0.9830315730294009, "grad_norm": 1.424519658088684, "learning_rate": 5.136263451797143e-05, "loss": 0.8496, "step": 153870 }, { "epoch": 0.9830954601791395, "grad_norm": 0.835070013999939, "learning_rate": 5.1357618689797795e-05, "loss": 0.7952, "step": 153880 }, { "epoch": 0.9831593473288782, "grad_norm": 1.1491596698760986, "learning_rate": 5.135260284795176e-05, "loss": 1.0079, "step": 153890 }, { "epoch": 0.9832232344786169, "grad_norm": 1.1634465456008911, "learning_rate": 5.134758699248386e-05, "loss": 0.8044, "step": 153900 }, { "epoch": 0.9832871216283556, "grad_norm": 0.9196897745132446, "learning_rate": 5.134257112344457e-05, "loss": 0.8414, "step": 153910 }, { "epoch": 0.9833510087780943, "grad_norm": 0.801220715045929, "learning_rate": 5.133755524088444e-05, "loss": 0.725, "step": 153920 }, { "epoch": 0.983414895927833, "grad_norm": 0.6582996845245361, "learning_rate": 5.133253934485397e-05, "loss": 0.6951, "step": 153930 }, { "epoch": 0.9834787830775717, "grad_norm": 1.0240771770477295, "learning_rate": 5.132752343540368e-05, "loss": 0.9298, "step": 153940 }, { "epoch": 0.9835426702273105, "grad_norm": 0.8002316951751709, "learning_rate": 5.132250751258407e-05, "loss": 1.0542, "step": 153950 }, { "epoch": 0.9836065573770492, "grad_norm": 1.1618034839630127, "learning_rate": 5.131749157644568e-05, "loss": 0.7801, "step": 153960 }, { "epoch": 0.9836704445267879, "grad_norm": 0.9297146201133728, "learning_rate": 5.1312475627039e-05, "loss": 0.8619, "step": 153970 }, { "epoch": 0.9837343316765266, "grad_norm": 0.7752575278282166, "learning_rate": 5.130745966441456e-05, "loss": 0.7556, "step": 153980 }, { "epoch": 0.9837982188262653, "grad_norm": 1.0066181421279907, "learning_rate": 5.130244368862286e-05, "loss": 0.9884, "step": 153990 }, { "epoch": 0.983862105976004, "grad_norm": 0.8494886159896851, "learning_rate": 5.129742769971443e-05, "loss": 0.8843, "step": 154000 }, { "epoch": 0.9839259931257427, "grad_norm": 0.7732999920845032, "learning_rate": 5.1292411697739786e-05, "loss": 0.6683, "step": 154010 }, { "epoch": 0.9839898802754814, "grad_norm": 1.17229425907135, "learning_rate": 5.128739568274944e-05, "loss": 0.7683, "step": 154020 }, { "epoch": 0.9840537674252201, "grad_norm": 1.5680131912231445, "learning_rate": 5.12823796547939e-05, "loss": 1.2148, "step": 154030 }, { "epoch": 0.9841176545749588, "grad_norm": 0.7837061882019043, "learning_rate": 5.1277363613923676e-05, "loss": 0.6788, "step": 154040 }, { "epoch": 0.9841815417246975, "grad_norm": 1.022614598274231, "learning_rate": 5.1272347560189314e-05, "loss": 0.7454, "step": 154050 }, { "epoch": 0.9842454288744362, "grad_norm": 1.2080639600753784, "learning_rate": 5.12673314936413e-05, "loss": 0.8185, "step": 154060 }, { "epoch": 0.9843093160241749, "grad_norm": 1.0237927436828613, "learning_rate": 5.126231541433018e-05, "loss": 0.8095, "step": 154070 }, { "epoch": 0.9843732031739136, "grad_norm": 0.5881984233856201, "learning_rate": 5.125729932230643e-05, "loss": 0.8783, "step": 154080 }, { "epoch": 0.9844370903236523, "grad_norm": 0.7203654646873474, "learning_rate": 5.12522832176206e-05, "loss": 0.7445, "step": 154090 }, { "epoch": 0.984500977473391, "grad_norm": 1.8319315910339355, "learning_rate": 5.1247267100323195e-05, "loss": 1.0696, "step": 154100 }, { "epoch": 0.9845648646231298, "grad_norm": 0.8602228164672852, "learning_rate": 5.124225097046472e-05, "loss": 0.8352, "step": 154110 }, { "epoch": 0.9846287517728685, "grad_norm": 0.7165752053260803, "learning_rate": 5.12372348280957e-05, "loss": 0.981, "step": 154120 }, { "epoch": 0.984692638922607, "grad_norm": 0.9421229362487793, "learning_rate": 5.123221867326666e-05, "loss": 0.9251, "step": 154130 }, { "epoch": 0.9847565260723458, "grad_norm": 1.153640866279602, "learning_rate": 5.1227202506028117e-05, "loss": 1.0239, "step": 154140 }, { "epoch": 0.9848204132220845, "grad_norm": 0.8328729271888733, "learning_rate": 5.122218632643059e-05, "loss": 0.7723, "step": 154150 }, { "epoch": 0.9848843003718232, "grad_norm": 3.4909727573394775, "learning_rate": 5.1217170134524586e-05, "loss": 0.9164, "step": 154160 }, { "epoch": 0.9849481875215619, "grad_norm": 1.5162936449050903, "learning_rate": 5.1212153930360615e-05, "loss": 0.7539, "step": 154170 }, { "epoch": 0.9850120746713006, "grad_norm": 0.8826167583465576, "learning_rate": 5.1207137713989205e-05, "loss": 0.7714, "step": 154180 }, { "epoch": 0.9850759618210393, "grad_norm": 0.7749609351158142, "learning_rate": 5.1202121485460894e-05, "loss": 0.9825, "step": 154190 }, { "epoch": 0.985139848970778, "grad_norm": 0.9990338683128357, "learning_rate": 5.119710524482617e-05, "loss": 0.7492, "step": 154200 }, { "epoch": 0.9852037361205167, "grad_norm": Infinity, "learning_rate": 5.119259061794569e-05, "loss": 0.7843, "step": 154210 }, { "epoch": 0.9852676232702554, "grad_norm": 0.6239207983016968, "learning_rate": 5.118757435444798e-05, "loss": 0.8757, "step": 154220 }, { "epoch": 0.9853315104199941, "grad_norm": 0.8069121241569519, "learning_rate": 5.118255807899036e-05, "loss": 1.0016, "step": 154230 }, { "epoch": 0.9853953975697328, "grad_norm": 0.8366886973381042, "learning_rate": 5.117754179162335e-05, "loss": 0.6895, "step": 154240 }, { "epoch": 0.9854592847194715, "grad_norm": 0.8663131594657898, "learning_rate": 5.1172525492397484e-05, "loss": 0.683, "step": 154250 }, { "epoch": 0.9855231718692102, "grad_norm": 1.0200055837631226, "learning_rate": 5.116750918136327e-05, "loss": 0.8902, "step": 154260 }, { "epoch": 0.9855870590189489, "grad_norm": 1.0423588752746582, "learning_rate": 5.116249285857123e-05, "loss": 0.9948, "step": 154270 }, { "epoch": 0.9856509461686876, "grad_norm": 1.235628366470337, "learning_rate": 5.115747652407189e-05, "loss": 0.9463, "step": 154280 }, { "epoch": 0.9857148333184264, "grad_norm": 0.787745475769043, "learning_rate": 5.115246017791575e-05, "loss": 0.9099, "step": 154290 }, { "epoch": 0.9857787204681651, "grad_norm": 0.8026257157325745, "learning_rate": 5.114744382015334e-05, "loss": 0.8882, "step": 154300 }, { "epoch": 0.9858426076179038, "grad_norm": 0.9308233857154846, "learning_rate": 5.114242745083517e-05, "loss": 0.7079, "step": 154310 }, { "epoch": 0.9859064947676425, "grad_norm": 1.0703706741333008, "learning_rate": 5.1137411070011786e-05, "loss": 0.8582, "step": 154320 }, { "epoch": 0.9859703819173812, "grad_norm": 0.8068180680274963, "learning_rate": 5.113239467773369e-05, "loss": 0.666, "step": 154330 }, { "epoch": 0.9860342690671199, "grad_norm": 1.0489506721496582, "learning_rate": 5.1127378274051385e-05, "loss": 0.9058, "step": 154340 }, { "epoch": 0.9860981562168586, "grad_norm": 1.0829126834869385, "learning_rate": 5.112236185901541e-05, "loss": 0.9407, "step": 154350 }, { "epoch": 0.9861620433665973, "grad_norm": 1.2354950904846191, "learning_rate": 5.111734543267628e-05, "loss": 1.0327, "step": 154360 }, { "epoch": 0.9862259305163359, "grad_norm": 0.9200438857078552, "learning_rate": 5.111232899508451e-05, "loss": 0.884, "step": 154370 }, { "epoch": 0.9862898176660746, "grad_norm": 1.6493422985076904, "learning_rate": 5.110731254629063e-05, "loss": 1.0967, "step": 154380 }, { "epoch": 0.9863537048158133, "grad_norm": 0.49024438858032227, "learning_rate": 5.110229608634516e-05, "loss": 0.891, "step": 154390 }, { "epoch": 0.986417591965552, "grad_norm": 0.9173659682273865, "learning_rate": 5.1097279615298596e-05, "loss": 0.8338, "step": 154400 }, { "epoch": 0.9864814791152907, "grad_norm": 0.718056857585907, "learning_rate": 5.109226313320149e-05, "loss": 0.9905, "step": 154410 }, { "epoch": 0.9865453662650294, "grad_norm": 0.8132973313331604, "learning_rate": 5.108724664010435e-05, "loss": 0.9829, "step": 154420 }, { "epoch": 0.9866092534147681, "grad_norm": 1.2084906101226807, "learning_rate": 5.1082230136057695e-05, "loss": 0.8259, "step": 154430 }, { "epoch": 0.9866731405645068, "grad_norm": 1.0011422634124756, "learning_rate": 5.1077213621112043e-05, "loss": 0.6755, "step": 154440 }, { "epoch": 0.9867370277142455, "grad_norm": 0.930726945400238, "learning_rate": 5.107219709531792e-05, "loss": 0.8441, "step": 154450 }, { "epoch": 0.9868009148639842, "grad_norm": 0.7071484923362732, "learning_rate": 5.1067180558725846e-05, "loss": 0.8279, "step": 154460 }, { "epoch": 0.986864802013723, "grad_norm": 1.1952509880065918, "learning_rate": 5.106216401138635e-05, "loss": 0.7515, "step": 154470 }, { "epoch": 0.9869286891634617, "grad_norm": 0.8858817219734192, "learning_rate": 5.105714745334993e-05, "loss": 0.8042, "step": 154480 }, { "epoch": 0.9869925763132004, "grad_norm": 1.0529940128326416, "learning_rate": 5.105213088466712e-05, "loss": 0.8393, "step": 154490 }, { "epoch": 0.9870564634629391, "grad_norm": 0.8459919095039368, "learning_rate": 5.1047114305388445e-05, "loss": 0.7869, "step": 154500 }, { "epoch": 0.9871203506126778, "grad_norm": 0.8265353441238403, "learning_rate": 5.104209771556443e-05, "loss": 0.9366, "step": 154510 }, { "epoch": 0.9871842377624165, "grad_norm": 1.7919999361038208, "learning_rate": 5.1037081115245576e-05, "loss": 0.8777, "step": 154520 }, { "epoch": 0.9872481249121552, "grad_norm": 0.8435221910476685, "learning_rate": 5.103206450448243e-05, "loss": 0.9533, "step": 154530 }, { "epoch": 0.9873120120618939, "grad_norm": 1.6864407062530518, "learning_rate": 5.10270478833255e-05, "loss": 0.7714, "step": 154540 }, { "epoch": 0.9873758992116326, "grad_norm": 0.9249199032783508, "learning_rate": 5.1022031251825306e-05, "loss": 0.9767, "step": 154550 }, { "epoch": 0.9874397863613713, "grad_norm": 0.6928601264953613, "learning_rate": 5.101701461003238e-05, "loss": 0.965, "step": 154560 }, { "epoch": 0.98750367351111, "grad_norm": 1.4231996536254883, "learning_rate": 5.101199795799723e-05, "loss": 1.1854, "step": 154570 }, { "epoch": 0.9875675606608487, "grad_norm": 1.1377532482147217, "learning_rate": 5.1006981295770376e-05, "loss": 0.8789, "step": 154580 }, { "epoch": 0.9876314478105874, "grad_norm": 0.6748983860015869, "learning_rate": 5.100196462340236e-05, "loss": 0.8104, "step": 154590 }, { "epoch": 0.9876953349603261, "grad_norm": 1.366390585899353, "learning_rate": 5.0996947940943695e-05, "loss": 1.0198, "step": 154600 }, { "epoch": 0.9877592221100647, "grad_norm": 1.1231287717819214, "learning_rate": 5.09919312484449e-05, "loss": 0.9681, "step": 154610 }, { "epoch": 0.9878231092598034, "grad_norm": 1.0698585510253906, "learning_rate": 5.09869145459565e-05, "loss": 0.7381, "step": 154620 }, { "epoch": 0.9878869964095421, "grad_norm": 0.8807600736618042, "learning_rate": 5.098189783352901e-05, "loss": 0.8386, "step": 154630 }, { "epoch": 0.9879508835592808, "grad_norm": 0.9870644211769104, "learning_rate": 5.097688111121296e-05, "loss": 0.7578, "step": 154640 }, { "epoch": 0.9880147707090196, "grad_norm": 0.8701372742652893, "learning_rate": 5.097186437905887e-05, "loss": 0.9337, "step": 154650 }, { "epoch": 0.9880786578587583, "grad_norm": 0.9452102780342102, "learning_rate": 5.0966847637117275e-05, "loss": 0.94, "step": 154660 }, { "epoch": 0.988142545008497, "grad_norm": 1.2239512205123901, "learning_rate": 5.096183088543869e-05, "loss": 0.886, "step": 154670 }, { "epoch": 0.9882064321582357, "grad_norm": 0.7913658022880554, "learning_rate": 5.095681412407363e-05, "loss": 0.9082, "step": 154680 }, { "epoch": 0.9882703193079744, "grad_norm": 1.3978636264801025, "learning_rate": 5.095179735307263e-05, "loss": 0.8123, "step": 154690 }, { "epoch": 0.9883342064577131, "grad_norm": 1.0464966297149658, "learning_rate": 5.0946780572486194e-05, "loss": 0.8111, "step": 154700 }, { "epoch": 0.9883980936074518, "grad_norm": 0.7706538438796997, "learning_rate": 5.094176378236487e-05, "loss": 0.8334, "step": 154710 }, { "epoch": 0.9884619807571905, "grad_norm": 0.8449127674102783, "learning_rate": 5.0936746982759164e-05, "loss": 0.8343, "step": 154720 }, { "epoch": 0.9885258679069292, "grad_norm": 2.550860643386841, "learning_rate": 5.093173017371961e-05, "loss": 1.1222, "step": 154730 }, { "epoch": 0.9885897550566679, "grad_norm": 0.8664345741271973, "learning_rate": 5.0926713355296715e-05, "loss": 0.9487, "step": 154740 }, { "epoch": 0.9886536422064066, "grad_norm": 1.1151171922683716, "learning_rate": 5.092169652754103e-05, "loss": 0.8512, "step": 154750 }, { "epoch": 0.9887175293561453, "grad_norm": 1.0279850959777832, "learning_rate": 5.091667969050304e-05, "loss": 0.9394, "step": 154760 }, { "epoch": 0.988781416505884, "grad_norm": 0.8667672276496887, "learning_rate": 5.091166284423332e-05, "loss": 0.9599, "step": 154770 }, { "epoch": 0.9888453036556227, "grad_norm": 0.6669974327087402, "learning_rate": 5.0906645988782354e-05, "loss": 0.7711, "step": 154780 }, { "epoch": 0.9889091908053614, "grad_norm": 0.7633401155471802, "learning_rate": 5.090162912420068e-05, "loss": 0.8466, "step": 154790 }, { "epoch": 0.9889730779551001, "grad_norm": 1.8261069059371948, "learning_rate": 5.089661225053882e-05, "loss": 0.7971, "step": 154800 }, { "epoch": 0.9890369651048388, "grad_norm": 0.9991775155067444, "learning_rate": 5.08915953678473e-05, "loss": 1.0186, "step": 154810 }, { "epoch": 0.9891008522545776, "grad_norm": 0.7954165935516357, "learning_rate": 5.088657847617666e-05, "loss": 0.9212, "step": 154820 }, { "epoch": 0.9891647394043163, "grad_norm": 1.120200753211975, "learning_rate": 5.0881561575577384e-05, "loss": 0.7709, "step": 154830 }, { "epoch": 0.989228626554055, "grad_norm": 0.8321331143379211, "learning_rate": 5.0876544666100035e-05, "loss": 1.0257, "step": 154840 }, { "epoch": 0.9892925137037936, "grad_norm": 0.7614843249320984, "learning_rate": 5.087152774779511e-05, "loss": 1.0545, "step": 154850 }, { "epoch": 0.9893564008535323, "grad_norm": 1.8227176666259766, "learning_rate": 5.086651082071315e-05, "loss": 0.8129, "step": 154860 }, { "epoch": 0.989420288003271, "grad_norm": 0.577085018157959, "learning_rate": 5.0861493884904686e-05, "loss": 0.8638, "step": 154870 }, { "epoch": 0.9894841751530097, "grad_norm": 0.9807033538818359, "learning_rate": 5.0856476940420225e-05, "loss": 0.9116, "step": 154880 }, { "epoch": 0.9895480623027484, "grad_norm": 3.173016309738159, "learning_rate": 5.0851459987310304e-05, "loss": 0.675, "step": 154890 }, { "epoch": 0.9896119494524871, "grad_norm": 4.230893611907959, "learning_rate": 5.084644302562544e-05, "loss": 0.7969, "step": 154900 }, { "epoch": 0.9896758366022258, "grad_norm": 0.7749008536338806, "learning_rate": 5.0841426055416164e-05, "loss": 0.7723, "step": 154910 }, { "epoch": 0.9897397237519645, "grad_norm": 1.0113115310668945, "learning_rate": 5.083640907673299e-05, "loss": 1.0151, "step": 154920 }, { "epoch": 0.9898036109017032, "grad_norm": 3.3041319847106934, "learning_rate": 5.083139208962646e-05, "loss": 0.8368, "step": 154930 }, { "epoch": 0.9898674980514419, "grad_norm": 0.8300958871841431, "learning_rate": 5.082637509414709e-05, "loss": 0.6277, "step": 154940 }, { "epoch": 0.9899313852011806, "grad_norm": 0.720481276512146, "learning_rate": 5.0821358090345414e-05, "loss": 0.8199, "step": 154950 }, { "epoch": 0.9899952723509193, "grad_norm": 0.7711173295974731, "learning_rate": 5.081634107827196e-05, "loss": 0.9664, "step": 154960 }, { "epoch": 0.990059159500658, "grad_norm": 1.2588512897491455, "learning_rate": 5.081132405797724e-05, "loss": 0.8738, "step": 154970 }, { "epoch": 0.9901230466503967, "grad_norm": 0.7315512895584106, "learning_rate": 5.080630702951178e-05, "loss": 0.9661, "step": 154980 }, { "epoch": 0.9901869338001354, "grad_norm": 0.7708696722984314, "learning_rate": 5.0801289992926106e-05, "loss": 0.7121, "step": 154990 }, { "epoch": 0.9902508209498742, "grad_norm": 1.7888171672821045, "learning_rate": 5.079627294827075e-05, "loss": 0.9311, "step": 155000 }, { "epoch": 0.9903147080996129, "grad_norm": 0.5545375943183899, "learning_rate": 5.0791255895596246e-05, "loss": 1.0415, "step": 155010 }, { "epoch": 0.9903785952493516, "grad_norm": 0.8352196216583252, "learning_rate": 5.07862388349531e-05, "loss": 0.9962, "step": 155020 }, { "epoch": 0.9904424823990903, "grad_norm": 0.9001242518424988, "learning_rate": 5.0781221766391865e-05, "loss": 0.8372, "step": 155030 }, { "epoch": 0.990506369548829, "grad_norm": 0.76966392993927, "learning_rate": 5.0776204689963035e-05, "loss": 0.8428, "step": 155040 }, { "epoch": 0.9905702566985677, "grad_norm": 1.0977442264556885, "learning_rate": 5.0771187605717154e-05, "loss": 0.7625, "step": 155050 }, { "epoch": 0.9906341438483064, "grad_norm": 0.6730074882507324, "learning_rate": 5.076617051370476e-05, "loss": 0.8101, "step": 155060 }, { "epoch": 0.9906980309980451, "grad_norm": 0.7701064348220825, "learning_rate": 5.076115341397636e-05, "loss": 1.0062, "step": 155070 }, { "epoch": 0.9907619181477838, "grad_norm": 0.697149932384491, "learning_rate": 5.075613630658247e-05, "loss": 0.9009, "step": 155080 }, { "epoch": 0.9908258052975225, "grad_norm": 1.0090751647949219, "learning_rate": 5.075111919157364e-05, "loss": 0.6531, "step": 155090 }, { "epoch": 0.9908896924472611, "grad_norm": 1.064965844154358, "learning_rate": 5.07461020690004e-05, "loss": 0.855, "step": 155100 }, { "epoch": 0.9909535795969998, "grad_norm": 0.9386013150215149, "learning_rate": 5.0741084938913265e-05, "loss": 1.009, "step": 155110 }, { "epoch": 0.9910174667467385, "grad_norm": 0.9192590117454529, "learning_rate": 5.0736067801362754e-05, "loss": 0.9065, "step": 155120 }, { "epoch": 0.9910813538964772, "grad_norm": 0.8243198394775391, "learning_rate": 5.073105065639942e-05, "loss": 0.8052, "step": 155130 }, { "epoch": 0.9911452410462159, "grad_norm": 0.8356258273124695, "learning_rate": 5.072603350407376e-05, "loss": 0.8429, "step": 155140 }, { "epoch": 0.9912091281959546, "grad_norm": 0.962837815284729, "learning_rate": 5.0721016344436314e-05, "loss": 0.9432, "step": 155150 }, { "epoch": 0.9912730153456933, "grad_norm": 0.7561596035957336, "learning_rate": 5.071599917753761e-05, "loss": 1.0909, "step": 155160 }, { "epoch": 0.991336902495432, "grad_norm": 0.8569062352180481, "learning_rate": 5.0710982003428187e-05, "loss": 0.7786, "step": 155170 }, { "epoch": 0.9914007896451708, "grad_norm": 1.1719166040420532, "learning_rate": 5.0705964822158544e-05, "loss": 0.7228, "step": 155180 }, { "epoch": 0.9914646767949095, "grad_norm": 0.7665051221847534, "learning_rate": 5.070094763377924e-05, "loss": 0.8832, "step": 155190 }, { "epoch": 0.9915285639446482, "grad_norm": 0.8756385445594788, "learning_rate": 5.0695930438340776e-05, "loss": 0.9353, "step": 155200 }, { "epoch": 0.9915924510943869, "grad_norm": 1.0932066440582275, "learning_rate": 5.069091323589369e-05, "loss": 0.9989, "step": 155210 }, { "epoch": 0.9916563382441256, "grad_norm": 1.00384521484375, "learning_rate": 5.0685896026488514e-05, "loss": 0.7546, "step": 155220 }, { "epoch": 0.9917202253938643, "grad_norm": 1.111561894416809, "learning_rate": 5.068087881017577e-05, "loss": 1.0, "step": 155230 }, { "epoch": 0.991784112543603, "grad_norm": 1.0187937021255493, "learning_rate": 5.067586158700599e-05, "loss": 0.8705, "step": 155240 }, { "epoch": 0.9918479996933417, "grad_norm": 0.9533101320266724, "learning_rate": 5.06708443570297e-05, "loss": 1.0491, "step": 155250 }, { "epoch": 0.9919118868430804, "grad_norm": 0.7527496218681335, "learning_rate": 5.066582712029743e-05, "loss": 0.852, "step": 155260 }, { "epoch": 0.9919757739928191, "grad_norm": 0.5917440056800842, "learning_rate": 5.0660809876859694e-05, "loss": 0.9962, "step": 155270 }, { "epoch": 0.9920396611425578, "grad_norm": 1.4870597124099731, "learning_rate": 5.065579262676704e-05, "loss": 0.8788, "step": 155280 }, { "epoch": 0.9921035482922965, "grad_norm": 0.7296018004417419, "learning_rate": 5.0650775370069966e-05, "loss": 0.8849, "step": 155290 }, { "epoch": 0.9921674354420352, "grad_norm": 1.0578160285949707, "learning_rate": 5.0645758106819055e-05, "loss": 0.7862, "step": 155300 }, { "epoch": 0.9922313225917739, "grad_norm": 0.9313512444496155, "learning_rate": 5.064074083706478e-05, "loss": 0.7519, "step": 155310 }, { "epoch": 0.9922952097415126, "grad_norm": 0.8470985293388367, "learning_rate": 5.063572356085769e-05, "loss": 0.9872, "step": 155320 }, { "epoch": 0.9923590968912513, "grad_norm": 0.9447453618049622, "learning_rate": 5.063070627824833e-05, "loss": 1.0726, "step": 155330 }, { "epoch": 0.9924229840409899, "grad_norm": 0.7884403467178345, "learning_rate": 5.0625688989287204e-05, "loss": 1.1167, "step": 155340 }, { "epoch": 0.9924868711907286, "grad_norm": 1.2746003866195679, "learning_rate": 5.0620671694024836e-05, "loss": 1.0695, "step": 155350 }, { "epoch": 0.9925507583404674, "grad_norm": 1.0743809938430786, "learning_rate": 5.061565439251178e-05, "loss": 0.9215, "step": 155360 }, { "epoch": 0.9926146454902061, "grad_norm": 0.7811270952224731, "learning_rate": 5.061063708479855e-05, "loss": 0.8182, "step": 155370 }, { "epoch": 0.9926785326399448, "grad_norm": 1.0729076862335205, "learning_rate": 5.060561977093568e-05, "loss": 0.7123, "step": 155380 }, { "epoch": 0.9927424197896835, "grad_norm": 0.9297366142272949, "learning_rate": 5.060060245097368e-05, "loss": 0.9888, "step": 155390 }, { "epoch": 0.9928063069394222, "grad_norm": 2.0715694427490234, "learning_rate": 5.059558512496311e-05, "loss": 0.9501, "step": 155400 }, { "epoch": 0.9928701940891609, "grad_norm": 0.7811942100524902, "learning_rate": 5.059056779295447e-05, "loss": 0.8314, "step": 155410 }, { "epoch": 0.9929340812388996, "grad_norm": 0.4893457591533661, "learning_rate": 5.058555045499831e-05, "loss": 1.1216, "step": 155420 }, { "epoch": 0.9929979683886383, "grad_norm": 0.7355089783668518, "learning_rate": 5.058053311114515e-05, "loss": 0.9711, "step": 155430 }, { "epoch": 0.993061855538377, "grad_norm": 0.8681902289390564, "learning_rate": 5.057551576144551e-05, "loss": 0.9379, "step": 155440 }, { "epoch": 0.9931257426881157, "grad_norm": 1.1609843969345093, "learning_rate": 5.0570498405949926e-05, "loss": 0.8046, "step": 155450 }, { "epoch": 0.9931896298378544, "grad_norm": 0.9073358774185181, "learning_rate": 5.056548104470894e-05, "loss": 0.8837, "step": 155460 }, { "epoch": 0.9932535169875931, "grad_norm": 0.7544703483581543, "learning_rate": 5.056046367777306e-05, "loss": 1.0009, "step": 155470 }, { "epoch": 0.9933174041373318, "grad_norm": 2.039977788925171, "learning_rate": 5.055544630519284e-05, "loss": 0.736, "step": 155480 }, { "epoch": 0.9933812912870705, "grad_norm": 0.5482000112533569, "learning_rate": 5.055042892701879e-05, "loss": 0.7449, "step": 155490 }, { "epoch": 0.9934451784368092, "grad_norm": 1.062432050704956, "learning_rate": 5.054541154330145e-05, "loss": 0.9661, "step": 155500 }, { "epoch": 0.993509065586548, "grad_norm": 0.7228785753250122, "learning_rate": 5.054039415409133e-05, "loss": 1.0069, "step": 155510 }, { "epoch": 0.9935729527362867, "grad_norm": 0.7066873908042908, "learning_rate": 5.053537675943899e-05, "loss": 0.911, "step": 155520 }, { "epoch": 0.9936368398860254, "grad_norm": 1.2193684577941895, "learning_rate": 5.053035935939493e-05, "loss": 0.8247, "step": 155530 }, { "epoch": 0.9937007270357641, "grad_norm": 0.8418088555335999, "learning_rate": 5.05253419540097e-05, "loss": 1.0304, "step": 155540 }, { "epoch": 0.9937646141855028, "grad_norm": 0.9440509080886841, "learning_rate": 5.052032454333383e-05, "loss": 0.7883, "step": 155550 }, { "epoch": 0.9938285013352415, "grad_norm": 2.196873426437378, "learning_rate": 5.051530712741783e-05, "loss": 0.7575, "step": 155560 }, { "epoch": 0.9938923884849802, "grad_norm": 0.6164715886116028, "learning_rate": 5.051028970631224e-05, "loss": 0.9493, "step": 155570 }, { "epoch": 0.9939562756347188, "grad_norm": 0.6802273392677307, "learning_rate": 5.05052722800676e-05, "loss": 0.7849, "step": 155580 }, { "epoch": 0.9940201627844575, "grad_norm": 0.7959185242652893, "learning_rate": 5.0500254848734415e-05, "loss": 0.9129, "step": 155590 }, { "epoch": 0.9940840499341962, "grad_norm": 0.7099493741989136, "learning_rate": 5.049523741236325e-05, "loss": 0.9131, "step": 155600 }, { "epoch": 0.9941479370839349, "grad_norm": 1.0658419132232666, "learning_rate": 5.049021997100459e-05, "loss": 0.7746, "step": 155610 }, { "epoch": 0.9942118242336736, "grad_norm": 1.2376176118850708, "learning_rate": 5.048520252470901e-05, "loss": 1.0721, "step": 155620 }, { "epoch": 0.9942757113834123, "grad_norm": 3.2693676948547363, "learning_rate": 5.048018507352702e-05, "loss": 0.9045, "step": 155630 }, { "epoch": 0.994339598533151, "grad_norm": 1.1164909601211548, "learning_rate": 5.047516761750915e-05, "loss": 0.8043, "step": 155640 }, { "epoch": 0.9944034856828897, "grad_norm": 2.0805160999298096, "learning_rate": 5.0470150156705933e-05, "loss": 0.8283, "step": 155650 }, { "epoch": 0.9944673728326284, "grad_norm": 1.3686854839324951, "learning_rate": 5.0465132691167894e-05, "loss": 0.7677, "step": 155660 }, { "epoch": 0.9945312599823671, "grad_norm": 0.9186270833015442, "learning_rate": 5.046011522094556e-05, "loss": 0.8535, "step": 155670 }, { "epoch": 0.9945951471321058, "grad_norm": 1.0402723550796509, "learning_rate": 5.045509774608947e-05, "loss": 0.7879, "step": 155680 }, { "epoch": 0.9946590342818445, "grad_norm": 0.8195865154266357, "learning_rate": 5.0450080266650165e-05, "loss": 0.7122, "step": 155690 }, { "epoch": 0.9947229214315833, "grad_norm": 1.0943642854690552, "learning_rate": 5.0445062782678154e-05, "loss": 0.9818, "step": 155700 }, { "epoch": 0.994786808581322, "grad_norm": 0.9934229850769043, "learning_rate": 5.044004529422397e-05, "loss": 0.8661, "step": 155710 }, { "epoch": 0.9948506957310607, "grad_norm": 1.000313639640808, "learning_rate": 5.0435027801338164e-05, "loss": 0.9347, "step": 155720 }, { "epoch": 0.9949145828807994, "grad_norm": 0.7824245691299438, "learning_rate": 5.043001030407124e-05, "loss": 0.6195, "step": 155730 }, { "epoch": 0.9949784700305381, "grad_norm": 0.848616898059845, "learning_rate": 5.042499280247373e-05, "loss": 0.6459, "step": 155740 }, { "epoch": 0.9950423571802768, "grad_norm": 1.090847373008728, "learning_rate": 5.04199752965962e-05, "loss": 0.7625, "step": 155750 }, { "epoch": 0.9951062443300155, "grad_norm": 1.3222191333770752, "learning_rate": 5.041495778648914e-05, "loss": 1.0372, "step": 155760 }, { "epoch": 0.9951701314797542, "grad_norm": 1.4843127727508545, "learning_rate": 5.0409940272203093e-05, "loss": 0.7487, "step": 155770 }, { "epoch": 0.9952340186294929, "grad_norm": 1.1687159538269043, "learning_rate": 5.040492275378861e-05, "loss": 0.823, "step": 155780 }, { "epoch": 0.9952979057792316, "grad_norm": 0.8214207887649536, "learning_rate": 5.039990523129618e-05, "loss": 0.7828, "step": 155790 }, { "epoch": 0.9953617929289703, "grad_norm": 1.5629782676696777, "learning_rate": 5.0394887704776385e-05, "loss": 0.7958, "step": 155800 }, { "epoch": 0.995425680078709, "grad_norm": 1.2977160215377808, "learning_rate": 5.038987017427971e-05, "loss": 0.9585, "step": 155810 }, { "epoch": 0.9954895672284477, "grad_norm": 0.7463441491127014, "learning_rate": 5.0384852639856706e-05, "loss": 0.7156, "step": 155820 }, { "epoch": 0.9955534543781863, "grad_norm": 0.878885805606842, "learning_rate": 5.037983510155791e-05, "loss": 0.7061, "step": 155830 }, { "epoch": 0.995617341527925, "grad_norm": 0.9723998308181763, "learning_rate": 5.037481755943385e-05, "loss": 1.1002, "step": 155840 }, { "epoch": 0.9956812286776637, "grad_norm": 0.8004569411277771, "learning_rate": 5.036980001353504e-05, "loss": 0.7974, "step": 155850 }, { "epoch": 0.9957451158274024, "grad_norm": 0.9547368288040161, "learning_rate": 5.036478246391203e-05, "loss": 1.1055, "step": 155860 }, { "epoch": 0.9958090029771411, "grad_norm": 1.7963746786117554, "learning_rate": 5.035976491061535e-05, "loss": 0.9589, "step": 155870 }, { "epoch": 0.9958728901268799, "grad_norm": 0.7605364918708801, "learning_rate": 5.035474735369552e-05, "loss": 1.0215, "step": 155880 }, { "epoch": 0.9959367772766186, "grad_norm": 0.868885338306427, "learning_rate": 5.0349729793203085e-05, "loss": 1.1309, "step": 155890 }, { "epoch": 0.9960006644263573, "grad_norm": 0.8125676512718201, "learning_rate": 5.034471222918856e-05, "loss": 0.7623, "step": 155900 }, { "epoch": 0.996064551576096, "grad_norm": 0.9288298487663269, "learning_rate": 5.033969466170248e-05, "loss": 0.9507, "step": 155910 }, { "epoch": 0.9961284387258347, "grad_norm": 1.1834713220596313, "learning_rate": 5.033467709079539e-05, "loss": 0.8471, "step": 155920 }, { "epoch": 0.9961923258755734, "grad_norm": 0.5627526640892029, "learning_rate": 5.032965951651781e-05, "loss": 0.8455, "step": 155930 }, { "epoch": 0.9962562130253121, "grad_norm": 1.100093126296997, "learning_rate": 5.032464193892028e-05, "loss": 0.7466, "step": 155940 }, { "epoch": 0.9963201001750508, "grad_norm": 1.0520758628845215, "learning_rate": 5.031962435805332e-05, "loss": 0.8332, "step": 155950 }, { "epoch": 0.9963839873247895, "grad_norm": 1.0780564546585083, "learning_rate": 5.0314606773967456e-05, "loss": 1.1039, "step": 155960 }, { "epoch": 0.9964478744745282, "grad_norm": 2.222808361053467, "learning_rate": 5.0309589186713235e-05, "loss": 0.8462, "step": 155970 }, { "epoch": 0.9965117616242669, "grad_norm": 0.8251408338546753, "learning_rate": 5.030457159634118e-05, "loss": 0.7902, "step": 155980 }, { "epoch": 0.9965756487740056, "grad_norm": 0.9923737645149231, "learning_rate": 5.029955400290183e-05, "loss": 0.9637, "step": 155990 }, { "epoch": 0.9966395359237443, "grad_norm": 0.7754630446434021, "learning_rate": 5.029453640644571e-05, "loss": 1.025, "step": 156000 }, { "epoch": 0.996703423073483, "grad_norm": 0.49166470766067505, "learning_rate": 5.028951880702336e-05, "loss": 0.709, "step": 156010 }, { "epoch": 0.9967673102232217, "grad_norm": 0.7618236541748047, "learning_rate": 5.028450120468531e-05, "loss": 0.9728, "step": 156020 }, { "epoch": 0.9968311973729604, "grad_norm": 2.817028760910034, "learning_rate": 5.027948359948209e-05, "loss": 0.9221, "step": 156030 }, { "epoch": 0.9968950845226991, "grad_norm": 0.8261348009109497, "learning_rate": 5.027446599146421e-05, "loss": 0.9886, "step": 156040 }, { "epoch": 0.9969589716724379, "grad_norm": 0.749161958694458, "learning_rate": 5.026944838068223e-05, "loss": 0.9215, "step": 156050 }, { "epoch": 0.9970228588221766, "grad_norm": 1.2337532043457031, "learning_rate": 5.026443076718666e-05, "loss": 0.9563, "step": 156060 }, { "epoch": 0.9970867459719152, "grad_norm": 0.6758466362953186, "learning_rate": 5.0259413151028066e-05, "loss": 0.6758, "step": 156070 }, { "epoch": 0.9971506331216539, "grad_norm": 1.36204993724823, "learning_rate": 5.0254395532256935e-05, "loss": 0.948, "step": 156080 }, { "epoch": 0.9972145202713926, "grad_norm": 0.7348865866661072, "learning_rate": 5.0249377910923834e-05, "loss": 0.9833, "step": 156090 }, { "epoch": 0.9972784074211313, "grad_norm": 0.6744032502174377, "learning_rate": 5.0244360287079287e-05, "loss": 0.9639, "step": 156100 }, { "epoch": 0.99734229457087, "grad_norm": 0.9378622174263, "learning_rate": 5.0239342660773804e-05, "loss": 0.8584, "step": 156110 }, { "epoch": 0.9974061817206087, "grad_norm": 1.2324236631393433, "learning_rate": 5.023432503205794e-05, "loss": 0.9609, "step": 156120 }, { "epoch": 0.9974700688703474, "grad_norm": 0.9098687171936035, "learning_rate": 5.0229307400982215e-05, "loss": 1.008, "step": 156130 }, { "epoch": 0.9975339560200861, "grad_norm": 0.9950636029243469, "learning_rate": 5.0224289767597164e-05, "loss": 0.892, "step": 156140 }, { "epoch": 0.9975978431698248, "grad_norm": 0.6629953980445862, "learning_rate": 5.021927213195333e-05, "loss": 0.7111, "step": 156150 }, { "epoch": 0.9976617303195635, "grad_norm": 0.7785073518753052, "learning_rate": 5.021425449410123e-05, "loss": 0.7638, "step": 156160 }, { "epoch": 0.9977256174693022, "grad_norm": 0.6204543113708496, "learning_rate": 5.0209236854091414e-05, "loss": 0.8849, "step": 156170 }, { "epoch": 0.9977895046190409, "grad_norm": 0.7661494016647339, "learning_rate": 5.020421921197439e-05, "loss": 0.7041, "step": 156180 }, { "epoch": 0.9978533917687796, "grad_norm": 1.0413057804107666, "learning_rate": 5.0199201567800704e-05, "loss": 0.8334, "step": 156190 }, { "epoch": 0.9979172789185183, "grad_norm": 1.1320688724517822, "learning_rate": 5.0194183921620895e-05, "loss": 0.8678, "step": 156200 }, { "epoch": 0.997981166068257, "grad_norm": 1.0221413373947144, "learning_rate": 5.0189166273485476e-05, "loss": 0.7552, "step": 156210 }, { "epoch": 0.9980450532179957, "grad_norm": 0.6623107194900513, "learning_rate": 5.018414862344499e-05, "loss": 0.8624, "step": 156220 }, { "epoch": 0.9981089403677345, "grad_norm": 1.305783748626709, "learning_rate": 5.017913097154997e-05, "loss": 0.8993, "step": 156230 }, { "epoch": 0.9981728275174732, "grad_norm": 1.0739842653274536, "learning_rate": 5.017411331785094e-05, "loss": 0.8799, "step": 156240 }, { "epoch": 0.9982367146672119, "grad_norm": 0.6890377998352051, "learning_rate": 5.016909566239846e-05, "loss": 0.777, "step": 156250 }, { "epoch": 0.9983006018169506, "grad_norm": 1.3199329376220703, "learning_rate": 5.016407800524302e-05, "loss": 1.0892, "step": 156260 }, { "epoch": 0.9983644889666893, "grad_norm": 1.2792729139328003, "learning_rate": 5.015906034643517e-05, "loss": 0.7636, "step": 156270 }, { "epoch": 0.998428376116428, "grad_norm": 1.0154145956039429, "learning_rate": 5.015404268602547e-05, "loss": 1.0007, "step": 156280 }, { "epoch": 0.9984922632661667, "grad_norm": 1.130146861076355, "learning_rate": 5.014902502406441e-05, "loss": 0.8497, "step": 156290 }, { "epoch": 0.9985561504159054, "grad_norm": 0.9147141575813293, "learning_rate": 5.014400736060252e-05, "loss": 1.0122, "step": 156300 }, { "epoch": 0.998620037565644, "grad_norm": 2.783782720565796, "learning_rate": 5.013898969569038e-05, "loss": 1.0387, "step": 156310 }, { "epoch": 0.9986839247153827, "grad_norm": 1.1431195735931396, "learning_rate": 5.013397202937847e-05, "loss": 1.1868, "step": 156320 }, { "epoch": 0.9987478118651214, "grad_norm": 0.596916139125824, "learning_rate": 5.0128954361717365e-05, "loss": 0.841, "step": 156330 }, { "epoch": 0.9988116990148601, "grad_norm": 0.7760692238807678, "learning_rate": 5.0123936692757566e-05, "loss": 0.821, "step": 156340 }, { "epoch": 0.9988755861645988, "grad_norm": 0.8499806523323059, "learning_rate": 5.011891902254963e-05, "loss": 1.0573, "step": 156350 }, { "epoch": 0.9989394733143375, "grad_norm": 0.9884029626846313, "learning_rate": 5.0113901351144065e-05, "loss": 1.0637, "step": 156360 }, { "epoch": 0.9990033604640762, "grad_norm": 0.7256179451942444, "learning_rate": 5.0108883678591424e-05, "loss": 1.0313, "step": 156370 }, { "epoch": 0.9990672476138149, "grad_norm": 1.0645331144332886, "learning_rate": 5.010386600494222e-05, "loss": 1.0018, "step": 156380 }, { "epoch": 0.9991311347635536, "grad_norm": 1.1328057050704956, "learning_rate": 5.0098848330247006e-05, "loss": 0.9414, "step": 156390 }, { "epoch": 0.9991950219132923, "grad_norm": 0.8693520426750183, "learning_rate": 5.00938306545563e-05, "loss": 0.7726, "step": 156400 }, { "epoch": 0.999258909063031, "grad_norm": 1.004093050956726, "learning_rate": 5.008881297792063e-05, "loss": 0.7736, "step": 156410 }, { "epoch": 0.9993227962127698, "grad_norm": 0.7669504284858704, "learning_rate": 5.008379530039055e-05, "loss": 1.1268, "step": 156420 }, { "epoch": 0.9993866833625085, "grad_norm": 0.9872044324874878, "learning_rate": 5.007877762201657e-05, "loss": 0.7277, "step": 156430 }, { "epoch": 0.9994505705122472, "grad_norm": 0.8186001181602478, "learning_rate": 5.007375994284923e-05, "loss": 0.9602, "step": 156440 }, { "epoch": 0.9995144576619859, "grad_norm": 0.840390145778656, "learning_rate": 5.006874226293907e-05, "loss": 0.8884, "step": 156450 }, { "epoch": 0.9995783448117246, "grad_norm": 1.0728955268859863, "learning_rate": 5.0063724582336614e-05, "loss": 1.0428, "step": 156460 }, { "epoch": 0.9996422319614633, "grad_norm": 0.8296906352043152, "learning_rate": 5.005870690109239e-05, "loss": 0.7635, "step": 156470 }, { "epoch": 0.999706119111202, "grad_norm": 0.9049399495124817, "learning_rate": 5.0053689219256946e-05, "loss": 0.7707, "step": 156480 }, { "epoch": 0.9997700062609407, "grad_norm": 0.6958884000778198, "learning_rate": 5.0048671536880797e-05, "loss": 0.6962, "step": 156490 }, { "epoch": 0.9998338934106794, "grad_norm": 1.1141598224639893, "learning_rate": 5.0043653854014486e-05, "loss": 0.6017, "step": 156500 }, { "epoch": 0.9998977805604181, "grad_norm": 1.1344066858291626, "learning_rate": 5.0038636170708544e-05, "loss": 0.9523, "step": 156510 }, { "epoch": 0.9999616677101568, "grad_norm": 0.8506420850753784, "learning_rate": 5.003361848701351e-05, "loss": 0.7334, "step": 156520 }, { "epoch": 1.0000255548598955, "grad_norm": 1.0688685178756714, "learning_rate": 5.00286008029799e-05, "loss": 0.7696, "step": 156530 }, { "epoch": 1.0000894420096342, "grad_norm": 1.3739877939224243, "learning_rate": 5.002358311865826e-05, "loss": 1.1261, "step": 156540 }, { "epoch": 1.000153329159373, "grad_norm": 1.3839253187179565, "learning_rate": 5.001856543409911e-05, "loss": 0.9307, "step": 156550 }, { "epoch": 1.0002172163091116, "grad_norm": 1.329694151878357, "learning_rate": 5.001354774935299e-05, "loss": 0.6314, "step": 156560 }, { "epoch": 1.0002811034588504, "grad_norm": 0.7411394119262695, "learning_rate": 5.000853006447044e-05, "loss": 0.9073, "step": 156570 }, { "epoch": 1.000344990608589, "grad_norm": 1.0584617853164673, "learning_rate": 5.000351237950198e-05, "loss": 0.9411, "step": 156580 }, { "epoch": 1.0004088777583278, "grad_norm": 0.8067578673362732, "learning_rate": 4.999849469449815e-05, "loss": 0.8204, "step": 156590 }, { "epoch": 1.0004727649080665, "grad_norm": 0.48932382464408875, "learning_rate": 4.999347700950948e-05, "loss": 0.6526, "step": 156600 }, { "epoch": 1.0005366520578052, "grad_norm": 1.271622657775879, "learning_rate": 4.99884593245865e-05, "loss": 0.9878, "step": 156610 }, { "epoch": 1.0006005392075439, "grad_norm": 0.7740110754966736, "learning_rate": 4.998344163977975e-05, "loss": 0.8846, "step": 156620 }, { "epoch": 1.0006644263572826, "grad_norm": 1.9784252643585205, "learning_rate": 4.997842395513975e-05, "loss": 0.8338, "step": 156630 }, { "epoch": 1.0007283135070213, "grad_norm": 1.1081469058990479, "learning_rate": 4.9973406270717044e-05, "loss": 0.8351, "step": 156640 }, { "epoch": 1.00079220065676, "grad_norm": 0.9051830172538757, "learning_rate": 4.996838858656215e-05, "loss": 0.754, "step": 156650 }, { "epoch": 1.0008560878064987, "grad_norm": 1.0151948928833008, "learning_rate": 4.996337090272562e-05, "loss": 0.9473, "step": 156660 }, { "epoch": 1.0009199749562374, "grad_norm": 0.955154299736023, "learning_rate": 4.9958353219257966e-05, "loss": 1.0262, "step": 156670 }, { "epoch": 1.000983862105976, "grad_norm": 0.9474563598632812, "learning_rate": 4.995333553620974e-05, "loss": 1.0282, "step": 156680 }, { "epoch": 1.0010477492557146, "grad_norm": 0.6811796426773071, "learning_rate": 4.994831785363147e-05, "loss": 0.7229, "step": 156690 }, { "epoch": 1.0011116364054533, "grad_norm": 1.0170528888702393, "learning_rate": 4.994330017157368e-05, "loss": 0.7113, "step": 156700 }, { "epoch": 1.001175523555192, "grad_norm": 0.5645203590393066, "learning_rate": 4.99382824900869e-05, "loss": 0.7999, "step": 156710 }, { "epoch": 1.0012394107049307, "grad_norm": 1.0513136386871338, "learning_rate": 4.9933264809221674e-05, "loss": 0.8627, "step": 156720 }, { "epoch": 1.0013032978546694, "grad_norm": 0.749899685382843, "learning_rate": 4.992824712902853e-05, "loss": 1.1349, "step": 156730 }, { "epoch": 1.0013671850044081, "grad_norm": 1.0058104991912842, "learning_rate": 4.9923229449558e-05, "loss": 0.8528, "step": 156740 }, { "epoch": 1.0014310721541468, "grad_norm": 1.5371594429016113, "learning_rate": 4.991821177086061e-05, "loss": 0.7446, "step": 156750 }, { "epoch": 1.0014949593038855, "grad_norm": 5.122760772705078, "learning_rate": 4.991319409298691e-05, "loss": 0.7597, "step": 156760 }, { "epoch": 1.0015588464536243, "grad_norm": 0.7622484564781189, "learning_rate": 4.990817641598741e-05, "loss": 0.9563, "step": 156770 }, { "epoch": 1.001622733603363, "grad_norm": 0.8959377408027649, "learning_rate": 4.990315873991266e-05, "loss": 0.959, "step": 156780 }, { "epoch": 1.0016866207531017, "grad_norm": 0.8473190069198608, "learning_rate": 4.989814106481318e-05, "loss": 0.8385, "step": 156790 }, { "epoch": 1.0017505079028404, "grad_norm": 0.8622284531593323, "learning_rate": 4.989312339073951e-05, "loss": 0.9859, "step": 156800 }, { "epoch": 1.001814395052579, "grad_norm": 0.7962250113487244, "learning_rate": 4.988810571774218e-05, "loss": 0.7629, "step": 156810 }, { "epoch": 1.0018782822023178, "grad_norm": 0.9139593243598938, "learning_rate": 4.988308804587172e-05, "loss": 0.7469, "step": 156820 }, { "epoch": 1.0019421693520565, "grad_norm": 0.7976312041282654, "learning_rate": 4.9878070375178666e-05, "loss": 0.8388, "step": 156830 }, { "epoch": 1.0020060565017952, "grad_norm": 0.8629096150398254, "learning_rate": 4.987305270571355e-05, "loss": 1.1644, "step": 156840 }, { "epoch": 1.002069943651534, "grad_norm": 0.8316608667373657, "learning_rate": 4.98680350375269e-05, "loss": 0.9476, "step": 156850 }, { "epoch": 1.0021338308012726, "grad_norm": 1.2766749858856201, "learning_rate": 4.9863017370669255e-05, "loss": 0.6629, "step": 156860 }, { "epoch": 1.0021977179510113, "grad_norm": 1.242931604385376, "learning_rate": 4.985799970519113e-05, "loss": 0.8247, "step": 156870 }, { "epoch": 1.00226160510075, "grad_norm": 0.8744287490844727, "learning_rate": 4.9852982041143103e-05, "loss": 0.7827, "step": 156880 }, { "epoch": 1.0023254922504887, "grad_norm": 0.8393293619155884, "learning_rate": 4.984796437857566e-05, "loss": 0.8903, "step": 156890 }, { "epoch": 1.0023893794002274, "grad_norm": 0.9289880394935608, "learning_rate": 4.984294671753933e-05, "loss": 0.7764, "step": 156900 }, { "epoch": 1.0024532665499661, "grad_norm": 0.5650531053543091, "learning_rate": 4.983792905808468e-05, "loss": 0.8134, "step": 156910 }, { "epoch": 1.0025171536997048, "grad_norm": 0.8123154640197754, "learning_rate": 4.983291140026222e-05, "loss": 0.8212, "step": 156920 }, { "epoch": 1.0025810408494435, "grad_norm": 0.8330434560775757, "learning_rate": 4.982789374412248e-05, "loss": 0.773, "step": 156930 }, { "epoch": 1.0026449279991823, "grad_norm": 1.7509496212005615, "learning_rate": 4.9822876089716e-05, "loss": 0.9797, "step": 156940 }, { "epoch": 1.002708815148921, "grad_norm": 1.1598998308181763, "learning_rate": 4.9817858437093315e-05, "loss": 0.7175, "step": 156950 }, { "epoch": 1.0027727022986597, "grad_norm": 0.7346895337104797, "learning_rate": 4.981284078630496e-05, "loss": 0.7898, "step": 156960 }, { "epoch": 1.0028365894483984, "grad_norm": 1.4007014036178589, "learning_rate": 4.980782313740145e-05, "loss": 0.9026, "step": 156970 }, { "epoch": 1.002900476598137, "grad_norm": 0.5492518544197083, "learning_rate": 4.980280549043333e-05, "loss": 0.9399, "step": 156980 }, { "epoch": 1.0029643637478758, "grad_norm": 0.8820534348487854, "learning_rate": 4.9797787845451114e-05, "loss": 0.8288, "step": 156990 }, { "epoch": 1.0030282508976145, "grad_norm": 0.8820619583129883, "learning_rate": 4.979277020250537e-05, "loss": 0.9111, "step": 157000 }, { "epoch": 1.0030921380473532, "grad_norm": 0.825981616973877, "learning_rate": 4.978775256164661e-05, "loss": 0.8334, "step": 157010 }, { "epoch": 1.003156025197092, "grad_norm": 1.0771020650863647, "learning_rate": 4.978273492292535e-05, "loss": 1.1424, "step": 157020 }, { "epoch": 1.0032199123468306, "grad_norm": 1.030242681503296, "learning_rate": 4.977771728639215e-05, "loss": 0.7994, "step": 157030 }, { "epoch": 1.0032837994965693, "grad_norm": 0.6895780563354492, "learning_rate": 4.9772699652097526e-05, "loss": 0.9221, "step": 157040 }, { "epoch": 1.003347686646308, "grad_norm": 0.8492401242256165, "learning_rate": 4.976768202009201e-05, "loss": 1.0848, "step": 157050 }, { "epoch": 1.0034115737960467, "grad_norm": 1.2907142639160156, "learning_rate": 4.976266439042615e-05, "loss": 1.0327, "step": 157060 }, { "epoch": 1.0034754609457854, "grad_norm": 0.6771016120910645, "learning_rate": 4.975764676315045e-05, "loss": 0.7145, "step": 157070 }, { "epoch": 1.0035393480955241, "grad_norm": 1.0897397994995117, "learning_rate": 4.975262913831546e-05, "loss": 1.1423, "step": 157080 }, { "epoch": 1.0036032352452628, "grad_norm": 1.0723178386688232, "learning_rate": 4.974761151597171e-05, "loss": 0.9393, "step": 157090 }, { "epoch": 1.0036671223950016, "grad_norm": 0.8129236698150635, "learning_rate": 4.974259389616973e-05, "loss": 0.98, "step": 157100 }, { "epoch": 1.0037310095447403, "grad_norm": 0.8235514760017395, "learning_rate": 4.973757627896005e-05, "loss": 0.7475, "step": 157110 }, { "epoch": 1.003794896694479, "grad_norm": 0.7380703091621399, "learning_rate": 4.9732558664393214e-05, "loss": 0.8268, "step": 157120 }, { "epoch": 1.0038587838442177, "grad_norm": 1.4135018587112427, "learning_rate": 4.9727541052519736e-05, "loss": 0.9903, "step": 157130 }, { "epoch": 1.0039226709939564, "grad_norm": 1.2729389667510986, "learning_rate": 4.972252344339015e-05, "loss": 0.9318, "step": 157140 }, { "epoch": 1.003986558143695, "grad_norm": 0.9822022318840027, "learning_rate": 4.9717505837055e-05, "loss": 0.859, "step": 157150 }, { "epoch": 1.0040504452934336, "grad_norm": 1.236742615699768, "learning_rate": 4.971248823356481e-05, "loss": 0.7326, "step": 157160 }, { "epoch": 1.0041143324431723, "grad_norm": 1.0240849256515503, "learning_rate": 4.970747063297011e-05, "loss": 0.8845, "step": 157170 }, { "epoch": 1.004178219592911, "grad_norm": 0.9231441020965576, "learning_rate": 4.970245303532144e-05, "loss": 0.7605, "step": 157180 }, { "epoch": 1.0042421067426497, "grad_norm": 0.9898058772087097, "learning_rate": 4.969743544066931e-05, "loss": 0.8963, "step": 157190 }, { "epoch": 1.0043059938923884, "grad_norm": 2.537976026535034, "learning_rate": 4.969241784906428e-05, "loss": 0.9316, "step": 157200 }, { "epoch": 1.004369881042127, "grad_norm": 1.539099097251892, "learning_rate": 4.968740026055686e-05, "loss": 0.9704, "step": 157210 }, { "epoch": 1.0044337681918658, "grad_norm": 1.2426973581314087, "learning_rate": 4.96823826751976e-05, "loss": 0.9339, "step": 157220 }, { "epoch": 1.0044976553416045, "grad_norm": 0.9273771643638611, "learning_rate": 4.967736509303701e-05, "loss": 0.788, "step": 157230 }, { "epoch": 1.0045615424913432, "grad_norm": 0.931636393070221, "learning_rate": 4.9672347514125645e-05, "loss": 0.9634, "step": 157240 }, { "epoch": 1.004625429641082, "grad_norm": 2.1478915214538574, "learning_rate": 4.966732993851402e-05, "loss": 0.8644, "step": 157250 }, { "epoch": 1.0046893167908206, "grad_norm": 0.8873687386512756, "learning_rate": 4.966231236625267e-05, "loss": 0.7035, "step": 157260 }, { "epoch": 1.0047532039405593, "grad_norm": 0.49548423290252686, "learning_rate": 4.965729479739212e-05, "loss": 0.8945, "step": 157270 }, { "epoch": 1.004817091090298, "grad_norm": 1.0232203006744385, "learning_rate": 4.965227723198292e-05, "loss": 0.8334, "step": 157280 }, { "epoch": 1.0048809782400367, "grad_norm": 1.0437577962875366, "learning_rate": 4.964725967007558e-05, "loss": 0.7539, "step": 157290 }, { "epoch": 1.0049448653897755, "grad_norm": 1.0047162771224976, "learning_rate": 4.964224211172064e-05, "loss": 1.1266, "step": 157300 }, { "epoch": 1.0050087525395142, "grad_norm": 1.6184574365615845, "learning_rate": 4.9637224556968634e-05, "loss": 0.7608, "step": 157310 }, { "epoch": 1.0050726396892529, "grad_norm": 0.6376124620437622, "learning_rate": 4.963220700587009e-05, "loss": 0.6971, "step": 157320 }, { "epoch": 1.0051365268389916, "grad_norm": 0.8973642587661743, "learning_rate": 4.9627189458475544e-05, "loss": 1.0248, "step": 157330 }, { "epoch": 1.0052004139887303, "grad_norm": 0.6999796032905579, "learning_rate": 4.962217191483552e-05, "loss": 0.9811, "step": 157340 }, { "epoch": 1.005264301138469, "grad_norm": 1.4351537227630615, "learning_rate": 4.961715437500055e-05, "loss": 0.8413, "step": 157350 }, { "epoch": 1.0053281882882077, "grad_norm": 0.7439177632331848, "learning_rate": 4.9612136839021165e-05, "loss": 0.9353, "step": 157360 }, { "epoch": 1.0053920754379464, "grad_norm": 1.172404408454895, "learning_rate": 4.9607119306947915e-05, "loss": 0.9226, "step": 157370 }, { "epoch": 1.005455962587685, "grad_norm": 0.95972740650177, "learning_rate": 4.96021017788313e-05, "loss": 0.788, "step": 157380 }, { "epoch": 1.0055198497374238, "grad_norm": 0.9233173727989197, "learning_rate": 4.959708425472186e-05, "loss": 1.1035, "step": 157390 }, { "epoch": 1.0055837368871625, "grad_norm": 0.7995260953903198, "learning_rate": 4.959206673467013e-05, "loss": 0.8307, "step": 157400 }, { "epoch": 1.0056476240369012, "grad_norm": 2.598005533218384, "learning_rate": 4.958704921872665e-05, "loss": 1.0584, "step": 157410 }, { "epoch": 1.00571151118664, "grad_norm": 1.0082064867019653, "learning_rate": 4.9582031706941936e-05, "loss": 0.894, "step": 157420 }, { "epoch": 1.0057753983363786, "grad_norm": 2.215607166290283, "learning_rate": 4.957701419936652e-05, "loss": 1.2318, "step": 157430 }, { "epoch": 1.0058392854861173, "grad_norm": 0.9388594627380371, "learning_rate": 4.9571996696050945e-05, "loss": 0.9138, "step": 157440 }, { "epoch": 1.005903172635856, "grad_norm": 0.8170478343963623, "learning_rate": 4.9566979197045726e-05, "loss": 0.7381, "step": 157450 }, { "epoch": 1.0059670597855948, "grad_norm": 0.7179467082023621, "learning_rate": 4.9561961702401405e-05, "loss": 1.0941, "step": 157460 }, { "epoch": 1.0060309469353335, "grad_norm": 0.8564074635505676, "learning_rate": 4.9556944212168506e-05, "loss": 1.0778, "step": 157470 }, { "epoch": 1.0060948340850722, "grad_norm": 1.0831458568572998, "learning_rate": 4.955192672639756e-05, "loss": 1.1512, "step": 157480 }, { "epoch": 1.0061587212348109, "grad_norm": 0.6284303665161133, "learning_rate": 4.9546909245139103e-05, "loss": 0.875, "step": 157490 }, { "epoch": 1.0062226083845496, "grad_norm": 0.7781526446342468, "learning_rate": 4.954189176844367e-05, "loss": 0.8538, "step": 157500 }, { "epoch": 1.0062864955342883, "grad_norm": 0.9631011486053467, "learning_rate": 4.9536874296361763e-05, "loss": 0.8143, "step": 157510 }, { "epoch": 1.006350382684027, "grad_norm": 1.1078636646270752, "learning_rate": 4.953185682894395e-05, "loss": 0.9528, "step": 157520 }, { "epoch": 1.0064142698337657, "grad_norm": 0.8949397206306458, "learning_rate": 4.952683936624074e-05, "loss": 0.6958, "step": 157530 }, { "epoch": 1.0064781569835044, "grad_norm": 0.6079200506210327, "learning_rate": 4.952182190830266e-05, "loss": 0.8135, "step": 157540 }, { "epoch": 1.006542044133243, "grad_norm": 1.173264503479004, "learning_rate": 4.951680445518026e-05, "loss": 0.783, "step": 157550 }, { "epoch": 1.0066059312829818, "grad_norm": 0.9052695631980896, "learning_rate": 4.951178700692404e-05, "loss": 0.6618, "step": 157560 }, { "epoch": 1.0066698184327205, "grad_norm": 0.8281904458999634, "learning_rate": 4.950676956358456e-05, "loss": 0.9381, "step": 157570 }, { "epoch": 1.0067337055824592, "grad_norm": 0.803945004940033, "learning_rate": 4.950175212521234e-05, "loss": 0.8125, "step": 157580 }, { "epoch": 1.006797592732198, "grad_norm": 0.8486142158508301, "learning_rate": 4.94967346918579e-05, "loss": 0.7433, "step": 157590 }, { "epoch": 1.0068614798819366, "grad_norm": 1.5051418542861938, "learning_rate": 4.9491717263571777e-05, "loss": 0.8196, "step": 157600 }, { "epoch": 1.0069253670316753, "grad_norm": 0.7606010437011719, "learning_rate": 4.94866998404045e-05, "loss": 0.7728, "step": 157610 }, { "epoch": 1.006989254181414, "grad_norm": 0.7703734040260315, "learning_rate": 4.94816824224066e-05, "loss": 0.83, "step": 157620 }, { "epoch": 1.0070531413311528, "grad_norm": 0.6582179665565491, "learning_rate": 4.947666500962861e-05, "loss": 0.8594, "step": 157630 }, { "epoch": 1.0071170284808915, "grad_norm": 0.9291526079177856, "learning_rate": 4.947164760212106e-05, "loss": 0.802, "step": 157640 }, { "epoch": 1.00718091563063, "grad_norm": 0.7917651534080505, "learning_rate": 4.9466630199934464e-05, "loss": 0.6386, "step": 157650 }, { "epoch": 1.0072448027803687, "grad_norm": 0.8128526210784912, "learning_rate": 4.946161280311937e-05, "loss": 1.0251, "step": 157660 }, { "epoch": 1.0073086899301074, "grad_norm": 1.1424459218978882, "learning_rate": 4.94565954117263e-05, "loss": 0.9383, "step": 157670 }, { "epoch": 1.007372577079846, "grad_norm": 0.5946982502937317, "learning_rate": 4.9451578025805786e-05, "loss": 0.6585, "step": 157680 }, { "epoch": 1.0074364642295848, "grad_norm": 0.8714378476142883, "learning_rate": 4.9446560645408354e-05, "loss": 0.6871, "step": 157690 }, { "epoch": 1.0075003513793235, "grad_norm": 0.8224378824234009, "learning_rate": 4.944154327058454e-05, "loss": 0.8583, "step": 157700 }, { "epoch": 1.0075642385290622, "grad_norm": 1.5147910118103027, "learning_rate": 4.9436525901384865e-05, "loss": 0.7587, "step": 157710 }, { "epoch": 1.007628125678801, "grad_norm": 0.6513627767562866, "learning_rate": 4.943150853785984e-05, "loss": 0.691, "step": 157720 }, { "epoch": 1.0076920128285396, "grad_norm": 1.0497170686721802, "learning_rate": 4.942649118006004e-05, "loss": 0.9602, "step": 157730 }, { "epoch": 1.0077558999782783, "grad_norm": 2.1938791275024414, "learning_rate": 4.942147382803597e-05, "loss": 0.8407, "step": 157740 }, { "epoch": 1.007819787128017, "grad_norm": 1.1465129852294922, "learning_rate": 4.941645648183816e-05, "loss": 0.9224, "step": 157750 }, { "epoch": 1.0078836742777557, "grad_norm": 1.2474966049194336, "learning_rate": 4.941143914151713e-05, "loss": 0.8904, "step": 157760 }, { "epoch": 1.0079475614274944, "grad_norm": 0.6194661855697632, "learning_rate": 4.9406421807123424e-05, "loss": 0.7821, "step": 157770 }, { "epoch": 1.0080114485772331, "grad_norm": 1.8728440999984741, "learning_rate": 4.940140447870756e-05, "loss": 0.8937, "step": 157780 }, { "epoch": 1.0080753357269718, "grad_norm": 1.6072417497634888, "learning_rate": 4.939638715632007e-05, "loss": 0.7891, "step": 157790 }, { "epoch": 1.0081392228767105, "grad_norm": 1.476420283317566, "learning_rate": 4.939136984001148e-05, "loss": 0.824, "step": 157800 }, { "epoch": 1.0082031100264492, "grad_norm": 0.5812131762504578, "learning_rate": 4.938635252983233e-05, "loss": 0.7095, "step": 157810 }, { "epoch": 1.008266997176188, "grad_norm": 0.7907794713973999, "learning_rate": 4.9381335225833136e-05, "loss": 1.0357, "step": 157820 }, { "epoch": 1.0083308843259267, "grad_norm": 1.1523560285568237, "learning_rate": 4.937631792806444e-05, "loss": 0.7895, "step": 157830 }, { "epoch": 1.0083947714756654, "grad_norm": 0.927148163318634, "learning_rate": 4.937130063657675e-05, "loss": 1.1295, "step": 157840 }, { "epoch": 1.008458658625404, "grad_norm": 0.7727904915809631, "learning_rate": 4.9366283351420616e-05, "loss": 1.0136, "step": 157850 }, { "epoch": 1.0085225457751428, "grad_norm": 0.8255678415298462, "learning_rate": 4.936126607264656e-05, "loss": 0.8372, "step": 157860 }, { "epoch": 1.0085864329248815, "grad_norm": 0.6773051619529724, "learning_rate": 4.9356248800305106e-05, "loss": 0.7595, "step": 157870 }, { "epoch": 1.0086503200746202, "grad_norm": 0.8545355200767517, "learning_rate": 4.9351231534446776e-05, "loss": 1.1426, "step": 157880 }, { "epoch": 1.008714207224359, "grad_norm": 1.0750840902328491, "learning_rate": 4.9346214275122105e-05, "loss": 0.9281, "step": 157890 }, { "epoch": 1.0087780943740976, "grad_norm": 0.7120381593704224, "learning_rate": 4.9341197022381626e-05, "loss": 0.8779, "step": 157900 }, { "epoch": 1.0088419815238363, "grad_norm": 0.8368415236473083, "learning_rate": 4.933617977627586e-05, "loss": 0.7732, "step": 157910 }, { "epoch": 1.008905868673575, "grad_norm": 0.7155873775482178, "learning_rate": 4.933116253685534e-05, "loss": 0.9169, "step": 157920 }, { "epoch": 1.0089697558233137, "grad_norm": 0.7591100931167603, "learning_rate": 4.932614530417059e-05, "loss": 0.9115, "step": 157930 }, { "epoch": 1.0090336429730524, "grad_norm": 1.039440393447876, "learning_rate": 4.932112807827215e-05, "loss": 0.9292, "step": 157940 }, { "epoch": 1.0090975301227911, "grad_norm": 1.248268961906433, "learning_rate": 4.931611085921052e-05, "loss": 0.99, "step": 157950 }, { "epoch": 1.0091614172725298, "grad_norm": 0.9334812760353088, "learning_rate": 4.9311093647036255e-05, "loss": 0.9961, "step": 157960 }, { "epoch": 1.0092253044222685, "grad_norm": 0.8057778477668762, "learning_rate": 4.930607644179987e-05, "loss": 0.847, "step": 157970 }, { "epoch": 1.0092891915720072, "grad_norm": 1.522869348526001, "learning_rate": 4.93010592435519e-05, "loss": 1.0672, "step": 157980 }, { "epoch": 1.009353078721746, "grad_norm": 1.1457351446151733, "learning_rate": 4.929604205234286e-05, "loss": 0.7168, "step": 157990 }, { "epoch": 1.0094169658714847, "grad_norm": 0.6407791376113892, "learning_rate": 4.92910248682233e-05, "loss": 0.7698, "step": 158000 }, { "epoch": 1.0094808530212234, "grad_norm": 1.0870330333709717, "learning_rate": 4.928600769124372e-05, "loss": 0.8819, "step": 158010 }, { "epoch": 1.009544740170962, "grad_norm": 1.1727497577667236, "learning_rate": 4.928099052145466e-05, "loss": 1.0363, "step": 158020 }, { "epoch": 1.0096086273207008, "grad_norm": 0.7357398867607117, "learning_rate": 4.927597335890665e-05, "loss": 1.0793, "step": 158030 }, { "epoch": 1.0096725144704395, "grad_norm": 1.1307222843170166, "learning_rate": 4.927095620365021e-05, "loss": 0.8468, "step": 158040 }, { "epoch": 1.0097364016201782, "grad_norm": 0.747114896774292, "learning_rate": 4.926593905573588e-05, "loss": 0.8705, "step": 158050 }, { "epoch": 1.009800288769917, "grad_norm": 0.9897515177726746, "learning_rate": 4.926092191521418e-05, "loss": 0.8029, "step": 158060 }, { "epoch": 1.0098641759196556, "grad_norm": 1.3690990209579468, "learning_rate": 4.925590478213563e-05, "loss": 0.8735, "step": 158070 }, { "epoch": 1.0099280630693943, "grad_norm": 1.2536840438842773, "learning_rate": 4.925088765655076e-05, "loss": 0.8238, "step": 158080 }, { "epoch": 1.009991950219133, "grad_norm": 1.0726910829544067, "learning_rate": 4.92458705385101e-05, "loss": 1.0702, "step": 158090 }, { "epoch": 1.0100558373688717, "grad_norm": 0.6436864733695984, "learning_rate": 4.924085342806419e-05, "loss": 0.7983, "step": 158100 }, { "epoch": 1.0101197245186104, "grad_norm": 1.1560698747634888, "learning_rate": 4.923583632526353e-05, "loss": 0.943, "step": 158110 }, { "epoch": 1.0101836116683491, "grad_norm": 2.0632247924804688, "learning_rate": 4.923081923015866e-05, "loss": 0.8969, "step": 158120 }, { "epoch": 1.0102474988180878, "grad_norm": 1.1319400072097778, "learning_rate": 4.922580214280011e-05, "loss": 0.8463, "step": 158130 }, { "epoch": 1.0103113859678263, "grad_norm": 0.8231350779533386, "learning_rate": 4.92207850632384e-05, "loss": 0.7702, "step": 158140 }, { "epoch": 1.010375273117565, "grad_norm": 0.5904900431632996, "learning_rate": 4.9215767991524056e-05, "loss": 0.8007, "step": 158150 }, { "epoch": 1.0104391602673037, "grad_norm": 0.6886080503463745, "learning_rate": 4.921075092770761e-05, "loss": 0.979, "step": 158160 }, { "epoch": 1.0105030474170424, "grad_norm": 1.1137446165084839, "learning_rate": 4.920573387183959e-05, "loss": 0.6878, "step": 158170 }, { "epoch": 1.0105669345667811, "grad_norm": 1.4936422109603882, "learning_rate": 4.920071682397051e-05, "loss": 0.9938, "step": 158180 }, { "epoch": 1.0106308217165199, "grad_norm": 0.7173126935958862, "learning_rate": 4.9195699784150906e-05, "loss": 1.0017, "step": 158190 }, { "epoch": 1.0106947088662586, "grad_norm": 1.1607474088668823, "learning_rate": 4.91906827524313e-05, "loss": 0.7932, "step": 158200 }, { "epoch": 1.0107585960159973, "grad_norm": 1.1725101470947266, "learning_rate": 4.918566572886222e-05, "loss": 0.949, "step": 158210 }, { "epoch": 1.010822483165736, "grad_norm": 0.7787816524505615, "learning_rate": 4.9180648713494184e-05, "loss": 1.1126, "step": 158220 }, { "epoch": 1.0108863703154747, "grad_norm": 1.0860399007797241, "learning_rate": 4.9175631706377735e-05, "loss": 1.1293, "step": 158230 }, { "epoch": 1.0109502574652134, "grad_norm": 0.8303453922271729, "learning_rate": 4.917061470756338e-05, "loss": 0.7749, "step": 158240 }, { "epoch": 1.011014144614952, "grad_norm": 0.8753618597984314, "learning_rate": 4.916559771710164e-05, "loss": 0.7246, "step": 158250 }, { "epoch": 1.0110780317646908, "grad_norm": 0.9717980027198792, "learning_rate": 4.916058073504307e-05, "loss": 0.7445, "step": 158260 }, { "epoch": 1.0111419189144295, "grad_norm": 1.009787678718567, "learning_rate": 4.915556376143818e-05, "loss": 0.8512, "step": 158270 }, { "epoch": 1.0112058060641682, "grad_norm": 0.6770163774490356, "learning_rate": 4.9150546796337486e-05, "loss": 0.8507, "step": 158280 }, { "epoch": 1.011269693213907, "grad_norm": 1.2202988862991333, "learning_rate": 4.914603153505973e-05, "loss": 0.8679, "step": 158290 }, { "epoch": 1.0113335803636456, "grad_norm": 1.823740839958191, "learning_rate": 4.914101458625622e-05, "loss": 0.9841, "step": 158300 }, { "epoch": 1.0113974675133843, "grad_norm": 0.8558661937713623, "learning_rate": 4.913599764610344e-05, "loss": 0.9539, "step": 158310 }, { "epoch": 1.011461354663123, "grad_norm": 2.187252998352051, "learning_rate": 4.913098071465191e-05, "loss": 1.0052, "step": 158320 }, { "epoch": 1.0115252418128617, "grad_norm": 0.9841252565383911, "learning_rate": 4.912596379195216e-05, "loss": 1.0755, "step": 158330 }, { "epoch": 1.0115891289626004, "grad_norm": 0.6061422824859619, "learning_rate": 4.91209468780547e-05, "loss": 1.1245, "step": 158340 }, { "epoch": 1.0116530161123392, "grad_norm": 0.5873895883560181, "learning_rate": 4.911592997301007e-05, "loss": 0.8757, "step": 158350 }, { "epoch": 1.0117169032620779, "grad_norm": 1.3842110633850098, "learning_rate": 4.911091307686879e-05, "loss": 0.9066, "step": 158360 }, { "epoch": 1.0117807904118166, "grad_norm": 0.6734603047370911, "learning_rate": 4.910589618968138e-05, "loss": 0.7995, "step": 158370 }, { "epoch": 1.0118446775615553, "grad_norm": 0.9523679614067078, "learning_rate": 4.910087931149838e-05, "loss": 0.8374, "step": 158380 }, { "epoch": 1.011908564711294, "grad_norm": 1.5969555377960205, "learning_rate": 4.90958624423703e-05, "loss": 0.7422, "step": 158390 }, { "epoch": 1.0119724518610327, "grad_norm": 1.0750153064727783, "learning_rate": 4.9090845582347664e-05, "loss": 0.9901, "step": 158400 }, { "epoch": 1.0120363390107714, "grad_norm": 0.9817994236946106, "learning_rate": 4.9085828731481004e-05, "loss": 0.8279, "step": 158410 }, { "epoch": 1.01210022616051, "grad_norm": 1.0518522262573242, "learning_rate": 4.908081188982083e-05, "loss": 0.8748, "step": 158420 }, { "epoch": 1.0121641133102488, "grad_norm": 1.547917366027832, "learning_rate": 4.907579505741769e-05, "loss": 1.0364, "step": 158430 }, { "epoch": 1.0122280004599875, "grad_norm": 0.9296419620513916, "learning_rate": 4.907077823432209e-05, "loss": 0.9092, "step": 158440 }, { "epoch": 1.0122918876097262, "grad_norm": 0.907597541809082, "learning_rate": 4.906576142058455e-05, "loss": 0.7798, "step": 158450 }, { "epoch": 1.012355774759465, "grad_norm": 1.192789077758789, "learning_rate": 4.90607446162556e-05, "loss": 0.7114, "step": 158460 }, { "epoch": 1.0124196619092036, "grad_norm": 1.136411428451538, "learning_rate": 4.905572782138577e-05, "loss": 0.7638, "step": 158470 }, { "epoch": 1.0124835490589423, "grad_norm": 1.407484769821167, "learning_rate": 4.9050711036025576e-05, "loss": 0.7775, "step": 158480 }, { "epoch": 1.012547436208681, "grad_norm": 0.7734007835388184, "learning_rate": 4.904569426022556e-05, "loss": 0.7446, "step": 158490 }, { "epoch": 1.0126113233584197, "grad_norm": 0.8089882135391235, "learning_rate": 4.9040677494036205e-05, "loss": 0.7903, "step": 158500 }, { "epoch": 1.0126752105081585, "grad_norm": 0.9194614291191101, "learning_rate": 4.903566073750806e-05, "loss": 0.799, "step": 158510 }, { "epoch": 1.0127390976578972, "grad_norm": 0.9951015710830688, "learning_rate": 4.9030643990691655e-05, "loss": 1.0582, "step": 158520 }, { "epoch": 1.0128029848076359, "grad_norm": 0.6360517144203186, "learning_rate": 4.90256272536375e-05, "loss": 0.596, "step": 158530 }, { "epoch": 1.0128668719573746, "grad_norm": 1.3233652114868164, "learning_rate": 4.9020610526396116e-05, "loss": 1.1017, "step": 158540 }, { "epoch": 1.0129307591071133, "grad_norm": 0.7260464429855347, "learning_rate": 4.9015593809018034e-05, "loss": 0.7517, "step": 158550 }, { "epoch": 1.012994646256852, "grad_norm": 1.1746294498443604, "learning_rate": 4.901057710155378e-05, "loss": 0.7294, "step": 158560 }, { "epoch": 1.0130585334065907, "grad_norm": 0.7293243408203125, "learning_rate": 4.900556040405386e-05, "loss": 0.8202, "step": 158570 }, { "epoch": 1.0131224205563294, "grad_norm": 1.2035466432571411, "learning_rate": 4.900054371656882e-05, "loss": 0.8897, "step": 158580 }, { "epoch": 1.013186307706068, "grad_norm": 1.4569511413574219, "learning_rate": 4.899552703914916e-05, "loss": 0.9422, "step": 158590 }, { "epoch": 1.0132501948558068, "grad_norm": 1.067893624305725, "learning_rate": 4.899051037184541e-05, "loss": 0.6845, "step": 158600 }, { "epoch": 1.0133140820055455, "grad_norm": 1.2214561700820923, "learning_rate": 4.89854937147081e-05, "loss": 0.8067, "step": 158610 }, { "epoch": 1.013377969155284, "grad_norm": 0.9461604356765747, "learning_rate": 4.898047706778774e-05, "loss": 0.7671, "step": 158620 }, { "epoch": 1.0134418563050227, "grad_norm": 0.7252498865127563, "learning_rate": 4.897546043113487e-05, "loss": 0.9272, "step": 158630 }, { "epoch": 1.0135057434547614, "grad_norm": 0.8559421896934509, "learning_rate": 4.8970443804799985e-05, "loss": 0.8639, "step": 158640 }, { "epoch": 1.0135696306045001, "grad_norm": 0.5652778148651123, "learning_rate": 4.8965427188833627e-05, "loss": 0.8474, "step": 158650 }, { "epoch": 1.0136335177542388, "grad_norm": 0.5576297640800476, "learning_rate": 4.8960410583286315e-05, "loss": 0.9793, "step": 158660 }, { "epoch": 1.0136974049039775, "grad_norm": 0.7062064409255981, "learning_rate": 4.895539398820857e-05, "loss": 0.9898, "step": 158670 }, { "epoch": 1.0137612920537162, "grad_norm": 1.1425914764404297, "learning_rate": 4.89503774036509e-05, "loss": 0.8466, "step": 158680 }, { "epoch": 1.013825179203455, "grad_norm": 1.2256412506103516, "learning_rate": 4.8945360829663837e-05, "loss": 0.8359, "step": 158690 }, { "epoch": 1.0138890663531936, "grad_norm": 1.0603712797164917, "learning_rate": 4.894034426629791e-05, "loss": 0.789, "step": 158700 }, { "epoch": 1.0139529535029324, "grad_norm": 0.9669123291969299, "learning_rate": 4.8935327713603634e-05, "loss": 0.809, "step": 158710 }, { "epoch": 1.014016840652671, "grad_norm": 1.034204363822937, "learning_rate": 4.893031117163153e-05, "loss": 0.7027, "step": 158720 }, { "epoch": 1.0140807278024098, "grad_norm": 0.6518446803092957, "learning_rate": 4.892529464043212e-05, "loss": 0.7673, "step": 158730 }, { "epoch": 1.0141446149521485, "grad_norm": 0.6312533617019653, "learning_rate": 4.8920278120055915e-05, "loss": 0.8681, "step": 158740 }, { "epoch": 1.0142085021018872, "grad_norm": 1.1980721950531006, "learning_rate": 4.8915261610553446e-05, "loss": 0.9774, "step": 158750 }, { "epoch": 1.0142723892516259, "grad_norm": 1.1740984916687012, "learning_rate": 4.8910245111975236e-05, "loss": 0.6504, "step": 158760 }, { "epoch": 1.0143362764013646, "grad_norm": 0.9296197295188904, "learning_rate": 4.8905228624371794e-05, "loss": 0.9532, "step": 158770 }, { "epoch": 1.0144001635511033, "grad_norm": 1.52986741065979, "learning_rate": 4.890021214779366e-05, "loss": 0.6905, "step": 158780 }, { "epoch": 1.014464050700842, "grad_norm": 0.9443937540054321, "learning_rate": 4.8895195682291326e-05, "loss": 0.8081, "step": 158790 }, { "epoch": 1.0145279378505807, "grad_norm": 1.0230110883712769, "learning_rate": 4.8890179227915333e-05, "loss": 0.8733, "step": 158800 }, { "epoch": 1.0145918250003194, "grad_norm": 1.2321919202804565, "learning_rate": 4.888516278471619e-05, "loss": 0.6539, "step": 158810 }, { "epoch": 1.0146557121500581, "grad_norm": 1.0817656517028809, "learning_rate": 4.888014635274443e-05, "loss": 0.7763, "step": 158820 }, { "epoch": 1.0147195992997968, "grad_norm": 0.7480055093765259, "learning_rate": 4.887512993205056e-05, "loss": 0.7766, "step": 158830 }, { "epoch": 1.0147834864495355, "grad_norm": 1.0258066654205322, "learning_rate": 4.887011352268511e-05, "loss": 0.9319, "step": 158840 }, { "epoch": 1.0148473735992742, "grad_norm": 0.8432429432868958, "learning_rate": 4.88650971246986e-05, "loss": 1.2076, "step": 158850 }, { "epoch": 1.014911260749013, "grad_norm": 1.4191237688064575, "learning_rate": 4.886008073814153e-05, "loss": 1.0, "step": 158860 }, { "epoch": 1.0149751478987517, "grad_norm": 0.7389619946479797, "learning_rate": 4.885506436306444e-05, "loss": 0.662, "step": 158870 }, { "epoch": 1.0150390350484904, "grad_norm": 0.9246344566345215, "learning_rate": 4.8850047999517834e-05, "loss": 0.8963, "step": 158880 }, { "epoch": 1.015102922198229, "grad_norm": 0.9745082259178162, "learning_rate": 4.884503164755224e-05, "loss": 0.8817, "step": 158890 }, { "epoch": 1.0151668093479678, "grad_norm": 1.0244060754776, "learning_rate": 4.884001530721818e-05, "loss": 0.9566, "step": 158900 }, { "epoch": 1.0152306964977065, "grad_norm": 1.594533085823059, "learning_rate": 4.883499897856617e-05, "loss": 1.2052, "step": 158910 }, { "epoch": 1.0152945836474452, "grad_norm": 1.8419270515441895, "learning_rate": 4.882998266164673e-05, "loss": 0.7605, "step": 158920 }, { "epoch": 1.0153584707971839, "grad_norm": 0.7778764963150024, "learning_rate": 4.8824966356510375e-05, "loss": 0.9946, "step": 158930 }, { "epoch": 1.0154223579469226, "grad_norm": 1.0129575729370117, "learning_rate": 4.881995006320763e-05, "loss": 1.0609, "step": 158940 }, { "epoch": 1.0154862450966613, "grad_norm": 0.7646721601486206, "learning_rate": 4.8814933781789004e-05, "loss": 0.9917, "step": 158950 }, { "epoch": 1.0155501322464, "grad_norm": 0.7843183279037476, "learning_rate": 4.880991751230502e-05, "loss": 0.8404, "step": 158960 }, { "epoch": 1.0156140193961387, "grad_norm": 1.0840474367141724, "learning_rate": 4.880490125480622e-05, "loss": 0.8327, "step": 158970 }, { "epoch": 1.0156779065458774, "grad_norm": 0.6885616779327393, "learning_rate": 4.8799885009343084e-05, "loss": 0.9157, "step": 158980 }, { "epoch": 1.0157417936956161, "grad_norm": 1.2638275623321533, "learning_rate": 4.879486877596613e-05, "loss": 0.9309, "step": 158990 }, { "epoch": 1.0158056808453548, "grad_norm": 0.6783813834190369, "learning_rate": 4.87898525547259e-05, "loss": 1.2162, "step": 159000 }, { "epoch": 1.0158695679950935, "grad_norm": 0.8159160614013672, "learning_rate": 4.878483634567289e-05, "loss": 0.7415, "step": 159010 }, { "epoch": 1.0159334551448322, "grad_norm": 0.8057973384857178, "learning_rate": 4.8779820148857636e-05, "loss": 1.0629, "step": 159020 }, { "epoch": 1.015997342294571, "grad_norm": 1.3183810710906982, "learning_rate": 4.8774803964330653e-05, "loss": 0.6688, "step": 159030 }, { "epoch": 1.0160612294443097, "grad_norm": 1.4091906547546387, "learning_rate": 4.876978779214245e-05, "loss": 1.0829, "step": 159040 }, { "epoch": 1.0161251165940484, "grad_norm": 0.9577476978302002, "learning_rate": 4.876477163234355e-05, "loss": 1.0011, "step": 159050 }, { "epoch": 1.016189003743787, "grad_norm": 0.6230905652046204, "learning_rate": 4.8759755484984466e-05, "loss": 0.8103, "step": 159060 }, { "epoch": 1.0162528908935258, "grad_norm": 1.224851369857788, "learning_rate": 4.875473935011572e-05, "loss": 1.0953, "step": 159070 }, { "epoch": 1.0163167780432645, "grad_norm": 0.8951313495635986, "learning_rate": 4.874972322778782e-05, "loss": 0.775, "step": 159080 }, { "epoch": 1.0163806651930032, "grad_norm": 1.2181718349456787, "learning_rate": 4.8744707118051294e-05, "loss": 0.7839, "step": 159090 }, { "epoch": 1.0164445523427417, "grad_norm": 0.6959841847419739, "learning_rate": 4.873969102095666e-05, "loss": 0.7769, "step": 159100 }, { "epoch": 1.0165084394924804, "grad_norm": 1.0471941232681274, "learning_rate": 4.873467493655442e-05, "loss": 0.7421, "step": 159110 }, { "epoch": 1.016572326642219, "grad_norm": 0.6630411148071289, "learning_rate": 4.87296588648951e-05, "loss": 0.8807, "step": 159120 }, { "epoch": 1.0166362137919578, "grad_norm": 0.845357358455658, "learning_rate": 4.8724642806029206e-05, "loss": 0.7257, "step": 159130 }, { "epoch": 1.0167001009416965, "grad_norm": 0.9893336296081543, "learning_rate": 4.8719626760007276e-05, "loss": 0.8755, "step": 159140 }, { "epoch": 1.0167639880914352, "grad_norm": 1.0361378192901611, "learning_rate": 4.87146107268798e-05, "loss": 1.0532, "step": 159150 }, { "epoch": 1.016827875241174, "grad_norm": 0.9727418422698975, "learning_rate": 4.870959470669732e-05, "loss": 0.7611, "step": 159160 }, { "epoch": 1.0168917623909126, "grad_norm": 0.8137701749801636, "learning_rate": 4.870457869951033e-05, "loss": 0.9455, "step": 159170 }, { "epoch": 1.0169556495406513, "grad_norm": 0.8102377653121948, "learning_rate": 4.869956270536935e-05, "loss": 0.8612, "step": 159180 }, { "epoch": 1.01701953669039, "grad_norm": 0.8785635828971863, "learning_rate": 4.8694546724324895e-05, "loss": 0.8923, "step": 159190 }, { "epoch": 1.0170834238401287, "grad_norm": 1.1253139972686768, "learning_rate": 4.868953075642749e-05, "loss": 0.7758, "step": 159200 }, { "epoch": 1.0171473109898674, "grad_norm": 1.2032454013824463, "learning_rate": 4.868451480172764e-05, "loss": 0.8554, "step": 159210 }, { "epoch": 1.0172111981396061, "grad_norm": 0.8401626944541931, "learning_rate": 4.867949886027586e-05, "loss": 0.7357, "step": 159220 }, { "epoch": 1.0172750852893448, "grad_norm": 1.0876498222351074, "learning_rate": 4.8674482932122686e-05, "loss": 0.8303, "step": 159230 }, { "epoch": 1.0173389724390836, "grad_norm": 0.704442024230957, "learning_rate": 4.866946701731861e-05, "loss": 0.9889, "step": 159240 }, { "epoch": 1.0174028595888223, "grad_norm": 0.9629689455032349, "learning_rate": 4.866445111591414e-05, "loss": 0.8805, "step": 159250 }, { "epoch": 1.017466746738561, "grad_norm": 0.8684937953948975, "learning_rate": 4.8659435227959815e-05, "loss": 0.7646, "step": 159260 }, { "epoch": 1.0175306338882997, "grad_norm": 0.8176425099372864, "learning_rate": 4.865441935350613e-05, "loss": 0.8797, "step": 159270 }, { "epoch": 1.0175945210380384, "grad_norm": 1.4737823009490967, "learning_rate": 4.8649403492603615e-05, "loss": 0.8141, "step": 159280 }, { "epoch": 1.017658408187777, "grad_norm": 0.8579307198524475, "learning_rate": 4.864438764530276e-05, "loss": 1.0162, "step": 159290 }, { "epoch": 1.0177222953375158, "grad_norm": 0.9210206270217896, "learning_rate": 4.863937181165411e-05, "loss": 0.9758, "step": 159300 }, { "epoch": 1.0177861824872545, "grad_norm": 0.6698283553123474, "learning_rate": 4.863435599170816e-05, "loss": 0.8678, "step": 159310 }, { "epoch": 1.0178500696369932, "grad_norm": 0.9772372841835022, "learning_rate": 4.862934018551542e-05, "loss": 0.975, "step": 159320 }, { "epoch": 1.017913956786732, "grad_norm": 0.7392674684524536, "learning_rate": 4.8624324393126416e-05, "loss": 0.8978, "step": 159330 }, { "epoch": 1.0179778439364706, "grad_norm": 0.781814455986023, "learning_rate": 4.861930861459165e-05, "loss": 0.8135, "step": 159340 }, { "epoch": 1.0180417310862093, "grad_norm": 0.9082130193710327, "learning_rate": 4.8614292849961646e-05, "loss": 0.6263, "step": 159350 }, { "epoch": 1.018105618235948, "grad_norm": 0.790849506855011, "learning_rate": 4.860927709928691e-05, "loss": 0.8153, "step": 159360 }, { "epoch": 1.0181695053856867, "grad_norm": 1.3248441219329834, "learning_rate": 4.860426136261796e-05, "loss": 1.1221, "step": 159370 }, { "epoch": 1.0182333925354254, "grad_norm": 0.797540545463562, "learning_rate": 4.85992456400053e-05, "loss": 1.0036, "step": 159380 }, { "epoch": 1.0182972796851641, "grad_norm": 0.9565972089767456, "learning_rate": 4.859422993149945e-05, "loss": 0.9894, "step": 159390 }, { "epoch": 1.0183611668349029, "grad_norm": 1.2057994604110718, "learning_rate": 4.858921423715092e-05, "loss": 0.7413, "step": 159400 }, { "epoch": 1.0184250539846416, "grad_norm": 1.2084450721740723, "learning_rate": 4.8584198557010224e-05, "loss": 0.7765, "step": 159410 }, { "epoch": 1.0184889411343803, "grad_norm": 0.9695774912834167, "learning_rate": 4.8579182891127864e-05, "loss": 0.7912, "step": 159420 }, { "epoch": 1.018552828284119, "grad_norm": 0.6492698192596436, "learning_rate": 4.857416723955437e-05, "loss": 0.7832, "step": 159430 }, { "epoch": 1.0186167154338577, "grad_norm": 0.815000593662262, "learning_rate": 4.856915160234025e-05, "loss": 0.9811, "step": 159440 }, { "epoch": 1.0186806025835964, "grad_norm": 1.2782448530197144, "learning_rate": 4.8564135979536e-05, "loss": 0.7518, "step": 159450 }, { "epoch": 1.018744489733335, "grad_norm": 1.1094145774841309, "learning_rate": 4.855912037119218e-05, "loss": 0.9107, "step": 159460 }, { "epoch": 1.0188083768830738, "grad_norm": 0.8510834574699402, "learning_rate": 4.855410477735923e-05, "loss": 1.1143, "step": 159470 }, { "epoch": 1.0188722640328125, "grad_norm": 0.5925911068916321, "learning_rate": 4.85490891980877e-05, "loss": 0.7271, "step": 159480 }, { "epoch": 1.0189361511825512, "grad_norm": 1.6815381050109863, "learning_rate": 4.85440736334281e-05, "loss": 0.9389, "step": 159490 }, { "epoch": 1.01900003833229, "grad_norm": 1.0281870365142822, "learning_rate": 4.853905808343094e-05, "loss": 0.7189, "step": 159500 }, { "epoch": 1.0190639254820286, "grad_norm": 0.7680450081825256, "learning_rate": 4.853404254814672e-05, "loss": 0.7787, "step": 159510 }, { "epoch": 1.0191278126317673, "grad_norm": 0.6367865800857544, "learning_rate": 4.852902702762597e-05, "loss": 0.7252, "step": 159520 }, { "epoch": 1.019191699781506, "grad_norm": 0.8541706204414368, "learning_rate": 4.852401152191918e-05, "loss": 0.8, "step": 159530 }, { "epoch": 1.0192555869312447, "grad_norm": 0.788281261920929, "learning_rate": 4.851899603107687e-05, "loss": 1.0424, "step": 159540 }, { "epoch": 1.0193194740809834, "grad_norm": 0.8140308856964111, "learning_rate": 4.851398055514955e-05, "loss": 0.8846, "step": 159550 }, { "epoch": 1.0193833612307222, "grad_norm": 1.011403203010559, "learning_rate": 4.850896509418774e-05, "loss": 0.7852, "step": 159560 }, { "epoch": 1.0194472483804609, "grad_norm": 0.8204459547996521, "learning_rate": 4.850394964824194e-05, "loss": 0.9566, "step": 159570 }, { "epoch": 1.0195111355301996, "grad_norm": 3.202881097793579, "learning_rate": 4.8498934217362665e-05, "loss": 0.8407, "step": 159580 }, { "epoch": 1.019575022679938, "grad_norm": 1.3872183561325073, "learning_rate": 4.849391880160041e-05, "loss": 0.6323, "step": 159590 }, { "epoch": 1.0196389098296768, "grad_norm": 0.9473857879638672, "learning_rate": 4.848890340100571e-05, "loss": 0.7185, "step": 159600 }, { "epoch": 1.0197027969794155, "grad_norm": 1.2074732780456543, "learning_rate": 4.848388801562905e-05, "loss": 0.8186, "step": 159610 }, { "epoch": 1.0197666841291542, "grad_norm": 0.5796457529067993, "learning_rate": 4.8478872645520954e-05, "loss": 0.7665, "step": 159620 }, { "epoch": 1.0198305712788929, "grad_norm": 1.2254159450531006, "learning_rate": 4.847385729073192e-05, "loss": 0.9178, "step": 159630 }, { "epoch": 1.0198944584286316, "grad_norm": 1.5869561433792114, "learning_rate": 4.846884195131247e-05, "loss": 0.9841, "step": 159640 }, { "epoch": 1.0199583455783703, "grad_norm": 1.9314491748809814, "learning_rate": 4.846382662731311e-05, "loss": 0.7713, "step": 159650 }, { "epoch": 1.020022232728109, "grad_norm": 0.8856831789016724, "learning_rate": 4.8458811318784334e-05, "loss": 0.7843, "step": 159660 }, { "epoch": 1.0200861198778477, "grad_norm": 0.6835888028144836, "learning_rate": 4.8453796025776665e-05, "loss": 0.591, "step": 159670 }, { "epoch": 1.0201500070275864, "grad_norm": 0.5204030275344849, "learning_rate": 4.844878074834061e-05, "loss": 0.8107, "step": 159680 }, { "epoch": 1.020213894177325, "grad_norm": 0.9245986342430115, "learning_rate": 4.8443765486526675e-05, "loss": 0.8568, "step": 159690 }, { "epoch": 1.0202777813270638, "grad_norm": 2.367129325866699, "learning_rate": 4.8438750240385366e-05, "loss": 0.8777, "step": 159700 }, { "epoch": 1.0203416684768025, "grad_norm": 0.9855824112892151, "learning_rate": 4.84337350099672e-05, "loss": 0.821, "step": 159710 }, { "epoch": 1.0204055556265412, "grad_norm": 0.6439787745475769, "learning_rate": 4.842871979532267e-05, "loss": 0.8859, "step": 159720 }, { "epoch": 1.02046944277628, "grad_norm": 0.9851899743080139, "learning_rate": 4.84242061156708e-05, "loss": 0.9057, "step": 159730 }, { "epoch": 1.0205333299260186, "grad_norm": 0.8334624171257019, "learning_rate": 4.841919093113534e-05, "loss": 0.7313, "step": 159740 }, { "epoch": 1.0205972170757573, "grad_norm": 0.8483121991157532, "learning_rate": 4.841417576252e-05, "loss": 1.1077, "step": 159750 }, { "epoch": 1.020661104225496, "grad_norm": 1.2232626676559448, "learning_rate": 4.840916060987528e-05, "loss": 0.8178, "step": 159760 }, { "epoch": 1.0207249913752348, "grad_norm": 1.0820945501327515, "learning_rate": 4.840414547325168e-05, "loss": 0.8174, "step": 159770 }, { "epoch": 1.0207888785249735, "grad_norm": 0.7780520915985107, "learning_rate": 4.8399130352699726e-05, "loss": 0.6902, "step": 159780 }, { "epoch": 1.0208527656747122, "grad_norm": 0.8979020714759827, "learning_rate": 4.839411524826991e-05, "loss": 0.9166, "step": 159790 }, { "epoch": 1.0209166528244509, "grad_norm": 4.772502422332764, "learning_rate": 4.8389100160012744e-05, "loss": 1.0738, "step": 159800 }, { "epoch": 1.0209805399741896, "grad_norm": 1.1232408285140991, "learning_rate": 4.838408508797874e-05, "loss": 0.6826, "step": 159810 }, { "epoch": 1.0210444271239283, "grad_norm": 0.6424837112426758, "learning_rate": 4.8379070032218386e-05, "loss": 1.0162, "step": 159820 }, { "epoch": 1.021108314273667, "grad_norm": 1.7317675352096558, "learning_rate": 4.83740549927822e-05, "loss": 0.9172, "step": 159830 }, { "epoch": 1.0211722014234057, "grad_norm": 1.2768808603286743, "learning_rate": 4.83690399697207e-05, "loss": 0.8575, "step": 159840 }, { "epoch": 1.0212360885731444, "grad_norm": 0.901062548160553, "learning_rate": 4.836402496308437e-05, "loss": 0.9215, "step": 159850 }, { "epoch": 1.0212999757228831, "grad_norm": 0.7806056141853333, "learning_rate": 4.835900997292371e-05, "loss": 0.7751, "step": 159860 }, { "epoch": 1.0213638628726218, "grad_norm": 0.7727100253105164, "learning_rate": 4.835399499928925e-05, "loss": 0.6676, "step": 159870 }, { "epoch": 1.0214277500223605, "grad_norm": 0.8922156095504761, "learning_rate": 4.834898004223148e-05, "loss": 0.8759, "step": 159880 }, { "epoch": 1.0214916371720992, "grad_norm": 0.9628594517707825, "learning_rate": 4.834396510180092e-05, "loss": 0.8242, "step": 159890 }, { "epoch": 1.021555524321838, "grad_norm": 0.6507847309112549, "learning_rate": 4.8338950178048056e-05, "loss": 1.1914, "step": 159900 }, { "epoch": 1.0216194114715766, "grad_norm": 1.5707703828811646, "learning_rate": 4.8333935271023415e-05, "loss": 0.8153, "step": 159910 }, { "epoch": 1.0216832986213154, "grad_norm": 0.7113951444625854, "learning_rate": 4.8328920380777473e-05, "loss": 0.7568, "step": 159920 }, { "epoch": 1.021747185771054, "grad_norm": 0.9113976955413818, "learning_rate": 4.832390550736075e-05, "loss": 0.9178, "step": 159930 }, { "epoch": 1.0218110729207928, "grad_norm": 0.5762841701507568, "learning_rate": 4.831889065082375e-05, "loss": 0.801, "step": 159940 }, { "epoch": 1.0218749600705315, "grad_norm": 0.9322299361228943, "learning_rate": 4.831387581121698e-05, "loss": 1.0433, "step": 159950 }, { "epoch": 1.0219388472202702, "grad_norm": 1.1670128107070923, "learning_rate": 4.8308860988590935e-05, "loss": 0.8452, "step": 159960 }, { "epoch": 1.0220027343700089, "grad_norm": 0.7586202621459961, "learning_rate": 4.8303846182996124e-05, "loss": 0.8195, "step": 159970 }, { "epoch": 1.0220666215197476, "grad_norm": 0.9201989769935608, "learning_rate": 4.829883139448305e-05, "loss": 0.894, "step": 159980 }, { "epoch": 1.0221305086694863, "grad_norm": 0.981075644493103, "learning_rate": 4.829381662310221e-05, "loss": 1.0564, "step": 159990 }, { "epoch": 1.022194395819225, "grad_norm": 1.116087794303894, "learning_rate": 4.8288801868904124e-05, "loss": 0.7544, "step": 160000 }, { "epoch": 1.0222582829689637, "grad_norm": 0.7812409400939941, "learning_rate": 4.828378713193927e-05, "loss": 1.0803, "step": 160010 }, { "epoch": 1.0223221701187024, "grad_norm": 0.7585064172744751, "learning_rate": 4.827877241225817e-05, "loss": 0.654, "step": 160020 }, { "epoch": 1.0223860572684411, "grad_norm": 1.045121431350708, "learning_rate": 4.8273757709911324e-05, "loss": 1.3915, "step": 160030 }, { "epoch": 1.0224499444181798, "grad_norm": 0.9577502012252808, "learning_rate": 4.826874302494923e-05, "loss": 0.8388, "step": 160040 }, { "epoch": 1.0225138315679185, "grad_norm": 1.286743402481079, "learning_rate": 4.826372835742239e-05, "loss": 1.1065, "step": 160050 }, { "epoch": 1.0225777187176572, "grad_norm": 1.2941441535949707, "learning_rate": 4.82587137073813e-05, "loss": 0.76, "step": 160060 }, { "epoch": 1.022641605867396, "grad_norm": 0.7606869339942932, "learning_rate": 4.825369907487647e-05, "loss": 0.8793, "step": 160070 }, { "epoch": 1.0227054930171344, "grad_norm": 1.1968684196472168, "learning_rate": 4.824868445995841e-05, "loss": 0.7791, "step": 160080 }, { "epoch": 1.0227693801668731, "grad_norm": 0.8206735253334045, "learning_rate": 4.8243669862677634e-05, "loss": 0.8489, "step": 160090 }, { "epoch": 1.0228332673166118, "grad_norm": 1.5179201364517212, "learning_rate": 4.82386552830846e-05, "loss": 0.9613, "step": 160100 }, { "epoch": 1.0228971544663505, "grad_norm": 0.9671904444694519, "learning_rate": 4.823364072122983e-05, "loss": 1.0319, "step": 160110 }, { "epoch": 1.0229610416160893, "grad_norm": 0.7965761423110962, "learning_rate": 4.8228626177163824e-05, "loss": 0.5945, "step": 160120 }, { "epoch": 1.023024928765828, "grad_norm": 0.7991026043891907, "learning_rate": 4.822361165093709e-05, "loss": 0.8775, "step": 160130 }, { "epoch": 1.0230888159155667, "grad_norm": 1.263779878616333, "learning_rate": 4.821859714260012e-05, "loss": 0.8976, "step": 160140 }, { "epoch": 1.0231527030653054, "grad_norm": 0.7055035829544067, "learning_rate": 4.821358265220342e-05, "loss": 0.8665, "step": 160150 }, { "epoch": 1.023216590215044, "grad_norm": 1.1190840005874634, "learning_rate": 4.8208568179797485e-05, "loss": 1.0261, "step": 160160 }, { "epoch": 1.0232804773647828, "grad_norm": 0.7512614130973816, "learning_rate": 4.820355372543282e-05, "loss": 0.7382, "step": 160170 }, { "epoch": 1.0233443645145215, "grad_norm": 0.8903238773345947, "learning_rate": 4.8198539289159914e-05, "loss": 0.9103, "step": 160180 }, { "epoch": 1.0234082516642602, "grad_norm": 1.0027856826782227, "learning_rate": 4.819352487102928e-05, "loss": 0.9353, "step": 160190 }, { "epoch": 1.023472138813999, "grad_norm": 1.451263427734375, "learning_rate": 4.818851047109142e-05, "loss": 0.7841, "step": 160200 }, { "epoch": 1.0235360259637376, "grad_norm": 0.9455044269561768, "learning_rate": 4.8183496089396826e-05, "loss": 0.9434, "step": 160210 }, { "epoch": 1.0235999131134763, "grad_norm": 0.7364357709884644, "learning_rate": 4.8178481725996e-05, "loss": 0.9864, "step": 160220 }, { "epoch": 1.023663800263215, "grad_norm": 0.8373795747756958, "learning_rate": 4.817346738093943e-05, "loss": 0.8139, "step": 160230 }, { "epoch": 1.0237276874129537, "grad_norm": 0.6274827122688293, "learning_rate": 4.816845305427764e-05, "loss": 0.8578, "step": 160240 }, { "epoch": 1.0237915745626924, "grad_norm": 0.9364883303642273, "learning_rate": 4.8163438746061095e-05, "loss": 0.9182, "step": 160250 }, { "epoch": 1.0238554617124311, "grad_norm": 0.8702690601348877, "learning_rate": 4.815842445634032e-05, "loss": 0.943, "step": 160260 }, { "epoch": 1.0239193488621698, "grad_norm": 0.7030714750289917, "learning_rate": 4.81534101851658e-05, "loss": 0.8738, "step": 160270 }, { "epoch": 1.0239832360119085, "grad_norm": 0.9644308090209961, "learning_rate": 4.814839593258804e-05, "loss": 1.1133, "step": 160280 }, { "epoch": 1.0240471231616473, "grad_norm": 0.642647922039032, "learning_rate": 4.814338169865753e-05, "loss": 0.9129, "step": 160290 }, { "epoch": 1.024111010311386, "grad_norm": 1.3615950345993042, "learning_rate": 4.813836748342477e-05, "loss": 0.8771, "step": 160300 }, { "epoch": 1.0241748974611247, "grad_norm": 0.9638871550559998, "learning_rate": 4.813335328694027e-05, "loss": 0.9895, "step": 160310 }, { "epoch": 1.0242387846108634, "grad_norm": 0.9455498456954956, "learning_rate": 4.8128339109254516e-05, "loss": 1.2231, "step": 160320 }, { "epoch": 1.024302671760602, "grad_norm": 0.8217796683311462, "learning_rate": 4.8123324950418e-05, "loss": 0.8139, "step": 160330 }, { "epoch": 1.0243665589103408, "grad_norm": 0.6381251811981201, "learning_rate": 4.811831081048123e-05, "loss": 0.684, "step": 160340 }, { "epoch": 1.0244304460600795, "grad_norm": 1.0423862934112549, "learning_rate": 4.8113296689494693e-05, "loss": 0.7997, "step": 160350 }, { "epoch": 1.0244943332098182, "grad_norm": 1.1656889915466309, "learning_rate": 4.8108282587508895e-05, "loss": 1.1299, "step": 160360 }, { "epoch": 1.024558220359557, "grad_norm": 1.2188206911087036, "learning_rate": 4.8103268504574325e-05, "loss": 0.5699, "step": 160370 }, { "epoch": 1.0246221075092956, "grad_norm": 0.7609322667121887, "learning_rate": 4.8098254440741486e-05, "loss": 1.2015, "step": 160380 }, { "epoch": 1.0246859946590343, "grad_norm": 1.1754517555236816, "learning_rate": 4.8093240396060854e-05, "loss": 1.0247, "step": 160390 }, { "epoch": 1.024749881808773, "grad_norm": 0.74627286195755, "learning_rate": 4.808822637058296e-05, "loss": 0.7625, "step": 160400 }, { "epoch": 1.0248137689585117, "grad_norm": 0.7061801552772522, "learning_rate": 4.8083212364358277e-05, "loss": 0.9833, "step": 160410 }, { "epoch": 1.0248776561082504, "grad_norm": 1.1060301065444946, "learning_rate": 4.80781983774373e-05, "loss": 0.9781, "step": 160420 }, { "epoch": 1.0249415432579891, "grad_norm": 1.5939122438430786, "learning_rate": 4.807318440987053e-05, "loss": 0.915, "step": 160430 }, { "epoch": 1.0250054304077278, "grad_norm": 0.8850380778312683, "learning_rate": 4.8068170461708464e-05, "loss": 1.1094, "step": 160440 }, { "epoch": 1.0250693175574666, "grad_norm": 0.8643857836723328, "learning_rate": 4.8063156533001585e-05, "loss": 1.2635, "step": 160450 }, { "epoch": 1.0251332047072053, "grad_norm": 1.5707765817642212, "learning_rate": 4.8058142623800404e-05, "loss": 1.073, "step": 160460 }, { "epoch": 1.025197091856944, "grad_norm": 0.7831910252571106, "learning_rate": 4.80531287341554e-05, "loss": 0.9, "step": 160470 }, { "epoch": 1.0252609790066827, "grad_norm": 0.6048902869224548, "learning_rate": 4.804811486411707e-05, "loss": 0.846, "step": 160480 }, { "epoch": 1.0253248661564214, "grad_norm": 0.6415519118309021, "learning_rate": 4.804310101373592e-05, "loss": 0.9206, "step": 160490 }, { "epoch": 1.02538875330616, "grad_norm": 0.9880675077438354, "learning_rate": 4.803808718306243e-05, "loss": 0.9464, "step": 160500 }, { "epoch": 1.0254526404558988, "grad_norm": 0.9564589262008667, "learning_rate": 4.80330733721471e-05, "loss": 0.6466, "step": 160510 }, { "epoch": 1.0255165276056375, "grad_norm": 0.9664949774742126, "learning_rate": 4.802805958104043e-05, "loss": 0.8345, "step": 160520 }, { "epoch": 1.0255804147553762, "grad_norm": 0.4916277229785919, "learning_rate": 4.80230458097929e-05, "loss": 0.7715, "step": 160530 }, { "epoch": 1.025644301905115, "grad_norm": 0.894357442855835, "learning_rate": 4.801803205845501e-05, "loss": 0.8438, "step": 160540 }, { "epoch": 1.0257081890548536, "grad_norm": 1.06783926486969, "learning_rate": 4.801301832707725e-05, "loss": 0.811, "step": 160550 }, { "epoch": 1.0257720762045923, "grad_norm": 0.6926699876785278, "learning_rate": 4.800800461571012e-05, "loss": 0.8775, "step": 160560 }, { "epoch": 1.0258359633543308, "grad_norm": 0.8281245231628418, "learning_rate": 4.800299092440411e-05, "loss": 0.8978, "step": 160570 }, { "epoch": 1.0258998505040695, "grad_norm": 2.3448851108551025, "learning_rate": 4.7997977253209706e-05, "loss": 0.7675, "step": 160580 }, { "epoch": 1.0259637376538082, "grad_norm": 1.0262439250946045, "learning_rate": 4.79929636021774e-05, "loss": 0.9032, "step": 160590 }, { "epoch": 1.026027624803547, "grad_norm": 0.8757922053337097, "learning_rate": 4.798794997135769e-05, "loss": 0.8369, "step": 160600 }, { "epoch": 1.0260915119532856, "grad_norm": 0.669083833694458, "learning_rate": 4.798293636080106e-05, "loss": 0.8433, "step": 160610 }, { "epoch": 1.0261553991030243, "grad_norm": 1.0054523944854736, "learning_rate": 4.7977922770558e-05, "loss": 0.8063, "step": 160620 }, { "epoch": 1.026219286252763, "grad_norm": 0.6672931909561157, "learning_rate": 4.797290920067902e-05, "loss": 0.9353, "step": 160630 }, { "epoch": 1.0262831734025017, "grad_norm": 1.1048996448516846, "learning_rate": 4.796789565121459e-05, "loss": 0.8202, "step": 160640 }, { "epoch": 1.0263470605522405, "grad_norm": 0.6301406621932983, "learning_rate": 4.796288212221521e-05, "loss": 0.7345, "step": 160650 }, { "epoch": 1.0264109477019792, "grad_norm": 0.7342906594276428, "learning_rate": 4.795786861373137e-05, "loss": 0.9213, "step": 160660 }, { "epoch": 1.0264748348517179, "grad_norm": 0.8929392695426941, "learning_rate": 4.795285512581356e-05, "loss": 0.8837, "step": 160670 }, { "epoch": 1.0265387220014566, "grad_norm": 0.8775836229324341, "learning_rate": 4.7947841658512274e-05, "loss": 0.7707, "step": 160680 }, { "epoch": 1.0266026091511953, "grad_norm": 0.9756439328193665, "learning_rate": 4.794282821187799e-05, "loss": 1.0365, "step": 160690 }, { "epoch": 1.026666496300934, "grad_norm": 0.5219516158103943, "learning_rate": 4.793781478596121e-05, "loss": 1.0974, "step": 160700 }, { "epoch": 1.0267303834506727, "grad_norm": 0.6594055891036987, "learning_rate": 4.793280138081241e-05, "loss": 0.726, "step": 160710 }, { "epoch": 1.0267942706004114, "grad_norm": 0.5748750567436218, "learning_rate": 4.7927787996482095e-05, "loss": 0.7363, "step": 160720 }, { "epoch": 1.02685815775015, "grad_norm": 1.0073412656784058, "learning_rate": 4.792277463302075e-05, "loss": 0.9049, "step": 160730 }, { "epoch": 1.0269220448998888, "grad_norm": 2.4105050563812256, "learning_rate": 4.791776129047886e-05, "loss": 0.9481, "step": 160740 }, { "epoch": 1.0269859320496275, "grad_norm": 0.9212214946746826, "learning_rate": 4.7912747968906916e-05, "loss": 0.919, "step": 160750 }, { "epoch": 1.0270498191993662, "grad_norm": 0.8846902847290039, "learning_rate": 4.79077346683554e-05, "loss": 0.6759, "step": 160760 }, { "epoch": 1.027113706349105, "grad_norm": 2.740834951400757, "learning_rate": 4.7902721388874814e-05, "loss": 1.0255, "step": 160770 }, { "epoch": 1.0271775934988436, "grad_norm": 1.0306124687194824, "learning_rate": 4.7897708130515637e-05, "loss": 0.8978, "step": 160780 }, { "epoch": 1.0272414806485823, "grad_norm": 0.7168565988540649, "learning_rate": 4.789269489332836e-05, "loss": 0.9045, "step": 160790 }, { "epoch": 1.027305367798321, "grad_norm": 1.0961785316467285, "learning_rate": 4.788768167736346e-05, "loss": 1.0505, "step": 160800 }, { "epoch": 1.0273692549480598, "grad_norm": 1.3778088092803955, "learning_rate": 4.788266848267144e-05, "loss": 0.7855, "step": 160810 }, { "epoch": 1.0274331420977985, "grad_norm": 0.6504400372505188, "learning_rate": 4.7877655309302776e-05, "loss": 0.7996, "step": 160820 }, { "epoch": 1.0274970292475372, "grad_norm": 1.6229898929595947, "learning_rate": 4.7872642157307965e-05, "loss": 0.8625, "step": 160830 }, { "epoch": 1.0275609163972759, "grad_norm": 0.8949072957038879, "learning_rate": 4.7867629026737484e-05, "loss": 0.9184, "step": 160840 }, { "epoch": 1.0276248035470146, "grad_norm": 1.153201699256897, "learning_rate": 4.786261591764182e-05, "loss": 0.7552, "step": 160850 }, { "epoch": 1.0276886906967533, "grad_norm": 1.047953486442566, "learning_rate": 4.785760283007147e-05, "loss": 0.7731, "step": 160860 }, { "epoch": 1.027752577846492, "grad_norm": 0.7609559893608093, "learning_rate": 4.785258976407691e-05, "loss": 0.8437, "step": 160870 }, { "epoch": 1.0278164649962307, "grad_norm": 0.9429197907447815, "learning_rate": 4.784757671970863e-05, "loss": 1.2661, "step": 160880 }, { "epoch": 1.0278803521459694, "grad_norm": 4.659643173217773, "learning_rate": 4.784256369701711e-05, "loss": 0.9362, "step": 160890 }, { "epoch": 1.027944239295708, "grad_norm": 0.6563370823860168, "learning_rate": 4.783755069605284e-05, "loss": 0.64, "step": 160900 }, { "epoch": 1.0280081264454468, "grad_norm": 0.8179851174354553, "learning_rate": 4.78325377168663e-05, "loss": 0.9857, "step": 160910 }, { "epoch": 1.0280720135951855, "grad_norm": 1.0126748085021973, "learning_rate": 4.782752475950799e-05, "loss": 0.7072, "step": 160920 }, { "epoch": 1.0281359007449242, "grad_norm": 1.2026677131652832, "learning_rate": 4.782251182402838e-05, "loss": 0.9921, "step": 160930 }, { "epoch": 1.028199787894663, "grad_norm": 0.667464554309845, "learning_rate": 4.781749891047797e-05, "loss": 0.8723, "step": 160940 }, { "epoch": 1.0282636750444016, "grad_norm": 0.7997406125068665, "learning_rate": 4.7812486018907224e-05, "loss": 0.8121, "step": 160950 }, { "epoch": 1.0283275621941403, "grad_norm": 0.905473530292511, "learning_rate": 4.7807473149366636e-05, "loss": 0.8253, "step": 160960 }, { "epoch": 1.028391449343879, "grad_norm": 0.9246165752410889, "learning_rate": 4.780246030190669e-05, "loss": 0.8049, "step": 160970 }, { "epoch": 1.0284553364936178, "grad_norm": 1.9537968635559082, "learning_rate": 4.7797447476577876e-05, "loss": 0.8806, "step": 160980 }, { "epoch": 1.0285192236433565, "grad_norm": 1.1061798334121704, "learning_rate": 4.779243467343068e-05, "loss": 0.8522, "step": 160990 }, { "epoch": 1.0285831107930952, "grad_norm": 0.8190047740936279, "learning_rate": 4.778742189251556e-05, "loss": 0.8833, "step": 161000 }, { "epoch": 1.0286469979428339, "grad_norm": 0.997075617313385, "learning_rate": 4.778240913388302e-05, "loss": 1.2601, "step": 161010 }, { "epoch": 1.0287108850925726, "grad_norm": 0.8031179308891296, "learning_rate": 4.777739639758354e-05, "loss": 0.7952, "step": 161020 }, { "epoch": 1.0287747722423113, "grad_norm": 0.8493223786354065, "learning_rate": 4.77723836836676e-05, "loss": 0.7857, "step": 161030 }, { "epoch": 1.02883865939205, "grad_norm": 0.6901337504386902, "learning_rate": 4.776737099218568e-05, "loss": 0.8275, "step": 161040 }, { "epoch": 1.0289025465417885, "grad_norm": 0.6938019394874573, "learning_rate": 4.7762358323188274e-05, "loss": 1.0794, "step": 161050 }, { "epoch": 1.0289664336915272, "grad_norm": 0.6040987372398376, "learning_rate": 4.775734567672586e-05, "loss": 0.748, "step": 161060 }, { "epoch": 1.0290303208412659, "grad_norm": 0.8941619396209717, "learning_rate": 4.775233305284891e-05, "loss": 0.7298, "step": 161070 }, { "epoch": 1.0290942079910046, "grad_norm": 1.5386172533035278, "learning_rate": 4.7747320451607905e-05, "loss": 0.8937, "step": 161080 }, { "epoch": 1.0291580951407433, "grad_norm": 0.6366963982582092, "learning_rate": 4.7742307873053336e-05, "loss": 0.9512, "step": 161090 }, { "epoch": 1.029221982290482, "grad_norm": 0.6901091933250427, "learning_rate": 4.773729531723568e-05, "loss": 1.0791, "step": 161100 }, { "epoch": 1.0292858694402207, "grad_norm": 1.6313228607177734, "learning_rate": 4.773228278420542e-05, "loss": 0.8286, "step": 161110 }, { "epoch": 1.0293497565899594, "grad_norm": 0.7802162170410156, "learning_rate": 4.772727027401303e-05, "loss": 0.9759, "step": 161120 }, { "epoch": 1.0294136437396981, "grad_norm": 0.9796809554100037, "learning_rate": 4.772225778670899e-05, "loss": 0.9091, "step": 161130 }, { "epoch": 1.0294775308894368, "grad_norm": 0.9127634167671204, "learning_rate": 4.771724532234379e-05, "loss": 0.8796, "step": 161140 }, { "epoch": 1.0295414180391755, "grad_norm": 1.400619387626648, "learning_rate": 4.771223288096791e-05, "loss": 0.7692, "step": 161150 }, { "epoch": 1.0296053051889142, "grad_norm": 0.8520326614379883, "learning_rate": 4.7707220462631816e-05, "loss": 0.729, "step": 161160 }, { "epoch": 1.029669192338653, "grad_norm": 0.7154648900032043, "learning_rate": 4.7702208067386e-05, "loss": 0.7087, "step": 161170 }, { "epoch": 1.0297330794883917, "grad_norm": 0.9384026527404785, "learning_rate": 4.7697195695280924e-05, "loss": 0.7901, "step": 161180 }, { "epoch": 1.0297969666381304, "grad_norm": 0.7101640701293945, "learning_rate": 4.769218334636709e-05, "loss": 0.9741, "step": 161190 }, { "epoch": 1.029860853787869, "grad_norm": 0.6075759530067444, "learning_rate": 4.7687171020694974e-05, "loss": 1.0605, "step": 161200 }, { "epoch": 1.0299247409376078, "grad_norm": 0.9361954927444458, "learning_rate": 4.7682158718315036e-05, "loss": 0.7721, "step": 161210 }, { "epoch": 1.0299886280873465, "grad_norm": 0.8033797144889832, "learning_rate": 4.767714643927776e-05, "loss": 1.1211, "step": 161220 }, { "epoch": 1.0300525152370852, "grad_norm": 0.9793620109558105, "learning_rate": 4.767213418363363e-05, "loss": 0.8834, "step": 161230 }, { "epoch": 1.030116402386824, "grad_norm": 1.3734352588653564, "learning_rate": 4.766712195143313e-05, "loss": 0.8944, "step": 161240 }, { "epoch": 1.0301802895365626, "grad_norm": 1.7260838747024536, "learning_rate": 4.766210974272673e-05, "loss": 1.051, "step": 161250 }, { "epoch": 1.0302441766863013, "grad_norm": 1.3381998538970947, "learning_rate": 4.7657097557564906e-05, "loss": 1.0072, "step": 161260 }, { "epoch": 1.03030806383604, "grad_norm": 2.562208890914917, "learning_rate": 4.765208539599814e-05, "loss": 1.1557, "step": 161270 }, { "epoch": 1.0303719509857787, "grad_norm": 0.8522435426712036, "learning_rate": 4.7647073258076905e-05, "loss": 0.8596, "step": 161280 }, { "epoch": 1.0304358381355174, "grad_norm": 0.8830184936523438, "learning_rate": 4.764206114385167e-05, "loss": 0.7747, "step": 161290 }, { "epoch": 1.0304997252852561, "grad_norm": 0.8866000175476074, "learning_rate": 4.763704905337292e-05, "loss": 0.8545, "step": 161300 }, { "epoch": 1.0305636124349948, "grad_norm": 0.7133574485778809, "learning_rate": 4.7632036986691136e-05, "loss": 1.0307, "step": 161310 }, { "epoch": 1.0306274995847335, "grad_norm": 0.8143234252929688, "learning_rate": 4.762702494385679e-05, "loss": 0.8395, "step": 161320 }, { "epoch": 1.0306913867344722, "grad_norm": 0.9006192088127136, "learning_rate": 4.762201292492035e-05, "loss": 0.9308, "step": 161330 }, { "epoch": 1.030755273884211, "grad_norm": 0.6531468033790588, "learning_rate": 4.76170009299323e-05, "loss": 1.047, "step": 161340 }, { "epoch": 1.0308191610339497, "grad_norm": 1.5346288681030273, "learning_rate": 4.7611988958943114e-05, "loss": 0.6627, "step": 161350 }, { "epoch": 1.0308830481836884, "grad_norm": 0.6416158080101013, "learning_rate": 4.760697701200326e-05, "loss": 1.0079, "step": 161360 }, { "epoch": 1.030946935333427, "grad_norm": 1.5637730360031128, "learning_rate": 4.760196508916323e-05, "loss": 0.8233, "step": 161370 }, { "epoch": 1.0310108224831658, "grad_norm": 0.7167168855667114, "learning_rate": 4.759695319047347e-05, "loss": 0.8423, "step": 161380 }, { "epoch": 1.0310747096329045, "grad_norm": 0.7242134213447571, "learning_rate": 4.7591941315984475e-05, "loss": 0.8862, "step": 161390 }, { "epoch": 1.0311385967826432, "grad_norm": 0.7080520391464233, "learning_rate": 4.758692946574672e-05, "loss": 0.6676, "step": 161400 }, { "epoch": 1.031202483932382, "grad_norm": 0.8707887530326843, "learning_rate": 4.7581917639810666e-05, "loss": 0.7589, "step": 161410 }, { "epoch": 1.0312663710821206, "grad_norm": 0.955916702747345, "learning_rate": 4.7576905838226795e-05, "loss": 0.7692, "step": 161420 }, { "epoch": 1.0313302582318593, "grad_norm": 1.247024416923523, "learning_rate": 4.7571894061045584e-05, "loss": 0.8207, "step": 161430 }, { "epoch": 1.031394145381598, "grad_norm": 0.6420993208885193, "learning_rate": 4.756688230831748e-05, "loss": 0.8466, "step": 161440 }, { "epoch": 1.0314580325313367, "grad_norm": 1.2500863075256348, "learning_rate": 4.7561870580093e-05, "loss": 0.9945, "step": 161450 }, { "epoch": 1.0315219196810754, "grad_norm": 0.887197732925415, "learning_rate": 4.755685887642258e-05, "loss": 0.649, "step": 161460 }, { "epoch": 1.0315858068308141, "grad_norm": 0.8526789546012878, "learning_rate": 4.7551847197356715e-05, "loss": 0.8747, "step": 161470 }, { "epoch": 1.0316496939805528, "grad_norm": 2.258584499359131, "learning_rate": 4.754683554294586e-05, "loss": 1.0596, "step": 161480 }, { "epoch": 1.0317135811302915, "grad_norm": 1.2613749504089355, "learning_rate": 4.7541823913240494e-05, "loss": 0.8289, "step": 161490 }, { "epoch": 1.0317774682800303, "grad_norm": 0.9289235472679138, "learning_rate": 4.753681230829109e-05, "loss": 0.7338, "step": 161500 }, { "epoch": 1.031841355429769, "grad_norm": 1.704034447669983, "learning_rate": 4.753180072814812e-05, "loss": 0.7966, "step": 161510 }, { "epoch": 1.0319052425795077, "grad_norm": 0.9457545876502991, "learning_rate": 4.752678917286205e-05, "loss": 0.8958, "step": 161520 }, { "epoch": 1.0319691297292461, "grad_norm": 0.8356863856315613, "learning_rate": 4.752177764248335e-05, "loss": 0.8386, "step": 161530 }, { "epoch": 1.0320330168789849, "grad_norm": 0.8406546711921692, "learning_rate": 4.751676613706249e-05, "loss": 0.9743, "step": 161540 }, { "epoch": 1.0320969040287236, "grad_norm": 0.7396709322929382, "learning_rate": 4.751175465664996e-05, "loss": 0.7308, "step": 161550 }, { "epoch": 1.0321607911784623, "grad_norm": 0.9812073707580566, "learning_rate": 4.7506743201296195e-05, "loss": 0.8928, "step": 161560 }, { "epoch": 1.032224678328201, "grad_norm": 1.6784000396728516, "learning_rate": 4.750173177105169e-05, "loss": 0.9561, "step": 161570 }, { "epoch": 1.0322885654779397, "grad_norm": 0.9261510372161865, "learning_rate": 4.7496720365966904e-05, "loss": 0.6682, "step": 161580 }, { "epoch": 1.0323524526276784, "grad_norm": 0.7271578311920166, "learning_rate": 4.749170898609231e-05, "loss": 0.9699, "step": 161590 }, { "epoch": 1.032416339777417, "grad_norm": 1.4668047428131104, "learning_rate": 4.7486697631478376e-05, "loss": 0.8962, "step": 161600 }, { "epoch": 1.0324802269271558, "grad_norm": 1.041753888130188, "learning_rate": 4.748168630217557e-05, "loss": 0.7792, "step": 161610 }, { "epoch": 1.0325441140768945, "grad_norm": 0.7902843952178955, "learning_rate": 4.7476674998234374e-05, "loss": 0.6976, "step": 161620 }, { "epoch": 1.0326080012266332, "grad_norm": 0.85538250207901, "learning_rate": 4.747166371970523e-05, "loss": 1.0145, "step": 161630 }, { "epoch": 1.032671888376372, "grad_norm": 0.8777297735214233, "learning_rate": 4.746665246663862e-05, "loss": 0.7155, "step": 161640 }, { "epoch": 1.0327357755261106, "grad_norm": 1.4333163499832153, "learning_rate": 4.746164123908502e-05, "loss": 0.7761, "step": 161650 }, { "epoch": 1.0327996626758493, "grad_norm": 1.410051941871643, "learning_rate": 4.7456630037094884e-05, "loss": 1.1004, "step": 161660 }, { "epoch": 1.032863549825588, "grad_norm": 0.6612430214881897, "learning_rate": 4.745161886071868e-05, "loss": 0.8089, "step": 161670 }, { "epoch": 1.0329274369753267, "grad_norm": 0.8079758882522583, "learning_rate": 4.744660771000688e-05, "loss": 0.8166, "step": 161680 }, { "epoch": 1.0329913241250654, "grad_norm": 1.0633221864700317, "learning_rate": 4.744159658500996e-05, "loss": 0.8759, "step": 161690 }, { "epoch": 1.0330552112748042, "grad_norm": 0.620724618434906, "learning_rate": 4.743658548577836e-05, "loss": 0.8089, "step": 161700 }, { "epoch": 1.0331190984245429, "grad_norm": 0.9149288535118103, "learning_rate": 4.743157441236257e-05, "loss": 0.7886, "step": 161710 }, { "epoch": 1.0331829855742816, "grad_norm": 1.0109933614730835, "learning_rate": 4.742656336481305e-05, "loss": 0.9348, "step": 161720 }, { "epoch": 1.0332468727240203, "grad_norm": 0.9548466205596924, "learning_rate": 4.742155234318026e-05, "loss": 0.8496, "step": 161730 }, { "epoch": 1.033310759873759, "grad_norm": 0.7352958917617798, "learning_rate": 4.741654134751467e-05, "loss": 0.8121, "step": 161740 }, { "epoch": 1.0333746470234977, "grad_norm": 0.6567855477333069, "learning_rate": 4.7411530377866745e-05, "loss": 1.0109, "step": 161750 }, { "epoch": 1.0334385341732364, "grad_norm": 1.1146572828292847, "learning_rate": 4.740651943428693e-05, "loss": 1.097, "step": 161760 }, { "epoch": 1.033502421322975, "grad_norm": 1.0022268295288086, "learning_rate": 4.740150851682572e-05, "loss": 0.8517, "step": 161770 }, { "epoch": 1.0335663084727138, "grad_norm": 1.1390724182128906, "learning_rate": 4.739649762553358e-05, "loss": 0.9655, "step": 161780 }, { "epoch": 1.0336301956224525, "grad_norm": 0.6415576934814453, "learning_rate": 4.7391486760460946e-05, "loss": 0.9868, "step": 161790 }, { "epoch": 1.0336940827721912, "grad_norm": 0.741622269153595, "learning_rate": 4.73864759216583e-05, "loss": 1.0249, "step": 161800 }, { "epoch": 1.03375796992193, "grad_norm": 0.6062465310096741, "learning_rate": 4.738146510917611e-05, "loss": 0.9439, "step": 161810 }, { "epoch": 1.0338218570716686, "grad_norm": 1.0667154788970947, "learning_rate": 4.737645432306483e-05, "loss": 0.9751, "step": 161820 }, { "epoch": 1.0338857442214073, "grad_norm": 0.8074294328689575, "learning_rate": 4.737144356337492e-05, "loss": 0.779, "step": 161830 }, { "epoch": 1.033949631371146, "grad_norm": 0.8012452125549316, "learning_rate": 4.736643283015685e-05, "loss": 0.7509, "step": 161840 }, { "epoch": 1.0340135185208847, "grad_norm": 0.5732237696647644, "learning_rate": 4.736142212346108e-05, "loss": 0.8607, "step": 161850 }, { "epoch": 1.0340774056706235, "grad_norm": 6.360764503479004, "learning_rate": 4.7356411443338064e-05, "loss": 0.7453, "step": 161860 }, { "epoch": 1.0341412928203622, "grad_norm": 0.8119571208953857, "learning_rate": 4.735140078983828e-05, "loss": 0.9051, "step": 161870 }, { "epoch": 1.0342051799701009, "grad_norm": 0.9205659031867981, "learning_rate": 4.7346390163012186e-05, "loss": 0.89, "step": 161880 }, { "epoch": 1.0342690671198396, "grad_norm": 0.8405207395553589, "learning_rate": 4.734137956291023e-05, "loss": 0.9451, "step": 161890 }, { "epoch": 1.0343329542695783, "grad_norm": 0.6272563934326172, "learning_rate": 4.733636898958289e-05, "loss": 0.9643, "step": 161900 }, { "epoch": 1.034396841419317, "grad_norm": 1.0878784656524658, "learning_rate": 4.733135844308061e-05, "loss": 0.9135, "step": 161910 }, { "epoch": 1.0344607285690557, "grad_norm": 1.5157370567321777, "learning_rate": 4.732634792345386e-05, "loss": 0.62, "step": 161920 }, { "epoch": 1.0345246157187944, "grad_norm": 0.5053197145462036, "learning_rate": 4.73213374307531e-05, "loss": 0.711, "step": 161930 }, { "epoch": 1.034588502868533, "grad_norm": 0.8659653663635254, "learning_rate": 4.7316326965028795e-05, "loss": 0.9249, "step": 161940 }, { "epoch": 1.0346523900182718, "grad_norm": 1.141728162765503, "learning_rate": 4.731131652633139e-05, "loss": 0.9143, "step": 161950 }, { "epoch": 1.0347162771680105, "grad_norm": 1.3741239309310913, "learning_rate": 4.730630611471137e-05, "loss": 0.6867, "step": 161960 }, { "epoch": 1.0347801643177492, "grad_norm": 0.9329667091369629, "learning_rate": 4.7301295730219156e-05, "loss": 0.8827, "step": 161970 }, { "epoch": 1.034844051467488, "grad_norm": 0.9854671359062195, "learning_rate": 4.7296285372905234e-05, "loss": 0.8048, "step": 161980 }, { "epoch": 1.0349079386172266, "grad_norm": 0.6096177101135254, "learning_rate": 4.729127504282007e-05, "loss": 0.8919, "step": 161990 }, { "epoch": 1.0349718257669653, "grad_norm": 0.7038993239402771, "learning_rate": 4.7286264740014105e-05, "loss": 0.8944, "step": 162000 }, { "epoch": 1.035035712916704, "grad_norm": 0.6950554847717285, "learning_rate": 4.7281254464537796e-05, "loss": 0.9064, "step": 162010 }, { "epoch": 1.0350996000664425, "grad_norm": 1.203827977180481, "learning_rate": 4.7276244216441604e-05, "loss": 0.8495, "step": 162020 }, { "epoch": 1.0351634872161812, "grad_norm": 1.0846861600875854, "learning_rate": 4.7271233995776e-05, "loss": 0.8045, "step": 162030 }, { "epoch": 1.03522737436592, "grad_norm": 1.1663875579833984, "learning_rate": 4.726622380259145e-05, "loss": 1.0716, "step": 162040 }, { "epoch": 1.0352912615156586, "grad_norm": 1.1435000896453857, "learning_rate": 4.726121363693837e-05, "loss": 0.9198, "step": 162050 }, { "epoch": 1.0353551486653974, "grad_norm": 1.8560707569122314, "learning_rate": 4.725620349886723e-05, "loss": 1.0904, "step": 162060 }, { "epoch": 1.035419035815136, "grad_norm": 0.8612160086631775, "learning_rate": 4.72511933884285e-05, "loss": 1.0926, "step": 162070 }, { "epoch": 1.0354829229648748, "grad_norm": 0.7381712794303894, "learning_rate": 4.724618330567262e-05, "loss": 0.9483, "step": 162080 }, { "epoch": 1.0355468101146135, "grad_norm": 0.8830359578132629, "learning_rate": 4.724117325065007e-05, "loss": 0.8329, "step": 162090 }, { "epoch": 1.0356106972643522, "grad_norm": 0.5636923313140869, "learning_rate": 4.723616322341128e-05, "loss": 0.9412, "step": 162100 }, { "epoch": 1.0356745844140909, "grad_norm": 1.070669174194336, "learning_rate": 4.723115322400673e-05, "loss": 0.8123, "step": 162110 }, { "epoch": 1.0357384715638296, "grad_norm": 0.564687967300415, "learning_rate": 4.7226143252486857e-05, "loss": 0.8417, "step": 162120 }, { "epoch": 1.0358023587135683, "grad_norm": 0.7282418012619019, "learning_rate": 4.7221133308902126e-05, "loss": 1.1135, "step": 162130 }, { "epoch": 1.035866245863307, "grad_norm": 1.1398917436599731, "learning_rate": 4.7216123393302984e-05, "loss": 1.1587, "step": 162140 }, { "epoch": 1.0359301330130457, "grad_norm": 0.6934093832969666, "learning_rate": 4.721111350573989e-05, "loss": 0.8892, "step": 162150 }, { "epoch": 1.0359940201627844, "grad_norm": 1.1826916933059692, "learning_rate": 4.7206103646263286e-05, "loss": 0.8076, "step": 162160 }, { "epoch": 1.0360579073125231, "grad_norm": 1.2234805822372437, "learning_rate": 4.720109381492365e-05, "loss": 0.8936, "step": 162170 }, { "epoch": 1.0361217944622618, "grad_norm": 0.7950318455696106, "learning_rate": 4.719608401177141e-05, "loss": 0.9691, "step": 162180 }, { "epoch": 1.0361856816120005, "grad_norm": 0.8671907186508179, "learning_rate": 4.7191074236857034e-05, "loss": 0.7834, "step": 162190 }, { "epoch": 1.0362495687617392, "grad_norm": 1.9488028287887573, "learning_rate": 4.718606449023097e-05, "loss": 0.9613, "step": 162200 }, { "epoch": 1.036313455911478, "grad_norm": 0.8187459111213684, "learning_rate": 4.7181054771943675e-05, "loss": 0.8139, "step": 162210 }, { "epoch": 1.0363773430612166, "grad_norm": 0.9009087085723877, "learning_rate": 4.717604508204559e-05, "loss": 0.9519, "step": 162220 }, { "epoch": 1.0364412302109554, "grad_norm": 0.9934369325637817, "learning_rate": 4.717103542058717e-05, "loss": 0.8313, "step": 162230 }, { "epoch": 1.036505117360694, "grad_norm": 1.1523507833480835, "learning_rate": 4.716602578761888e-05, "loss": 0.7647, "step": 162240 }, { "epoch": 1.0365690045104328, "grad_norm": 0.9399502277374268, "learning_rate": 4.7161016183191165e-05, "loss": 0.6769, "step": 162250 }, { "epoch": 1.0366328916601715, "grad_norm": 1.1942522525787354, "learning_rate": 4.715600660735446e-05, "loss": 0.9192, "step": 162260 }, { "epoch": 1.0366967788099102, "grad_norm": 0.9985781908035278, "learning_rate": 4.715099706015924e-05, "loss": 0.8788, "step": 162270 }, { "epoch": 1.0367606659596489, "grad_norm": 0.9958245754241943, "learning_rate": 4.7145987541655937e-05, "loss": 0.8654, "step": 162280 }, { "epoch": 1.0368245531093876, "grad_norm": 0.8271211981773376, "learning_rate": 4.7140978051895006e-05, "loss": 0.7453, "step": 162290 }, { "epoch": 1.0368884402591263, "grad_norm": 0.8055025339126587, "learning_rate": 4.713596859092691e-05, "loss": 0.8883, "step": 162300 }, { "epoch": 1.036952327408865, "grad_norm": 0.7597174644470215, "learning_rate": 4.713095915880208e-05, "loss": 0.7735, "step": 162310 }, { "epoch": 1.0370162145586037, "grad_norm": 1.102772831916809, "learning_rate": 4.7125949755570976e-05, "loss": 0.9174, "step": 162320 }, { "epoch": 1.0370801017083424, "grad_norm": 0.8158230781555176, "learning_rate": 4.712094038128405e-05, "loss": 1.0066, "step": 162330 }, { "epoch": 1.0371439888580811, "grad_norm": 1.1537375450134277, "learning_rate": 4.7115931035991737e-05, "loss": 0.9317, "step": 162340 }, { "epoch": 1.0372078760078198, "grad_norm": 1.2150251865386963, "learning_rate": 4.7110921719744496e-05, "loss": 0.8097, "step": 162350 }, { "epoch": 1.0372717631575585, "grad_norm": 1.1560499668121338, "learning_rate": 4.7105912432592776e-05, "loss": 1.0273, "step": 162360 }, { "epoch": 1.0373356503072972, "grad_norm": 0.8007164001464844, "learning_rate": 4.710090317458702e-05, "loss": 0.8984, "step": 162370 }, { "epoch": 1.037399537457036, "grad_norm": 1.2273977994918823, "learning_rate": 4.7095893945777675e-05, "loss": 0.6576, "step": 162380 }, { "epoch": 1.0374634246067747, "grad_norm": 0.8803560733795166, "learning_rate": 4.709088474621519e-05, "loss": 0.6075, "step": 162390 }, { "epoch": 1.0375273117565134, "grad_norm": 1.3692848682403564, "learning_rate": 4.7085875575950014e-05, "loss": 1.1301, "step": 162400 }, { "epoch": 1.037591198906252, "grad_norm": 0.5952280163764954, "learning_rate": 4.70808664350326e-05, "loss": 0.958, "step": 162410 }, { "epoch": 1.0376550860559908, "grad_norm": 0.9508340954780579, "learning_rate": 4.7075857323513375e-05, "loss": 0.881, "step": 162420 }, { "epoch": 1.0377189732057295, "grad_norm": 1.1710052490234375, "learning_rate": 4.70708482414428e-05, "loss": 0.8735, "step": 162430 }, { "epoch": 1.0377828603554682, "grad_norm": 0.9404975175857544, "learning_rate": 4.706583918887131e-05, "loss": 1.0104, "step": 162440 }, { "epoch": 1.037846747505207, "grad_norm": 2.7931301593780518, "learning_rate": 4.706083016584937e-05, "loss": 0.9396, "step": 162450 }, { "epoch": 1.0379106346549456, "grad_norm": 0.9627763628959656, "learning_rate": 4.705582117242741e-05, "loss": 1.0136, "step": 162460 }, { "epoch": 1.0379745218046843, "grad_norm": 0.7976264953613281, "learning_rate": 4.705081220865587e-05, "loss": 0.8678, "step": 162470 }, { "epoch": 1.038038408954423, "grad_norm": 1.0382015705108643, "learning_rate": 4.7045803274585216e-05, "loss": 0.9829, "step": 162480 }, { "epoch": 1.0381022961041617, "grad_norm": 0.73431795835495, "learning_rate": 4.704079437026586e-05, "loss": 0.7218, "step": 162490 }, { "epoch": 1.0381661832539004, "grad_norm": 0.4776628613471985, "learning_rate": 4.703578549574827e-05, "loss": 0.9868, "step": 162500 }, { "epoch": 1.038230070403639, "grad_norm": 0.8821565508842468, "learning_rate": 4.703077665108289e-05, "loss": 0.785, "step": 162510 }, { "epoch": 1.0382939575533776, "grad_norm": 0.6736978888511658, "learning_rate": 4.7025767836320185e-05, "loss": 0.7856, "step": 162520 }, { "epoch": 1.0383578447031163, "grad_norm": 0.9216102361679077, "learning_rate": 4.7020759051510543e-05, "loss": 0.8021, "step": 162530 }, { "epoch": 1.038421731852855, "grad_norm": 0.7360609173774719, "learning_rate": 4.7015750296704435e-05, "loss": 0.9183, "step": 162540 }, { "epoch": 1.0384856190025937, "grad_norm": 4.63002347946167, "learning_rate": 4.70107415719523e-05, "loss": 0.8354, "step": 162550 }, { "epoch": 1.0385495061523324, "grad_norm": 0.9079681038856506, "learning_rate": 4.7005732877304594e-05, "loss": 0.7523, "step": 162560 }, { "epoch": 1.0386133933020711, "grad_norm": 1.2179920673370361, "learning_rate": 4.700072421281174e-05, "loss": 0.7573, "step": 162570 }, { "epoch": 1.0386772804518098, "grad_norm": 0.7866430282592773, "learning_rate": 4.699571557852419e-05, "loss": 0.7788, "step": 162580 }, { "epoch": 1.0387411676015486, "grad_norm": 0.9179165959358215, "learning_rate": 4.6990706974492385e-05, "loss": 1.2034, "step": 162590 }, { "epoch": 1.0388050547512873, "grad_norm": 0.9241698384284973, "learning_rate": 4.698569840076676e-05, "loss": 1.0294, "step": 162600 }, { "epoch": 1.038868941901026, "grad_norm": 0.9719100594520569, "learning_rate": 4.698068985739775e-05, "loss": 0.7556, "step": 162610 }, { "epoch": 1.0389328290507647, "grad_norm": 1.2796732187271118, "learning_rate": 4.6975681344435824e-05, "loss": 0.9342, "step": 162620 }, { "epoch": 1.0389967162005034, "grad_norm": 1.0012308359146118, "learning_rate": 4.69706728619314e-05, "loss": 1.0237, "step": 162630 }, { "epoch": 1.039060603350242, "grad_norm": 2.034093141555786, "learning_rate": 4.6965664409934915e-05, "loss": 0.7937, "step": 162640 }, { "epoch": 1.0391244904999808, "grad_norm": 0.9703361988067627, "learning_rate": 4.696065598849682e-05, "loss": 1.0253, "step": 162650 }, { "epoch": 1.0391883776497195, "grad_norm": 0.8073797821998596, "learning_rate": 4.695564759766754e-05, "loss": 0.9453, "step": 162660 }, { "epoch": 1.0392522647994582, "grad_norm": 1.053033471107483, "learning_rate": 4.6950639237497526e-05, "loss": 1.2163, "step": 162670 }, { "epoch": 1.039316151949197, "grad_norm": 1.111161470413208, "learning_rate": 4.694563090803722e-05, "loss": 0.759, "step": 162680 }, { "epoch": 1.0393800390989356, "grad_norm": 0.8086190819740295, "learning_rate": 4.6940622609337046e-05, "loss": 1.0759, "step": 162690 }, { "epoch": 1.0394439262486743, "grad_norm": 1.1267038583755493, "learning_rate": 4.6935614341447455e-05, "loss": 1.0972, "step": 162700 }, { "epoch": 1.039507813398413, "grad_norm": 0.6237444877624512, "learning_rate": 4.693060610441887e-05, "loss": 0.8735, "step": 162710 }, { "epoch": 1.0395717005481517, "grad_norm": 1.0405194759368896, "learning_rate": 4.6925597898301746e-05, "loss": 0.9158, "step": 162720 }, { "epoch": 1.0396355876978904, "grad_norm": 0.8615335822105408, "learning_rate": 4.692058972314651e-05, "loss": 0.9571, "step": 162730 }, { "epoch": 1.0396994748476291, "grad_norm": 1.0155763626098633, "learning_rate": 4.691558157900359e-05, "loss": 0.9306, "step": 162740 }, { "epoch": 1.0397633619973679, "grad_norm": 1.2488973140716553, "learning_rate": 4.691057346592344e-05, "loss": 1.2076, "step": 162750 }, { "epoch": 1.0398272491471066, "grad_norm": 0.745689332485199, "learning_rate": 4.690556538395648e-05, "loss": 0.7862, "step": 162760 }, { "epoch": 1.0398911362968453, "grad_norm": 0.8243439793586731, "learning_rate": 4.690055733315317e-05, "loss": 0.6247, "step": 162770 }, { "epoch": 1.039955023446584, "grad_norm": 1.0167168378829956, "learning_rate": 4.689554931356391e-05, "loss": 0.8157, "step": 162780 }, { "epoch": 1.0400189105963227, "grad_norm": 0.9779410362243652, "learning_rate": 4.689054132523917e-05, "loss": 0.8555, "step": 162790 }, { "epoch": 1.0400827977460614, "grad_norm": 0.9805686473846436, "learning_rate": 4.688553336822936e-05, "loss": 0.7161, "step": 162800 }, { "epoch": 1.0401466848958, "grad_norm": 0.9556584358215332, "learning_rate": 4.688052544258493e-05, "loss": 0.7346, "step": 162810 }, { "epoch": 1.0402105720455388, "grad_norm": 0.589160680770874, "learning_rate": 4.687551754835629e-05, "loss": 0.8524, "step": 162820 }, { "epoch": 1.0402744591952775, "grad_norm": 0.9270586371421814, "learning_rate": 4.6870509685593905e-05, "loss": 0.8096, "step": 162830 }, { "epoch": 1.0403383463450162, "grad_norm": 0.8865234851837158, "learning_rate": 4.686550185434819e-05, "loss": 1.0351, "step": 162840 }, { "epoch": 1.040402233494755, "grad_norm": 0.7455191612243652, "learning_rate": 4.6860494054669593e-05, "loss": 0.9099, "step": 162850 }, { "epoch": 1.0404661206444936, "grad_norm": 1.0902010202407837, "learning_rate": 4.6855486286608526e-05, "loss": 0.9958, "step": 162860 }, { "epoch": 1.0405300077942323, "grad_norm": 0.9043089747428894, "learning_rate": 4.685047855021544e-05, "loss": 0.7603, "step": 162870 }, { "epoch": 1.040593894943971, "grad_norm": 1.1999502182006836, "learning_rate": 4.6845470845540755e-05, "loss": 0.6993, "step": 162880 }, { "epoch": 1.0406577820937097, "grad_norm": 0.9678430557250977, "learning_rate": 4.6840463172634915e-05, "loss": 0.8959, "step": 162890 }, { "epoch": 1.0407216692434484, "grad_norm": 1.4664934873580933, "learning_rate": 4.6835455531548336e-05, "loss": 0.7311, "step": 162900 }, { "epoch": 1.0407855563931872, "grad_norm": 1.0061843395233154, "learning_rate": 4.683044792233146e-05, "loss": 0.7976, "step": 162910 }, { "epoch": 1.0408494435429259, "grad_norm": 1.5385732650756836, "learning_rate": 4.6825440345034714e-05, "loss": 0.7237, "step": 162920 }, { "epoch": 1.0409133306926646, "grad_norm": 1.0533753633499146, "learning_rate": 4.6820432799708536e-05, "loss": 1.3697, "step": 162930 }, { "epoch": 1.0409772178424033, "grad_norm": 0.6327069401741028, "learning_rate": 4.681542528640335e-05, "loss": 0.9806, "step": 162940 }, { "epoch": 1.041041104992142, "grad_norm": 0.9591493010520935, "learning_rate": 4.681041780516958e-05, "loss": 0.9627, "step": 162950 }, { "epoch": 1.0411049921418807, "grad_norm": 0.9499560594558716, "learning_rate": 4.680541035605766e-05, "loss": 0.986, "step": 162960 }, { "epoch": 1.0411688792916194, "grad_norm": 1.0875968933105469, "learning_rate": 4.680040293911803e-05, "loss": 1.0011, "step": 162970 }, { "epoch": 1.0412327664413579, "grad_norm": 0.7869101166725159, "learning_rate": 4.679539555440111e-05, "loss": 1.1579, "step": 162980 }, { "epoch": 1.0412966535910968, "grad_norm": 0.9354702234268188, "learning_rate": 4.6790388201957326e-05, "loss": 0.7806, "step": 162990 }, { "epoch": 1.0413605407408353, "grad_norm": 1.113673448562622, "learning_rate": 4.678538088183711e-05, "loss": 0.6678, "step": 163000 }, { "epoch": 1.041424427890574, "grad_norm": 1.2135668992996216, "learning_rate": 4.67803735940909e-05, "loss": 1.1268, "step": 163010 }, { "epoch": 1.0414883150403127, "grad_norm": 1.2587641477584839, "learning_rate": 4.6775366338769106e-05, "loss": 0.8575, "step": 163020 }, { "epoch": 1.0415522021900514, "grad_norm": 0.9545971155166626, "learning_rate": 4.677035911592216e-05, "loss": 0.682, "step": 163030 }, { "epoch": 1.04161608933979, "grad_norm": 0.8569166660308838, "learning_rate": 4.676535192560049e-05, "loss": 1.0957, "step": 163040 }, { "epoch": 1.0416799764895288, "grad_norm": 1.1652165651321411, "learning_rate": 4.6760344767854524e-05, "loss": 0.9504, "step": 163050 }, { "epoch": 1.0417438636392675, "grad_norm": 1.0948913097381592, "learning_rate": 4.675533764273469e-05, "loss": 1.0243, "step": 163060 }, { "epoch": 1.0418077507890062, "grad_norm": 1.0545660257339478, "learning_rate": 4.6750330550291414e-05, "loss": 1.0981, "step": 163070 }, { "epoch": 1.041871637938745, "grad_norm": 0.7978355884552002, "learning_rate": 4.674532349057512e-05, "loss": 0.7644, "step": 163080 }, { "epoch": 1.0419355250884836, "grad_norm": 1.381232738494873, "learning_rate": 4.674031646363624e-05, "loss": 0.8781, "step": 163090 }, { "epoch": 1.0419994122382223, "grad_norm": 0.7706129550933838, "learning_rate": 4.673530946952518e-05, "loss": 0.8316, "step": 163100 }, { "epoch": 1.042063299387961, "grad_norm": 1.1203925609588623, "learning_rate": 4.673030250829239e-05, "loss": 1.0133, "step": 163110 }, { "epoch": 1.0421271865376998, "grad_norm": 1.0205503702163696, "learning_rate": 4.672529557998828e-05, "loss": 0.858, "step": 163120 }, { "epoch": 1.0421910736874385, "grad_norm": 0.9623286128044128, "learning_rate": 4.672028868466327e-05, "loss": 0.8325, "step": 163130 }, { "epoch": 1.0422549608371772, "grad_norm": 1.5411490201950073, "learning_rate": 4.6715281822367786e-05, "loss": 0.8526, "step": 163140 }, { "epoch": 1.0423188479869159, "grad_norm": 1.3137603998184204, "learning_rate": 4.6710274993152264e-05, "loss": 0.7788, "step": 163150 }, { "epoch": 1.0423827351366546, "grad_norm": 0.8402478098869324, "learning_rate": 4.670526819706712e-05, "loss": 0.8678, "step": 163160 }, { "epoch": 1.0424466222863933, "grad_norm": 1.447865605354309, "learning_rate": 4.6700261434162774e-05, "loss": 1.0133, "step": 163170 }, { "epoch": 1.042510509436132, "grad_norm": 0.958998441696167, "learning_rate": 4.669525470448965e-05, "loss": 1.0255, "step": 163180 }, { "epoch": 1.0425743965858707, "grad_norm": 0.8727611303329468, "learning_rate": 4.669024800809817e-05, "loss": 0.7536, "step": 163190 }, { "epoch": 1.0426382837356094, "grad_norm": 1.4122403860092163, "learning_rate": 4.668524134503875e-05, "loss": 0.8819, "step": 163200 }, { "epoch": 1.0427021708853481, "grad_norm": 1.0412826538085938, "learning_rate": 4.668023471536183e-05, "loss": 0.7679, "step": 163210 }, { "epoch": 1.0427660580350868, "grad_norm": 0.7801997065544128, "learning_rate": 4.6675228119117816e-05, "loss": 0.8101, "step": 163220 }, { "epoch": 1.0428299451848255, "grad_norm": 0.6483613848686218, "learning_rate": 4.667022155635713e-05, "loss": 1.0608, "step": 163230 }, { "epoch": 1.0428938323345642, "grad_norm": 0.8273327946662903, "learning_rate": 4.666521502713019e-05, "loss": 1.1278, "step": 163240 }, { "epoch": 1.042957719484303, "grad_norm": 0.8206183910369873, "learning_rate": 4.666020853148742e-05, "loss": 0.8313, "step": 163250 }, { "epoch": 1.0430216066340416, "grad_norm": 1.0390762090682983, "learning_rate": 4.6655202069479245e-05, "loss": 0.8014, "step": 163260 }, { "epoch": 1.0430854937837803, "grad_norm": 1.1970301866531372, "learning_rate": 4.665019564115607e-05, "loss": 0.7897, "step": 163270 }, { "epoch": 1.043149380933519, "grad_norm": 0.8434377908706665, "learning_rate": 4.6645189246568325e-05, "loss": 0.892, "step": 163280 }, { "epoch": 1.0432132680832578, "grad_norm": 0.9261834025382996, "learning_rate": 4.6640182885766434e-05, "loss": 0.858, "step": 163290 }, { "epoch": 1.0432771552329965, "grad_norm": 0.863802433013916, "learning_rate": 4.66351765588008e-05, "loss": 0.9973, "step": 163300 }, { "epoch": 1.0433410423827352, "grad_norm": 1.4032562971115112, "learning_rate": 4.663017026572185e-05, "loss": 0.8758, "step": 163310 }, { "epoch": 1.0434049295324739, "grad_norm": 0.7954492568969727, "learning_rate": 4.6625164006580006e-05, "loss": 0.7323, "step": 163320 }, { "epoch": 1.0434688166822126, "grad_norm": 0.5905239582061768, "learning_rate": 4.662015778142568e-05, "loss": 0.854, "step": 163330 }, { "epoch": 1.0435327038319513, "grad_norm": 0.6909790635108948, "learning_rate": 4.661515159030927e-05, "loss": 0.8778, "step": 163340 }, { "epoch": 1.04359659098169, "grad_norm": 2.076235055923462, "learning_rate": 4.6610145433281225e-05, "loss": 1.1878, "step": 163350 }, { "epoch": 1.0436604781314287, "grad_norm": 0.9670212268829346, "learning_rate": 4.6605139310391956e-05, "loss": 1.1208, "step": 163360 }, { "epoch": 1.0437243652811674, "grad_norm": 0.9897720813751221, "learning_rate": 4.6600133221691865e-05, "loss": 0.92, "step": 163370 }, { "epoch": 1.0437882524309061, "grad_norm": 0.831823468208313, "learning_rate": 4.659512716723138e-05, "loss": 1.0368, "step": 163380 }, { "epoch": 1.0438521395806448, "grad_norm": 1.1693984270095825, "learning_rate": 4.65901211470609e-05, "loss": 0.9192, "step": 163390 }, { "epoch": 1.0439160267303835, "grad_norm": 1.2230675220489502, "learning_rate": 4.6585115161230855e-05, "loss": 0.8708, "step": 163400 }, { "epoch": 1.0439799138801222, "grad_norm": 0.6010975241661072, "learning_rate": 4.658010920979165e-05, "loss": 0.7601, "step": 163410 }, { "epoch": 1.044043801029861, "grad_norm": 0.9563870429992676, "learning_rate": 4.657510329279371e-05, "loss": 0.8896, "step": 163420 }, { "epoch": 1.0441076881795996, "grad_norm": 0.8813214302062988, "learning_rate": 4.657009741028745e-05, "loss": 1.0249, "step": 163430 }, { "epoch": 1.0441715753293384, "grad_norm": 1.9995386600494385, "learning_rate": 4.656509156232326e-05, "loss": 0.9217, "step": 163440 }, { "epoch": 1.044235462479077, "grad_norm": 1.1445976495742798, "learning_rate": 4.6560085748951574e-05, "loss": 0.8641, "step": 163450 }, { "epoch": 1.0442993496288158, "grad_norm": 0.9022873640060425, "learning_rate": 4.655507997022281e-05, "loss": 0.6487, "step": 163460 }, { "epoch": 1.0443632367785542, "grad_norm": 1.0987581014633179, "learning_rate": 4.6550074226187364e-05, "loss": 1.0106, "step": 163470 }, { "epoch": 1.0444271239282932, "grad_norm": 0.8926182389259338, "learning_rate": 4.654506851689566e-05, "loss": 0.9113, "step": 163480 }, { "epoch": 1.0444910110780317, "grad_norm": 0.9439060688018799, "learning_rate": 4.6540062842398106e-05, "loss": 0.7761, "step": 163490 }, { "epoch": 1.0445548982277704, "grad_norm": 1.0596351623535156, "learning_rate": 4.6535057202745105e-05, "loss": 0.7039, "step": 163500 }, { "epoch": 1.044618785377509, "grad_norm": 1.1492658853530884, "learning_rate": 4.6530051597987076e-05, "loss": 0.7879, "step": 163510 }, { "epoch": 1.0446826725272478, "grad_norm": 0.6139842867851257, "learning_rate": 4.6525046028174435e-05, "loss": 0.7387, "step": 163520 }, { "epoch": 1.0447465596769865, "grad_norm": 0.8731631636619568, "learning_rate": 4.6520040493357584e-05, "loss": 0.7721, "step": 163530 }, { "epoch": 1.0448104468267252, "grad_norm": 1.1998591423034668, "learning_rate": 4.651503499358694e-05, "loss": 0.7002, "step": 163540 }, { "epoch": 1.044874333976464, "grad_norm": 0.9016870856285095, "learning_rate": 4.65100295289129e-05, "loss": 0.707, "step": 163550 }, { "epoch": 1.0449382211262026, "grad_norm": 0.9702405333518982, "learning_rate": 4.650502409938589e-05, "loss": 0.9536, "step": 163560 }, { "epoch": 1.0450021082759413, "grad_norm": 1.6170375347137451, "learning_rate": 4.6500018705056295e-05, "loss": 0.5949, "step": 163570 }, { "epoch": 1.04506599542568, "grad_norm": 1.246148943901062, "learning_rate": 4.6495013345974555e-05, "loss": 1.0669, "step": 163580 }, { "epoch": 1.0451298825754187, "grad_norm": 0.6922361254692078, "learning_rate": 4.6490008022191056e-05, "loss": 0.7937, "step": 163590 }, { "epoch": 1.0451937697251574, "grad_norm": 0.8997225761413574, "learning_rate": 4.6485002733756214e-05, "loss": 0.9853, "step": 163600 }, { "epoch": 1.0452576568748961, "grad_norm": 0.808290421962738, "learning_rate": 4.647999748072044e-05, "loss": 0.9732, "step": 163610 }, { "epoch": 1.0453215440246348, "grad_norm": 1.9581161737442017, "learning_rate": 4.647499226313413e-05, "loss": 0.9051, "step": 163620 }, { "epoch": 1.0453854311743735, "grad_norm": 0.791298508644104, "learning_rate": 4.646998708104771e-05, "loss": 0.9813, "step": 163630 }, { "epoch": 1.0454493183241123, "grad_norm": 0.7668251395225525, "learning_rate": 4.646498193451156e-05, "loss": 0.9142, "step": 163640 }, { "epoch": 1.045513205473851, "grad_norm": 0.5910570621490479, "learning_rate": 4.6459976823576105e-05, "loss": 1.1384, "step": 163650 }, { "epoch": 1.0455770926235897, "grad_norm": 0.6951436400413513, "learning_rate": 4.645497174829173e-05, "loss": 0.8321, "step": 163660 }, { "epoch": 1.0456409797733284, "grad_norm": 1.029000997543335, "learning_rate": 4.644996670870887e-05, "loss": 0.8471, "step": 163670 }, { "epoch": 1.045704866923067, "grad_norm": 1.0844112634658813, "learning_rate": 4.644496170487792e-05, "loss": 0.8395, "step": 163680 }, { "epoch": 1.0457687540728058, "grad_norm": 1.0361007452011108, "learning_rate": 4.6439956736849284e-05, "loss": 0.9918, "step": 163690 }, { "epoch": 1.0458326412225445, "grad_norm": 0.8928366899490356, "learning_rate": 4.643495180467336e-05, "loss": 0.9797, "step": 163700 }, { "epoch": 1.0458965283722832, "grad_norm": 1.0317213535308838, "learning_rate": 4.642994690840055e-05, "loss": 0.8224, "step": 163710 }, { "epoch": 1.045960415522022, "grad_norm": 0.8236901760101318, "learning_rate": 4.6424942048081275e-05, "loss": 0.773, "step": 163720 }, { "epoch": 1.0460243026717606, "grad_norm": 1.1160997152328491, "learning_rate": 4.641993722376591e-05, "loss": 0.9447, "step": 163730 }, { "epoch": 1.0460881898214993, "grad_norm": 1.206079125404358, "learning_rate": 4.6414932435504886e-05, "loss": 0.8785, "step": 163740 }, { "epoch": 1.046152076971238, "grad_norm": 0.8513423204421997, "learning_rate": 4.64099276833486e-05, "loss": 0.8516, "step": 163750 }, { "epoch": 1.0462159641209767, "grad_norm": 0.8137949705123901, "learning_rate": 4.640492296734744e-05, "loss": 0.7112, "step": 163760 }, { "epoch": 1.0462798512707154, "grad_norm": 2.2618188858032227, "learning_rate": 4.6399918287551814e-05, "loss": 0.8609, "step": 163770 }, { "epoch": 1.0463437384204541, "grad_norm": 0.6762095093727112, "learning_rate": 4.639491364401212e-05, "loss": 0.8314, "step": 163780 }, { "epoch": 1.0464076255701928, "grad_norm": 0.492422491312027, "learning_rate": 4.638990903677878e-05, "loss": 0.726, "step": 163790 }, { "epoch": 1.0464715127199316, "grad_norm": 2.151423215866089, "learning_rate": 4.638540492135234e-05, "loss": 1.0316, "step": 163800 }, { "epoch": 1.0465353998696703, "grad_norm": 0.8146224021911621, "learning_rate": 4.638040038323989e-05, "loss": 1.0298, "step": 163810 }, { "epoch": 1.046599287019409, "grad_norm": 0.9958155751228333, "learning_rate": 4.637539588157993e-05, "loss": 0.9971, "step": 163820 }, { "epoch": 1.0466631741691477, "grad_norm": 1.1163171529769897, "learning_rate": 4.637039141642288e-05, "loss": 0.9504, "step": 163830 }, { "epoch": 1.0467270613188864, "grad_norm": 0.8440408110618591, "learning_rate": 4.6365386987819124e-05, "loss": 0.8803, "step": 163840 }, { "epoch": 1.046790948468625, "grad_norm": 1.4019627571105957, "learning_rate": 4.636038259581907e-05, "loss": 0.87, "step": 163850 }, { "epoch": 1.0468548356183638, "grad_norm": 2.8956496715545654, "learning_rate": 4.635537824047311e-05, "loss": 0.7409, "step": 163860 }, { "epoch": 1.0469187227681025, "grad_norm": 0.9600827693939209, "learning_rate": 4.6350373921831644e-05, "loss": 0.6301, "step": 163870 }, { "epoch": 1.0469826099178412, "grad_norm": 0.9884980320930481, "learning_rate": 4.634536963994506e-05, "loss": 0.8136, "step": 163880 }, { "epoch": 1.04704649706758, "grad_norm": 0.8919425010681152, "learning_rate": 4.634036539486378e-05, "loss": 0.5576, "step": 163890 }, { "epoch": 1.0471103842173186, "grad_norm": 1.3119444847106934, "learning_rate": 4.6335361186638184e-05, "loss": 1.0449, "step": 163900 }, { "epoch": 1.0471742713670573, "grad_norm": 0.822734534740448, "learning_rate": 4.633035701531867e-05, "loss": 0.8107, "step": 163910 }, { "epoch": 1.047238158516796, "grad_norm": 1.3529523611068726, "learning_rate": 4.632535288095563e-05, "loss": 0.8808, "step": 163920 }, { "epoch": 1.0473020456665347, "grad_norm": 1.056562900543213, "learning_rate": 4.6320348783599465e-05, "loss": 0.8139, "step": 163930 }, { "epoch": 1.0473659328162734, "grad_norm": 0.8713958859443665, "learning_rate": 4.631534472330058e-05, "loss": 1.2265, "step": 163940 }, { "epoch": 1.0474298199660121, "grad_norm": 1.1219075918197632, "learning_rate": 4.6310340700109355e-05, "loss": 0.798, "step": 163950 }, { "epoch": 1.0474937071157506, "grad_norm": 0.8713673949241638, "learning_rate": 4.6305336714076195e-05, "loss": 1.0113, "step": 163960 }, { "epoch": 1.0475575942654893, "grad_norm": 0.8879077434539795, "learning_rate": 4.6300332765251485e-05, "loss": 0.8673, "step": 163970 }, { "epoch": 1.047621481415228, "grad_norm": 0.7119232416152954, "learning_rate": 4.6295328853685626e-05, "loss": 0.7477, "step": 163980 }, { "epoch": 1.0476853685649667, "grad_norm": 0.6839287281036377, "learning_rate": 4.629032497942901e-05, "loss": 0.7913, "step": 163990 }, { "epoch": 1.0477492557147055, "grad_norm": 0.6696777939796448, "learning_rate": 4.628532114253203e-05, "loss": 0.652, "step": 164000 }, { "epoch": 1.0478131428644442, "grad_norm": 0.7793158292770386, "learning_rate": 4.628031734304508e-05, "loss": 0.5978, "step": 164010 }, { "epoch": 1.0478770300141829, "grad_norm": 0.8177552819252014, "learning_rate": 4.627531358101855e-05, "loss": 0.9784, "step": 164020 }, { "epoch": 1.0479409171639216, "grad_norm": 1.0433216094970703, "learning_rate": 4.6270309856502844e-05, "loss": 0.8135, "step": 164030 }, { "epoch": 1.0480048043136603, "grad_norm": 1.0402250289916992, "learning_rate": 4.6265306169548344e-05, "loss": 0.9751, "step": 164040 }, { "epoch": 1.048068691463399, "grad_norm": 0.9214003086090088, "learning_rate": 4.6260302520205434e-05, "loss": 0.9328, "step": 164050 }, { "epoch": 1.0481325786131377, "grad_norm": 0.8937646746635437, "learning_rate": 4.625529890852452e-05, "loss": 0.8227, "step": 164060 }, { "epoch": 1.0481964657628764, "grad_norm": 0.989142656326294, "learning_rate": 4.6250295334555984e-05, "loss": 0.6317, "step": 164070 }, { "epoch": 1.048260352912615, "grad_norm": 0.8708199858665466, "learning_rate": 4.6245291798350214e-05, "loss": 0.8059, "step": 164080 }, { "epoch": 1.0483242400623538, "grad_norm": 0.7595809102058411, "learning_rate": 4.624028829995761e-05, "loss": 0.8904, "step": 164090 }, { "epoch": 1.0483881272120925, "grad_norm": 0.7372194528579712, "learning_rate": 4.623528483942855e-05, "loss": 0.7613, "step": 164100 }, { "epoch": 1.0484520143618312, "grad_norm": 1.089552402496338, "learning_rate": 4.623028141681343e-05, "loss": 1.0416, "step": 164110 }, { "epoch": 1.04851590151157, "grad_norm": 1.4906351566314697, "learning_rate": 4.6225278032162647e-05, "loss": 0.7298, "step": 164120 }, { "epoch": 1.0485797886613086, "grad_norm": 0.742764413356781, "learning_rate": 4.622027468552658e-05, "loss": 0.9095, "step": 164130 }, { "epoch": 1.0486436758110473, "grad_norm": 1.2194721698760986, "learning_rate": 4.6215271376955606e-05, "loss": 1.0118, "step": 164140 }, { "epoch": 1.048707562960786, "grad_norm": 0.6740885376930237, "learning_rate": 4.621026810650012e-05, "loss": 1.0442, "step": 164150 }, { "epoch": 1.0487714501105247, "grad_norm": 0.9453091621398926, "learning_rate": 4.620526487421052e-05, "loss": 0.811, "step": 164160 }, { "epoch": 1.0488353372602635, "grad_norm": 0.7730445265769958, "learning_rate": 4.620026168013718e-05, "loss": 0.8522, "step": 164170 }, { "epoch": 1.0488992244100022, "grad_norm": 0.7625861167907715, "learning_rate": 4.61952585243305e-05, "loss": 1.0819, "step": 164180 }, { "epoch": 1.0489631115597409, "grad_norm": 0.8981362581253052, "learning_rate": 4.6190255406840855e-05, "loss": 0.9319, "step": 164190 }, { "epoch": 1.0490269987094796, "grad_norm": 0.9963375926017761, "learning_rate": 4.618525232771863e-05, "loss": 0.9983, "step": 164200 }, { "epoch": 1.0490908858592183, "grad_norm": 0.873155951499939, "learning_rate": 4.618024928701422e-05, "loss": 0.85, "step": 164210 }, { "epoch": 1.049154773008957, "grad_norm": 1.5402940511703491, "learning_rate": 4.6175246284778e-05, "loss": 0.7995, "step": 164220 }, { "epoch": 1.0492186601586957, "grad_norm": 0.8315989375114441, "learning_rate": 4.6170243321060356e-05, "loss": 0.9546, "step": 164230 }, { "epoch": 1.0492825473084344, "grad_norm": 0.9614139795303345, "learning_rate": 4.616524039591168e-05, "loss": 0.8737, "step": 164240 }, { "epoch": 1.049346434458173, "grad_norm": 0.5436043739318848, "learning_rate": 4.616023750938235e-05, "loss": 0.8369, "step": 164250 }, { "epoch": 1.0494103216079118, "grad_norm": 1.519921898841858, "learning_rate": 4.615523466152275e-05, "loss": 0.9794, "step": 164260 }, { "epoch": 1.0494742087576505, "grad_norm": 1.4589173793792725, "learning_rate": 4.6150231852383264e-05, "loss": 0.8924, "step": 164270 }, { "epoch": 1.0495380959073892, "grad_norm": 0.6913012266159058, "learning_rate": 4.6145229082014276e-05, "loss": 0.8225, "step": 164280 }, { "epoch": 1.049601983057128, "grad_norm": 1.0679293870925903, "learning_rate": 4.614022635046616e-05, "loss": 0.6774, "step": 164290 }, { "epoch": 1.0496658702068666, "grad_norm": 1.1619223356246948, "learning_rate": 4.613522365778931e-05, "loss": 0.822, "step": 164300 }, { "epoch": 1.0497297573566053, "grad_norm": 1.5839335918426514, "learning_rate": 4.6130221004034084e-05, "loss": 0.8866, "step": 164310 }, { "epoch": 1.049793644506344, "grad_norm": 0.945656418800354, "learning_rate": 4.6125218389250894e-05, "loss": 0.988, "step": 164320 }, { "epoch": 1.0498575316560828, "grad_norm": 1.125430703163147, "learning_rate": 4.612021581349011e-05, "loss": 0.8968, "step": 164330 }, { "epoch": 1.0499214188058215, "grad_norm": 0.88238126039505, "learning_rate": 4.6115213276802104e-05, "loss": 0.7057, "step": 164340 }, { "epoch": 1.0499853059555602, "grad_norm": 0.8494474291801453, "learning_rate": 4.611021077923727e-05, "loss": 0.7707, "step": 164350 }, { "epoch": 1.0500491931052989, "grad_norm": 1.4997683763504028, "learning_rate": 4.6105208320845966e-05, "loss": 1.0624, "step": 164360 }, { "epoch": 1.0501130802550376, "grad_norm": 1.758766531944275, "learning_rate": 4.61002059016786e-05, "loss": 0.8057, "step": 164370 }, { "epoch": 1.0501769674047763, "grad_norm": 1.3619985580444336, "learning_rate": 4.6095203521785516e-05, "loss": 0.7932, "step": 164380 }, { "epoch": 1.050240854554515, "grad_norm": 0.7864593863487244, "learning_rate": 4.609020118121712e-05, "loss": 0.9398, "step": 164390 }, { "epoch": 1.0503047417042537, "grad_norm": 0.868182897567749, "learning_rate": 4.6085198880023774e-05, "loss": 1.1271, "step": 164400 }, { "epoch": 1.0503686288539924, "grad_norm": 1.1137135028839111, "learning_rate": 4.608019661825587e-05, "loss": 1.1727, "step": 164410 }, { "epoch": 1.050432516003731, "grad_norm": 0.7334727048873901, "learning_rate": 4.607519439596378e-05, "loss": 1.04, "step": 164420 }, { "epoch": 1.0504964031534698, "grad_norm": 0.8515543341636658, "learning_rate": 4.607019221319787e-05, "loss": 0.7822, "step": 164430 }, { "epoch": 1.0505602903032085, "grad_norm": 0.8414304256439209, "learning_rate": 4.606519007000853e-05, "loss": 0.8864, "step": 164440 }, { "epoch": 1.050624177452947, "grad_norm": 0.7646651268005371, "learning_rate": 4.606018796644612e-05, "loss": 1.0714, "step": 164450 }, { "epoch": 1.0506880646026857, "grad_norm": 0.847108006477356, "learning_rate": 4.605518590256104e-05, "loss": 0.8618, "step": 164460 }, { "epoch": 1.0507519517524244, "grad_norm": 1.3605737686157227, "learning_rate": 4.605018387840364e-05, "loss": 0.8288, "step": 164470 }, { "epoch": 1.0508158389021631, "grad_norm": 1.32858407497406, "learning_rate": 4.604518189402431e-05, "loss": 1.0004, "step": 164480 }, { "epoch": 1.0508797260519018, "grad_norm": 0.8185241222381592, "learning_rate": 4.604017994947342e-05, "loss": 0.8581, "step": 164490 }, { "epoch": 1.0509436132016405, "grad_norm": 0.9678569436073303, "learning_rate": 4.6035178044801344e-05, "loss": 0.8152, "step": 164500 }, { "epoch": 1.0510075003513792, "grad_norm": 1.0296353101730347, "learning_rate": 4.603017618005845e-05, "loss": 0.895, "step": 164510 }, { "epoch": 1.051071387501118, "grad_norm": 0.9702723622322083, "learning_rate": 4.602517435529511e-05, "loss": 0.8082, "step": 164520 }, { "epoch": 1.0511352746508567, "grad_norm": 0.5624803304672241, "learning_rate": 4.602017257056171e-05, "loss": 0.915, "step": 164530 }, { "epoch": 1.0511991618005954, "grad_norm": 1.2291347980499268, "learning_rate": 4.6015170825908614e-05, "loss": 0.7313, "step": 164540 }, { "epoch": 1.051263048950334, "grad_norm": 0.8868616223335266, "learning_rate": 4.60101691213862e-05, "loss": 0.8421, "step": 164550 }, { "epoch": 1.0513269361000728, "grad_norm": 0.8309329152107239, "learning_rate": 4.600516745704484e-05, "loss": 0.9126, "step": 164560 }, { "epoch": 1.0513908232498115, "grad_norm": 0.7704881429672241, "learning_rate": 4.600016583293489e-05, "loss": 0.7842, "step": 164570 }, { "epoch": 1.0514547103995502, "grad_norm": 0.8722350001335144, "learning_rate": 4.599516424910673e-05, "loss": 0.8703, "step": 164580 }, { "epoch": 1.051518597549289, "grad_norm": 1.562312126159668, "learning_rate": 4.599016270561074e-05, "loss": 0.7106, "step": 164590 }, { "epoch": 1.0515824846990276, "grad_norm": 1.1941059827804565, "learning_rate": 4.5985161202497275e-05, "loss": 0.8637, "step": 164600 }, { "epoch": 1.0516463718487663, "grad_norm": 1.0434215068817139, "learning_rate": 4.598015973981673e-05, "loss": 0.8606, "step": 164610 }, { "epoch": 1.051710258998505, "grad_norm": 0.5463379621505737, "learning_rate": 4.597515831761943e-05, "loss": 0.9113, "step": 164620 }, { "epoch": 1.0517741461482437, "grad_norm": 0.5214067697525024, "learning_rate": 4.597015693595577e-05, "loss": 0.7315, "step": 164630 }, { "epoch": 1.0518380332979824, "grad_norm": 0.9531989097595215, "learning_rate": 4.596515559487611e-05, "loss": 0.8608, "step": 164640 }, { "epoch": 1.0519019204477211, "grad_norm": 0.7098289728164673, "learning_rate": 4.5960154294430836e-05, "loss": 0.898, "step": 164650 }, { "epoch": 1.0519658075974598, "grad_norm": 0.8016313314437866, "learning_rate": 4.595515303467029e-05, "loss": 0.8499, "step": 164660 }, { "epoch": 1.0520296947471985, "grad_norm": 1.1667627096176147, "learning_rate": 4.5950151815644866e-05, "loss": 0.8077, "step": 164670 }, { "epoch": 1.0520935818969372, "grad_norm": 0.8660734295845032, "learning_rate": 4.594515063740491e-05, "loss": 1.0402, "step": 164680 }, { "epoch": 1.052157469046676, "grad_norm": 0.9622820615768433, "learning_rate": 4.59401495000008e-05, "loss": 0.7553, "step": 164690 }, { "epoch": 1.0522213561964147, "grad_norm": 1.0822231769561768, "learning_rate": 4.593514840348289e-05, "loss": 0.8874, "step": 164700 }, { "epoch": 1.0522852433461534, "grad_norm": 0.6997173428535461, "learning_rate": 4.5930147347901556e-05, "loss": 0.9951, "step": 164710 }, { "epoch": 1.052349130495892, "grad_norm": 0.49910104274749756, "learning_rate": 4.5925146333307164e-05, "loss": 0.8204, "step": 164720 }, { "epoch": 1.0524130176456308, "grad_norm": 2.026914358139038, "learning_rate": 4.592014535975007e-05, "loss": 1.1756, "step": 164730 }, { "epoch": 1.0524769047953695, "grad_norm": 0.9706935286521912, "learning_rate": 4.591514442728064e-05, "loss": 0.9323, "step": 164740 }, { "epoch": 1.0525407919451082, "grad_norm": 1.4648338556289673, "learning_rate": 4.591014353594923e-05, "loss": 0.7388, "step": 164750 }, { "epoch": 1.052604679094847, "grad_norm": 1.224882960319519, "learning_rate": 4.5905142685806226e-05, "loss": 0.7921, "step": 164760 }, { "epoch": 1.0526685662445856, "grad_norm": 0.6611737012863159, "learning_rate": 4.590014187690198e-05, "loss": 0.7512, "step": 164770 }, { "epoch": 1.0527324533943243, "grad_norm": 1.2943297624588013, "learning_rate": 4.5895141109286846e-05, "loss": 0.9351, "step": 164780 }, { "epoch": 1.052796340544063, "grad_norm": 0.680542528629303, "learning_rate": 4.5890140383011194e-05, "loss": 1.1433, "step": 164790 }, { "epoch": 1.0528602276938017, "grad_norm": 1.3205195665359497, "learning_rate": 4.588513969812538e-05, "loss": 0.7543, "step": 164800 }, { "epoch": 1.0529241148435404, "grad_norm": 1.1977238655090332, "learning_rate": 4.588013905467977e-05, "loss": 0.934, "step": 164810 }, { "epoch": 1.0529880019932791, "grad_norm": 0.6131165623664856, "learning_rate": 4.587513845272473e-05, "loss": 0.9284, "step": 164820 }, { "epoch": 1.0530518891430178, "grad_norm": 0.8425104022026062, "learning_rate": 4.5870137892310607e-05, "loss": 1.1248, "step": 164830 }, { "epoch": 1.0531157762927565, "grad_norm": 0.9462717771530151, "learning_rate": 4.586513737348776e-05, "loss": 0.8297, "step": 164840 }, { "epoch": 1.0531796634424953, "grad_norm": 1.0752277374267578, "learning_rate": 4.586013689630657e-05, "loss": 1.0739, "step": 164850 }, { "epoch": 1.053243550592234, "grad_norm": 0.9188938140869141, "learning_rate": 4.5855136460817385e-05, "loss": 0.9023, "step": 164860 }, { "epoch": 1.0533074377419727, "grad_norm": 0.6802998781204224, "learning_rate": 4.585013606707055e-05, "loss": 0.6506, "step": 164870 }, { "epoch": 1.0533713248917114, "grad_norm": 0.7449166774749756, "learning_rate": 4.5845135715116444e-05, "loss": 1.0255, "step": 164880 }, { "epoch": 1.05343521204145, "grad_norm": 1.3200147151947021, "learning_rate": 4.584013540500542e-05, "loss": 0.8287, "step": 164890 }, { "epoch": 1.0534990991911888, "grad_norm": 0.9720762372016907, "learning_rate": 4.583513513678782e-05, "loss": 0.956, "step": 164900 }, { "epoch": 1.0535629863409275, "grad_norm": 0.6958931684494019, "learning_rate": 4.583013491051402e-05, "loss": 0.8158, "step": 164910 }, { "epoch": 1.0536268734906662, "grad_norm": 1.0599778890609741, "learning_rate": 4.582513472623436e-05, "loss": 0.8731, "step": 164920 }, { "epoch": 1.053690760640405, "grad_norm": 0.5339720249176025, "learning_rate": 4.582013458399922e-05, "loss": 0.9776, "step": 164930 }, { "epoch": 1.0537546477901434, "grad_norm": 0.8194118738174438, "learning_rate": 4.581513448385893e-05, "loss": 0.7849, "step": 164940 }, { "epoch": 1.053818534939882, "grad_norm": 0.7215436697006226, "learning_rate": 4.581013442586386e-05, "loss": 0.7049, "step": 164950 }, { "epoch": 1.0538824220896208, "grad_norm": 0.7318156361579895, "learning_rate": 4.580513441006436e-05, "loss": 0.8978, "step": 164960 }, { "epoch": 1.0539463092393595, "grad_norm": 1.3139148950576782, "learning_rate": 4.580013443651079e-05, "loss": 0.8481, "step": 164970 }, { "epoch": 1.0540101963890982, "grad_norm": 1.0408555269241333, "learning_rate": 4.579513450525349e-05, "loss": 0.655, "step": 164980 }, { "epoch": 1.054074083538837, "grad_norm": 0.7458349466323853, "learning_rate": 4.579013461634283e-05, "loss": 0.7734, "step": 164990 }, { "epoch": 1.0541379706885756, "grad_norm": 0.6548849940299988, "learning_rate": 4.5785134769829156e-05, "loss": 1.0121, "step": 165000 }, { "epoch": 1.0542018578383143, "grad_norm": 1.47128164768219, "learning_rate": 4.578013496576282e-05, "loss": 0.9101, "step": 165010 }, { "epoch": 1.054265744988053, "grad_norm": 2.107875347137451, "learning_rate": 4.5775135204194176e-05, "loss": 0.9387, "step": 165020 }, { "epoch": 1.0543296321377917, "grad_norm": 1.0853489637374878, "learning_rate": 4.5770135485173574e-05, "loss": 0.8148, "step": 165030 }, { "epoch": 1.0543935192875304, "grad_norm": 0.6279266476631165, "learning_rate": 4.5765135808751357e-05, "loss": 0.9949, "step": 165040 }, { "epoch": 1.0544574064372692, "grad_norm": 0.9035767912864685, "learning_rate": 4.57601361749779e-05, "loss": 0.8647, "step": 165050 }, { "epoch": 1.0545212935870079, "grad_norm": 0.9950432181358337, "learning_rate": 4.5755136583903535e-05, "loss": 1.0575, "step": 165060 }, { "epoch": 1.0545851807367466, "grad_norm": 0.6531002521514893, "learning_rate": 4.5750137035578625e-05, "loss": 0.9391, "step": 165070 }, { "epoch": 1.0546490678864853, "grad_norm": 0.6531468629837036, "learning_rate": 4.57451375300535e-05, "loss": 0.9551, "step": 165080 }, { "epoch": 1.054712955036224, "grad_norm": 1.396674394607544, "learning_rate": 4.574013806737853e-05, "loss": 0.9346, "step": 165090 }, { "epoch": 1.0547768421859627, "grad_norm": 1.2501516342163086, "learning_rate": 4.573513864760407e-05, "loss": 0.7332, "step": 165100 }, { "epoch": 1.0548407293357014, "grad_norm": 1.078444004058838, "learning_rate": 4.573013927078044e-05, "loss": 1.02, "step": 165110 }, { "epoch": 1.05490461648544, "grad_norm": 0.7868189811706543, "learning_rate": 4.5725139936958e-05, "loss": 0.7525, "step": 165120 }, { "epoch": 1.0549685036351788, "grad_norm": 0.5394753813743591, "learning_rate": 4.5720140646187096e-05, "loss": 0.8605, "step": 165130 }, { "epoch": 1.0550323907849175, "grad_norm": 1.1440558433532715, "learning_rate": 4.5715141398518076e-05, "loss": 0.9491, "step": 165140 }, { "epoch": 1.0550962779346562, "grad_norm": 0.6817034482955933, "learning_rate": 4.5710142194001285e-05, "loss": 0.8546, "step": 165150 }, { "epoch": 1.055160165084395, "grad_norm": 0.8437715172767639, "learning_rate": 4.570514303268707e-05, "loss": 0.7823, "step": 165160 }, { "epoch": 1.0552240522341336, "grad_norm": 0.793656587600708, "learning_rate": 4.5700143914625794e-05, "loss": 0.9057, "step": 165170 }, { "epoch": 1.0552879393838723, "grad_norm": 0.8038541078567505, "learning_rate": 4.569514483986778e-05, "loss": 0.9739, "step": 165180 }, { "epoch": 1.055351826533611, "grad_norm": 1.1511846780776978, "learning_rate": 4.569014580846339e-05, "loss": 1.0002, "step": 165190 }, { "epoch": 1.0554157136833497, "grad_norm": 0.7070904970169067, "learning_rate": 4.568514682046295e-05, "loss": 0.9256, "step": 165200 }, { "epoch": 1.0554796008330884, "grad_norm": 1.058930516242981, "learning_rate": 4.568014787591683e-05, "loss": 0.9628, "step": 165210 }, { "epoch": 1.0555434879828272, "grad_norm": 1.03263521194458, "learning_rate": 4.567514897487535e-05, "loss": 0.9915, "step": 165220 }, { "epoch": 1.0556073751325659, "grad_norm": 1.2190163135528564, "learning_rate": 4.567015011738885e-05, "loss": 0.965, "step": 165230 }, { "epoch": 1.0556712622823046, "grad_norm": 0.6620869040489197, "learning_rate": 4.5665151303507704e-05, "loss": 0.696, "step": 165240 }, { "epoch": 1.0557351494320433, "grad_norm": 0.9180126786231995, "learning_rate": 4.566015253328222e-05, "loss": 0.8117, "step": 165250 }, { "epoch": 1.055799036581782, "grad_norm": 1.2093178033828735, "learning_rate": 4.5655153806762766e-05, "loss": 1.1724, "step": 165260 }, { "epoch": 1.0558629237315207, "grad_norm": 0.9234322309494019, "learning_rate": 4.565015512399966e-05, "loss": 0.8175, "step": 165270 }, { "epoch": 1.0559268108812594, "grad_norm": 1.0635215044021606, "learning_rate": 4.564515648504326e-05, "loss": 0.9139, "step": 165280 }, { "epoch": 1.055990698030998, "grad_norm": 0.6104520559310913, "learning_rate": 4.56401578899439e-05, "loss": 0.6578, "step": 165290 }, { "epoch": 1.0560545851807368, "grad_norm": 1.0595885515213013, "learning_rate": 4.563515933875193e-05, "loss": 0.9892, "step": 165300 }, { "epoch": 1.0561184723304755, "grad_norm": 0.8852406144142151, "learning_rate": 4.5630160831517675e-05, "loss": 1.0354, "step": 165310 }, { "epoch": 1.0561823594802142, "grad_norm": 2.066617965698242, "learning_rate": 4.562516236829148e-05, "loss": 1.1012, "step": 165320 }, { "epoch": 1.056246246629953, "grad_norm": 0.6148334741592407, "learning_rate": 4.5620163949123687e-05, "loss": 0.9188, "step": 165330 }, { "epoch": 1.0563101337796916, "grad_norm": 0.8803834915161133, "learning_rate": 4.5615165574064634e-05, "loss": 0.9608, "step": 165340 }, { "epoch": 1.0563740209294303, "grad_norm": 0.8797494769096375, "learning_rate": 4.5610167243164655e-05, "loss": 0.9332, "step": 165350 }, { "epoch": 1.056437908079169, "grad_norm": 0.9440881013870239, "learning_rate": 4.560516895647408e-05, "loss": 0.7448, "step": 165360 }, { "epoch": 1.0565017952289077, "grad_norm": 0.8351962566375732, "learning_rate": 4.560017071404326e-05, "loss": 0.8806, "step": 165370 }, { "epoch": 1.0565656823786465, "grad_norm": 1.0572452545166016, "learning_rate": 4.559517251592253e-05, "loss": 0.8308, "step": 165380 }, { "epoch": 1.0566295695283852, "grad_norm": 1.751478672027588, "learning_rate": 4.559017436216223e-05, "loss": 0.8669, "step": 165390 }, { "epoch": 1.0566934566781239, "grad_norm": 0.6728960871696472, "learning_rate": 4.558517625281268e-05, "loss": 0.7418, "step": 165400 }, { "epoch": 1.0567573438278623, "grad_norm": 0.8979251384735107, "learning_rate": 4.5580178187924235e-05, "loss": 1.1276, "step": 165410 }, { "epoch": 1.0568212309776013, "grad_norm": 0.9827498197555542, "learning_rate": 4.557518016754721e-05, "loss": 0.8415, "step": 165420 }, { "epoch": 1.0568851181273398, "grad_norm": 1.329075574874878, "learning_rate": 4.5570182191731956e-05, "loss": 0.9694, "step": 165430 }, { "epoch": 1.0569490052770785, "grad_norm": 0.7447249293327332, "learning_rate": 4.556518426052879e-05, "loss": 0.777, "step": 165440 }, { "epoch": 1.0570128924268172, "grad_norm": 0.7475079894065857, "learning_rate": 4.5560186373988065e-05, "loss": 0.7823, "step": 165450 }, { "epoch": 1.0570767795765559, "grad_norm": 1.0540409088134766, "learning_rate": 4.55551885321601e-05, "loss": 0.7704, "step": 165460 }, { "epoch": 1.0571406667262946, "grad_norm": 0.9531934857368469, "learning_rate": 4.5550190735095235e-05, "loss": 1.0657, "step": 165470 }, { "epoch": 1.0572045538760333, "grad_norm": 1.0872973203659058, "learning_rate": 4.5545192982843795e-05, "loss": 1.0177, "step": 165480 }, { "epoch": 1.057268441025772, "grad_norm": 0.9160931706428528, "learning_rate": 4.554019527545612e-05, "loss": 0.877, "step": 165490 }, { "epoch": 1.0573323281755107, "grad_norm": 0.6422150135040283, "learning_rate": 4.553519761298253e-05, "loss": 1.1888, "step": 165500 }, { "epoch": 1.0573962153252494, "grad_norm": 1.0594791173934937, "learning_rate": 4.553019999547337e-05, "loss": 0.8751, "step": 165510 }, { "epoch": 1.0574601024749881, "grad_norm": 0.8945194482803345, "learning_rate": 4.5525202422978955e-05, "loss": 1.0125, "step": 165520 }, { "epoch": 1.0575239896247268, "grad_norm": 0.8445724248886108, "learning_rate": 4.552020489554963e-05, "loss": 1.0213, "step": 165530 }, { "epoch": 1.0575878767744655, "grad_norm": 0.7421507239341736, "learning_rate": 4.551520741323571e-05, "loss": 0.7153, "step": 165540 }, { "epoch": 1.0576517639242042, "grad_norm": 1.3028641939163208, "learning_rate": 4.551020997608754e-05, "loss": 0.7857, "step": 165550 }, { "epoch": 1.057715651073943, "grad_norm": 0.816175639629364, "learning_rate": 4.550521258415543e-05, "loss": 0.8056, "step": 165560 }, { "epoch": 1.0577795382236816, "grad_norm": 0.7016112804412842, "learning_rate": 4.550021523748971e-05, "loss": 0.7017, "step": 165570 }, { "epoch": 1.0578434253734204, "grad_norm": 0.6792407631874084, "learning_rate": 4.549521793614076e-05, "loss": 0.8311, "step": 165580 }, { "epoch": 1.057907312523159, "grad_norm": 1.004659652709961, "learning_rate": 4.5490220680158825e-05, "loss": 0.8236, "step": 165590 }, { "epoch": 1.0579711996728978, "grad_norm": 1.2447409629821777, "learning_rate": 4.548522346959427e-05, "loss": 0.727, "step": 165600 }, { "epoch": 1.0580350868226365, "grad_norm": 0.8811039328575134, "learning_rate": 4.548022630449743e-05, "loss": 0.7418, "step": 165610 }, { "epoch": 1.0580989739723752, "grad_norm": 0.8841410875320435, "learning_rate": 4.547522918491862e-05, "loss": 0.7467, "step": 165620 }, { "epoch": 1.0581628611221139, "grad_norm": 0.8277806639671326, "learning_rate": 4.547023211090816e-05, "loss": 0.8483, "step": 165630 }, { "epoch": 1.0582267482718526, "grad_norm": 0.8341606855392456, "learning_rate": 4.5465235082516387e-05, "loss": 1.0341, "step": 165640 }, { "epoch": 1.0582906354215913, "grad_norm": 0.7356009483337402, "learning_rate": 4.546023809979362e-05, "loss": 0.8042, "step": 165650 }, { "epoch": 1.05835452257133, "grad_norm": 1.4884812831878662, "learning_rate": 4.545524116279018e-05, "loss": 0.8323, "step": 165660 }, { "epoch": 1.0584184097210687, "grad_norm": 0.9706101417541504, "learning_rate": 4.54502442715564e-05, "loss": 1.1248, "step": 165670 }, { "epoch": 1.0584822968708074, "grad_norm": 0.9353876709938049, "learning_rate": 4.5445247426142586e-05, "loss": 0.7526, "step": 165680 }, { "epoch": 1.0585461840205461, "grad_norm": 1.7931292057037354, "learning_rate": 4.544025062659906e-05, "loss": 0.8956, "step": 165690 }, { "epoch": 1.0586100711702848, "grad_norm": 1.6735409498214722, "learning_rate": 4.543525387297618e-05, "loss": 0.9402, "step": 165700 }, { "epoch": 1.0586739583200235, "grad_norm": 0.7010751962661743, "learning_rate": 4.543025716532422e-05, "loss": 0.8364, "step": 165710 }, { "epoch": 1.0587378454697622, "grad_norm": 0.9951981902122498, "learning_rate": 4.542526050369355e-05, "loss": 1.0676, "step": 165720 }, { "epoch": 1.058801732619501, "grad_norm": 2.725141763687134, "learning_rate": 4.542026388813444e-05, "loss": 1.0406, "step": 165730 }, { "epoch": 1.0588656197692397, "grad_norm": 0.6263905763626099, "learning_rate": 4.541526731869725e-05, "loss": 1.0343, "step": 165740 }, { "epoch": 1.0589295069189784, "grad_norm": 1.4677350521087646, "learning_rate": 4.541027079543228e-05, "loss": 0.9855, "step": 165750 }, { "epoch": 1.058993394068717, "grad_norm": 0.8590619564056396, "learning_rate": 4.540527431838986e-05, "loss": 0.8498, "step": 165760 }, { "epoch": 1.0590572812184558, "grad_norm": 2.01846981048584, "learning_rate": 4.540027788762029e-05, "loss": 0.9391, "step": 165770 }, { "epoch": 1.0591211683681945, "grad_norm": 0.6292256116867065, "learning_rate": 4.539528150317391e-05, "loss": 0.8408, "step": 165780 }, { "epoch": 1.0591850555179332, "grad_norm": 0.7966293096542358, "learning_rate": 4.539028516510102e-05, "loss": 0.8568, "step": 165790 }, { "epoch": 1.0592489426676719, "grad_norm": 0.9135885834693909, "learning_rate": 4.538528887345196e-05, "loss": 0.8816, "step": 165800 }, { "epoch": 1.0593128298174106, "grad_norm": 0.8115611672401428, "learning_rate": 4.538029262827702e-05, "loss": 0.8993, "step": 165810 }, { "epoch": 1.0593767169671493, "grad_norm": 0.9862786531448364, "learning_rate": 4.537529642962654e-05, "loss": 0.8257, "step": 165820 }, { "epoch": 1.059440604116888, "grad_norm": 0.9464985728263855, "learning_rate": 4.537030027755082e-05, "loss": 0.9438, "step": 165830 }, { "epoch": 1.0595044912666267, "grad_norm": 0.6717821359634399, "learning_rate": 4.536530417210019e-05, "loss": 0.7328, "step": 165840 }, { "epoch": 1.0595683784163654, "grad_norm": 1.0255380868911743, "learning_rate": 4.5360308113324947e-05, "loss": 0.8817, "step": 165850 }, { "epoch": 1.0596322655661041, "grad_norm": 0.5127224922180176, "learning_rate": 4.5355811700376274e-05, "loss": 1.0763, "step": 165860 }, { "epoch": 1.0596961527158428, "grad_norm": 0.7932383418083191, "learning_rate": 4.5350815730422905e-05, "loss": 0.7545, "step": 165870 }, { "epoch": 1.0597600398655815, "grad_norm": 1.7010321617126465, "learning_rate": 4.5345819807290847e-05, "loss": 0.8557, "step": 165880 }, { "epoch": 1.0598239270153202, "grad_norm": 0.7503814697265625, "learning_rate": 4.53408239310304e-05, "loss": 0.9328, "step": 165890 }, { "epoch": 1.0598878141650587, "grad_norm": 1.0629501342773438, "learning_rate": 4.53358281016919e-05, "loss": 0.8286, "step": 165900 }, { "epoch": 1.0599517013147974, "grad_norm": 0.9964657425880432, "learning_rate": 4.533083231932563e-05, "loss": 0.8564, "step": 165910 }, { "epoch": 1.0600155884645361, "grad_norm": 0.8007980585098267, "learning_rate": 4.532583658398193e-05, "loss": 0.8858, "step": 165920 }, { "epoch": 1.0600794756142748, "grad_norm": 0.8962225914001465, "learning_rate": 4.5320840895711095e-05, "loss": 0.8047, "step": 165930 }, { "epoch": 1.0601433627640136, "grad_norm": 1.0580693483352661, "learning_rate": 4.531584525456344e-05, "loss": 0.7851, "step": 165940 }, { "epoch": 1.0602072499137523, "grad_norm": 0.8029645681381226, "learning_rate": 4.531084966058928e-05, "loss": 0.8152, "step": 165950 }, { "epoch": 1.060271137063491, "grad_norm": 0.8548164367675781, "learning_rate": 4.5305854113838914e-05, "loss": 1.0714, "step": 165960 }, { "epoch": 1.0603350242132297, "grad_norm": 0.8717803359031677, "learning_rate": 4.530085861436266e-05, "loss": 0.8307, "step": 165970 }, { "epoch": 1.0603989113629684, "grad_norm": 0.7216166853904724, "learning_rate": 4.529586316221083e-05, "loss": 0.8735, "step": 165980 }, { "epoch": 1.060462798512707, "grad_norm": 1.3361812829971313, "learning_rate": 4.529086775743372e-05, "loss": 0.8581, "step": 165990 }, { "epoch": 1.0605266856624458, "grad_norm": 1.0749822854995728, "learning_rate": 4.528587240008165e-05, "loss": 1.1413, "step": 166000 }, { "epoch": 1.0605905728121845, "grad_norm": 1.623487114906311, "learning_rate": 4.5280877090204915e-05, "loss": 1.1168, "step": 166010 }, { "epoch": 1.0606544599619232, "grad_norm": 0.959581196308136, "learning_rate": 4.527588182785384e-05, "loss": 0.8113, "step": 166020 }, { "epoch": 1.060718347111662, "grad_norm": 1.2680141925811768, "learning_rate": 4.5270886613078716e-05, "loss": 0.9222, "step": 166030 }, { "epoch": 1.0607822342614006, "grad_norm": 1.1884212493896484, "learning_rate": 4.526589144592986e-05, "loss": 0.7005, "step": 166040 }, { "epoch": 1.0608461214111393, "grad_norm": 1.0362354516983032, "learning_rate": 4.526089632645757e-05, "loss": 0.765, "step": 166050 }, { "epoch": 1.060910008560878, "grad_norm": 1.0656408071517944, "learning_rate": 4.5255901254712156e-05, "loss": 1.0426, "step": 166060 }, { "epoch": 1.0609738957106167, "grad_norm": 0.8089607357978821, "learning_rate": 4.5250906230743925e-05, "loss": 1.4514, "step": 166070 }, { "epoch": 1.0610377828603554, "grad_norm": 0.8284891247749329, "learning_rate": 4.5245911254603166e-05, "loss": 0.7829, "step": 166080 }, { "epoch": 1.0611016700100941, "grad_norm": 0.869490385055542, "learning_rate": 4.5240916326340205e-05, "loss": 0.8162, "step": 166090 }, { "epoch": 1.0611655571598329, "grad_norm": 0.9870272874832153, "learning_rate": 4.523592144600532e-05, "loss": 0.8931, "step": 166100 }, { "epoch": 1.0612294443095716, "grad_norm": 1.2657639980316162, "learning_rate": 4.523092661364885e-05, "loss": 0.8136, "step": 166110 }, { "epoch": 1.0612933314593103, "grad_norm": 0.9630696773529053, "learning_rate": 4.5225931829321056e-05, "loss": 0.9948, "step": 166120 }, { "epoch": 1.061357218609049, "grad_norm": 3.2127113342285156, "learning_rate": 4.5220937093072265e-05, "loss": 0.7691, "step": 166130 }, { "epoch": 1.0614211057587877, "grad_norm": 0.7906152606010437, "learning_rate": 4.521594240495277e-05, "loss": 0.9219, "step": 166140 }, { "epoch": 1.0614849929085264, "grad_norm": 0.8807733058929443, "learning_rate": 4.5210947765012876e-05, "loss": 1.0482, "step": 166150 }, { "epoch": 1.061548880058265, "grad_norm": 0.849648654460907, "learning_rate": 4.520595317330287e-05, "loss": 0.7637, "step": 166160 }, { "epoch": 1.0616127672080038, "grad_norm": 0.848292350769043, "learning_rate": 4.5200958629873074e-05, "loss": 0.8503, "step": 166170 }, { "epoch": 1.0616766543577425, "grad_norm": 0.877740740776062, "learning_rate": 4.519596413477378e-05, "loss": 0.8768, "step": 166180 }, { "epoch": 1.0617405415074812, "grad_norm": 0.7507287859916687, "learning_rate": 4.5190969688055275e-05, "loss": 0.8739, "step": 166190 }, { "epoch": 1.06180442865722, "grad_norm": 1.0137852430343628, "learning_rate": 4.5185975289767866e-05, "loss": 0.9776, "step": 166200 }, { "epoch": 1.0618683158069586, "grad_norm": 0.5660897493362427, "learning_rate": 4.518098093996187e-05, "loss": 0.8763, "step": 166210 }, { "epoch": 1.0619322029566973, "grad_norm": 1.1215473413467407, "learning_rate": 4.5175986638687546e-05, "loss": 0.8374, "step": 166220 }, { "epoch": 1.061996090106436, "grad_norm": 0.8180109262466431, "learning_rate": 4.5170992385995214e-05, "loss": 0.8279, "step": 166230 }, { "epoch": 1.0620599772561747, "grad_norm": 1.204626202583313, "learning_rate": 4.5165998181935164e-05, "loss": 1.0606, "step": 166240 }, { "epoch": 1.0621238644059134, "grad_norm": 0.8536267280578613, "learning_rate": 4.5161004026557696e-05, "loss": 0.8713, "step": 166250 }, { "epoch": 1.0621877515556521, "grad_norm": 0.7017921209335327, "learning_rate": 4.51560099199131e-05, "loss": 0.7666, "step": 166260 }, { "epoch": 1.0622516387053909, "grad_norm": 0.7818034887313843, "learning_rate": 4.515101586205168e-05, "loss": 1.0096, "step": 166270 }, { "epoch": 1.0623155258551296, "grad_norm": 1.2084392309188843, "learning_rate": 4.5146021853023715e-05, "loss": 1.1418, "step": 166280 }, { "epoch": 1.0623794130048683, "grad_norm": 1.1068148612976074, "learning_rate": 4.514102789287952e-05, "loss": 0.9818, "step": 166290 }, { "epoch": 1.062443300154607, "grad_norm": 0.9860485196113586, "learning_rate": 4.5136033981669376e-05, "loss": 0.9613, "step": 166300 }, { "epoch": 1.0625071873043457, "grad_norm": 0.842819333076477, "learning_rate": 4.513104011944357e-05, "loss": 0.9115, "step": 166310 }, { "epoch": 1.0625710744540844, "grad_norm": 0.5500154495239258, "learning_rate": 4.512604630625241e-05, "loss": 0.9704, "step": 166320 }, { "epoch": 1.062634961603823, "grad_norm": 0.7272401452064514, "learning_rate": 4.512105254214617e-05, "loss": 1.0708, "step": 166330 }, { "epoch": 1.0626988487535618, "grad_norm": 0.5617895126342773, "learning_rate": 4.511605882717516e-05, "loss": 0.7842, "step": 166340 }, { "epoch": 1.0627627359033005, "grad_norm": 0.9489856958389282, "learning_rate": 4.5111065161389667e-05, "loss": 0.8623, "step": 166350 }, { "epoch": 1.0628266230530392, "grad_norm": 0.7376025915145874, "learning_rate": 4.510607154483997e-05, "loss": 0.8115, "step": 166360 }, { "epoch": 1.062890510202778, "grad_norm": 2.6469199657440186, "learning_rate": 4.5101077977576376e-05, "loss": 0.8634, "step": 166370 }, { "epoch": 1.0629543973525166, "grad_norm": 0.6932651996612549, "learning_rate": 4.509608445964916e-05, "loss": 0.6602, "step": 166380 }, { "epoch": 1.063018284502255, "grad_norm": 1.501181960105896, "learning_rate": 4.509109099110861e-05, "loss": 1.0629, "step": 166390 }, { "epoch": 1.063082171651994, "grad_norm": 1.0305095911026, "learning_rate": 4.508609757200503e-05, "loss": 0.9877, "step": 166400 }, { "epoch": 1.0631460588017325, "grad_norm": 0.8262702226638794, "learning_rate": 4.508110420238869e-05, "loss": 0.8587, "step": 166410 }, { "epoch": 1.0632099459514712, "grad_norm": 0.8938581347465515, "learning_rate": 4.507611088230989e-05, "loss": 0.8692, "step": 166420 }, { "epoch": 1.06327383310121, "grad_norm": 1.1800354719161987, "learning_rate": 4.5071117611818914e-05, "loss": 0.9498, "step": 166430 }, { "epoch": 1.0633377202509486, "grad_norm": 0.742709755897522, "learning_rate": 4.5066124390966045e-05, "loss": 0.7247, "step": 166440 }, { "epoch": 1.0634016074006873, "grad_norm": 0.7936336398124695, "learning_rate": 4.506113121980158e-05, "loss": 0.8067, "step": 166450 }, { "epoch": 1.063465494550426, "grad_norm": 0.7889736890792847, "learning_rate": 4.505613809837579e-05, "loss": 1.0327, "step": 166460 }, { "epoch": 1.0635293817001648, "grad_norm": 1.5141351222991943, "learning_rate": 4.505114502673896e-05, "loss": 1.1342, "step": 166470 }, { "epoch": 1.0635932688499035, "grad_norm": 0.8879356980323792, "learning_rate": 4.504615200494139e-05, "loss": 1.1157, "step": 166480 }, { "epoch": 1.0636571559996422, "grad_norm": 0.4957992732524872, "learning_rate": 4.5041159033033356e-05, "loss": 0.8696, "step": 166490 }, { "epoch": 1.0637210431493809, "grad_norm": 1.099480390548706, "learning_rate": 4.5036166111065136e-05, "loss": 0.88, "step": 166500 }, { "epoch": 1.0637849302991196, "grad_norm": 1.1279675960540771, "learning_rate": 4.503117323908702e-05, "loss": 0.839, "step": 166510 }, { "epoch": 1.0638488174488583, "grad_norm": 1.0145577192306519, "learning_rate": 4.5026180417149284e-05, "loss": 1.0136, "step": 166520 }, { "epoch": 1.063912704598597, "grad_norm": 0.5523449778556824, "learning_rate": 4.502118764530222e-05, "loss": 0.748, "step": 166530 }, { "epoch": 1.0639765917483357, "grad_norm": 1.2338347434997559, "learning_rate": 4.501619492359609e-05, "loss": 0.7316, "step": 166540 }, { "epoch": 1.0640404788980744, "grad_norm": 0.9280697703361511, "learning_rate": 4.50112022520812e-05, "loss": 0.849, "step": 166550 }, { "epoch": 1.064104366047813, "grad_norm": 1.0032998323440552, "learning_rate": 4.500620963080782e-05, "loss": 0.9341, "step": 166560 }, { "epoch": 1.0641682531975518, "grad_norm": 1.1202173233032227, "learning_rate": 4.500121705982622e-05, "loss": 0.9334, "step": 166570 }, { "epoch": 1.0642321403472905, "grad_norm": 1.0374928712844849, "learning_rate": 4.49962245391867e-05, "loss": 0.7728, "step": 166580 }, { "epoch": 1.0642960274970292, "grad_norm": 0.8324866890907288, "learning_rate": 4.499123206893953e-05, "loss": 0.8554, "step": 166590 }, { "epoch": 1.064359914646768, "grad_norm": 0.8850876688957214, "learning_rate": 4.4986239649134975e-05, "loss": 0.7597, "step": 166600 }, { "epoch": 1.0644238017965066, "grad_norm": 0.7691602110862732, "learning_rate": 4.498124727982333e-05, "loss": 0.7727, "step": 166610 }, { "epoch": 1.0644876889462453, "grad_norm": 0.7466904520988464, "learning_rate": 4.497625496105487e-05, "loss": 0.8507, "step": 166620 }, { "epoch": 1.064551576095984, "grad_norm": 1.0000793933868408, "learning_rate": 4.497126269287986e-05, "loss": 0.7191, "step": 166630 }, { "epoch": 1.0646154632457228, "grad_norm": 0.7658026218414307, "learning_rate": 4.4966270475348596e-05, "loss": 0.6359, "step": 166640 }, { "epoch": 1.0646793503954615, "grad_norm": 0.7452802658081055, "learning_rate": 4.496127830851133e-05, "loss": 0.8817, "step": 166650 }, { "epoch": 1.0647432375452002, "grad_norm": 0.8648143410682678, "learning_rate": 4.4956286192418364e-05, "loss": 0.7892, "step": 166660 }, { "epoch": 1.0648071246949389, "grad_norm": 0.7528172731399536, "learning_rate": 4.4951294127119955e-05, "loss": 0.7276, "step": 166670 }, { "epoch": 1.0648710118446776, "grad_norm": 0.9941525459289551, "learning_rate": 4.4946302112666386e-05, "loss": 0.6766, "step": 166680 }, { "epoch": 1.0649348989944163, "grad_norm": 0.9956261515617371, "learning_rate": 4.4941310149107916e-05, "loss": 0.7099, "step": 166690 }, { "epoch": 1.064998786144155, "grad_norm": 0.6918345093727112, "learning_rate": 4.4936318236494846e-05, "loss": 0.6829, "step": 166700 }, { "epoch": 1.0650626732938937, "grad_norm": 1.1491965055465698, "learning_rate": 4.493132637487742e-05, "loss": 0.7505, "step": 166710 }, { "epoch": 1.0651265604436324, "grad_norm": 0.7228277921676636, "learning_rate": 4.492633456430592e-05, "loss": 0.6065, "step": 166720 }, { "epoch": 1.0651904475933711, "grad_norm": 1.5048450231552124, "learning_rate": 4.492134280483063e-05, "loss": 0.8662, "step": 166730 }, { "epoch": 1.0652543347431098, "grad_norm": 0.8829289078712463, "learning_rate": 4.49163510965018e-05, "loss": 1.1516, "step": 166740 }, { "epoch": 1.0653182218928485, "grad_norm": 3.7568225860595703, "learning_rate": 4.491135943936972e-05, "loss": 0.9261, "step": 166750 }, { "epoch": 1.0653821090425872, "grad_norm": 0.618725061416626, "learning_rate": 4.490636783348465e-05, "loss": 0.7957, "step": 166760 }, { "epoch": 1.065445996192326, "grad_norm": 0.86220383644104, "learning_rate": 4.4901376278896865e-05, "loss": 1.0595, "step": 166770 }, { "epoch": 1.0655098833420646, "grad_norm": 1.004080891609192, "learning_rate": 4.489638477565663e-05, "loss": 0.9258, "step": 166780 }, { "epoch": 1.0655737704918034, "grad_norm": 0.8573571443557739, "learning_rate": 4.4891393323814214e-05, "loss": 0.7236, "step": 166790 }, { "epoch": 1.065637657641542, "grad_norm": 0.7342075109481812, "learning_rate": 4.488640192341988e-05, "loss": 0.9191, "step": 166800 }, { "epoch": 1.0657015447912808, "grad_norm": 1.0184838771820068, "learning_rate": 4.4881410574523916e-05, "loss": 1.002, "step": 166810 }, { "epoch": 1.0657654319410195, "grad_norm": 0.7228994965553284, "learning_rate": 4.487641927717657e-05, "loss": 0.9738, "step": 166820 }, { "epoch": 1.0658293190907582, "grad_norm": 1.0932427644729614, "learning_rate": 4.4871428031428116e-05, "loss": 0.9745, "step": 166830 }, { "epoch": 1.0658932062404969, "grad_norm": 1.0328798294067383, "learning_rate": 4.4866436837328816e-05, "loss": 0.7275, "step": 166840 }, { "epoch": 1.0659570933902356, "grad_norm": 0.9208298921585083, "learning_rate": 4.486144569492894e-05, "loss": 0.7779, "step": 166850 }, { "epoch": 1.066020980539974, "grad_norm": 1.0833368301391602, "learning_rate": 4.485645460427874e-05, "loss": 0.8716, "step": 166860 }, { "epoch": 1.066084867689713, "grad_norm": 1.336681842803955, "learning_rate": 4.4851463565428504e-05, "loss": 0.9037, "step": 166870 }, { "epoch": 1.0661487548394515, "grad_norm": 0.800895631313324, "learning_rate": 4.484647257842848e-05, "loss": 0.7627, "step": 166880 }, { "epoch": 1.0662126419891902, "grad_norm": 0.9473403096199036, "learning_rate": 4.484148164332894e-05, "loss": 1.1128, "step": 166890 }, { "epoch": 1.066276529138929, "grad_norm": 1.6257963180541992, "learning_rate": 4.4836490760180136e-05, "loss": 0.7619, "step": 166900 }, { "epoch": 1.0663404162886676, "grad_norm": 0.9070813655853271, "learning_rate": 4.4831499929032353e-05, "loss": 0.9571, "step": 166910 }, { "epoch": 1.0664043034384063, "grad_norm": 3.044250249862671, "learning_rate": 4.482650914993582e-05, "loss": 0.7744, "step": 166920 }, { "epoch": 1.066468190588145, "grad_norm": 1.1026273965835571, "learning_rate": 4.4821518422940824e-05, "loss": 0.828, "step": 166930 }, { "epoch": 1.0665320777378837, "grad_norm": 1.101633906364441, "learning_rate": 4.4816527748097616e-05, "loss": 0.9907, "step": 166940 }, { "epoch": 1.0665959648876224, "grad_norm": 0.8642861247062683, "learning_rate": 4.481153712545645e-05, "loss": 0.7035, "step": 166950 }, { "epoch": 1.0666598520373611, "grad_norm": 1.0184928178787231, "learning_rate": 4.480654655506761e-05, "loss": 0.8259, "step": 166960 }, { "epoch": 1.0667237391870998, "grad_norm": 1.317468285560608, "learning_rate": 4.4801556036981324e-05, "loss": 0.8339, "step": 166970 }, { "epoch": 1.0667876263368385, "grad_norm": 1.0101289749145508, "learning_rate": 4.479656557124787e-05, "loss": 0.9275, "step": 166980 }, { "epoch": 1.0668515134865773, "grad_norm": 1.7089424133300781, "learning_rate": 4.479157515791751e-05, "loss": 0.7327, "step": 166990 }, { "epoch": 1.066915400636316, "grad_norm": 0.7492755651473999, "learning_rate": 4.4786584797040485e-05, "loss": 0.6942, "step": 167000 }, { "epoch": 1.0669792877860547, "grad_norm": 1.4743152856826782, "learning_rate": 4.4781594488667065e-05, "loss": 0.6916, "step": 167010 }, { "epoch": 1.0670431749357934, "grad_norm": 1.0931754112243652, "learning_rate": 4.47766042328475e-05, "loss": 0.8617, "step": 167020 }, { "epoch": 1.067107062085532, "grad_norm": 2.1639623641967773, "learning_rate": 4.477161402963206e-05, "loss": 0.7629, "step": 167030 }, { "epoch": 1.0671709492352708, "grad_norm": 0.7249797582626343, "learning_rate": 4.476662387907098e-05, "loss": 0.9629, "step": 167040 }, { "epoch": 1.0672348363850095, "grad_norm": 1.3922828435897827, "learning_rate": 4.476163378121452e-05, "loss": 1.0208, "step": 167050 }, { "epoch": 1.0672987235347482, "grad_norm": 0.8654850721359253, "learning_rate": 4.475664373611294e-05, "loss": 1.0369, "step": 167060 }, { "epoch": 1.067362610684487, "grad_norm": 1.1859705448150635, "learning_rate": 4.47516537438165e-05, "loss": 0.7634, "step": 167070 }, { "epoch": 1.0674264978342256, "grad_norm": 0.7614762187004089, "learning_rate": 4.474666380437545e-05, "loss": 0.79, "step": 167080 }, { "epoch": 1.0674903849839643, "grad_norm": 4.074455261230469, "learning_rate": 4.4741673917840035e-05, "loss": 0.9808, "step": 167090 }, { "epoch": 1.067554272133703, "grad_norm": 0.8677727580070496, "learning_rate": 4.473668408426052e-05, "loss": 0.9667, "step": 167100 }, { "epoch": 1.0676181592834417, "grad_norm": 0.8779893517494202, "learning_rate": 4.4731694303687144e-05, "loss": 0.8795, "step": 167110 }, { "epoch": 1.0676820464331804, "grad_norm": 0.8248956203460693, "learning_rate": 4.4726704576170165e-05, "loss": 0.8779, "step": 167120 }, { "epoch": 1.0677459335829191, "grad_norm": 0.7094441056251526, "learning_rate": 4.472171490175983e-05, "loss": 1.049, "step": 167130 }, { "epoch": 1.0678098207326578, "grad_norm": 0.6770461201667786, "learning_rate": 4.47167252805064e-05, "loss": 0.9599, "step": 167140 }, { "epoch": 1.0678737078823966, "grad_norm": 1.041219711303711, "learning_rate": 4.471173571246011e-05, "loss": 1.0222, "step": 167150 }, { "epoch": 1.0679375950321353, "grad_norm": 0.9974572062492371, "learning_rate": 4.470674619767122e-05, "loss": 0.9056, "step": 167160 }, { "epoch": 1.068001482181874, "grad_norm": 0.9295303821563721, "learning_rate": 4.470175673618998e-05, "loss": 0.9401, "step": 167170 }, { "epoch": 1.0680653693316127, "grad_norm": 1.0348182916641235, "learning_rate": 4.4696767328066626e-05, "loss": 1.0809, "step": 167180 }, { "epoch": 1.0681292564813514, "grad_norm": 0.7640472650527954, "learning_rate": 4.4691777973351426e-05, "loss": 0.9645, "step": 167190 }, { "epoch": 1.06819314363109, "grad_norm": 0.6735183000564575, "learning_rate": 4.46867886720946e-05, "loss": 0.8436, "step": 167200 }, { "epoch": 1.0682570307808288, "grad_norm": 0.9889833331108093, "learning_rate": 4.468179942434641e-05, "loss": 0.8263, "step": 167210 }, { "epoch": 1.0683209179305675, "grad_norm": 0.9082995057106018, "learning_rate": 4.4676810230157107e-05, "loss": 0.7079, "step": 167220 }, { "epoch": 1.0683848050803062, "grad_norm": 0.7941724061965942, "learning_rate": 4.467182108957692e-05, "loss": 0.6653, "step": 167230 }, { "epoch": 1.068448692230045, "grad_norm": 0.6307287216186523, "learning_rate": 4.466683200265611e-05, "loss": 0.8493, "step": 167240 }, { "epoch": 1.0685125793797836, "grad_norm": 0.5019100904464722, "learning_rate": 4.466184296944492e-05, "loss": 0.9378, "step": 167250 }, { "epoch": 1.0685764665295223, "grad_norm": 2.5777366161346436, "learning_rate": 4.465685398999358e-05, "loss": 0.9536, "step": 167260 }, { "epoch": 1.068640353679261, "grad_norm": 0.7746297121047974, "learning_rate": 4.465186506435235e-05, "loss": 1.051, "step": 167270 }, { "epoch": 1.0687042408289997, "grad_norm": 0.7943103313446045, "learning_rate": 4.4646876192571465e-05, "loss": 0.7168, "step": 167280 }, { "epoch": 1.0687681279787384, "grad_norm": 2.4050159454345703, "learning_rate": 4.464188737470117e-05, "loss": 0.7538, "step": 167290 }, { "epoch": 1.0688320151284771, "grad_norm": 1.3626781702041626, "learning_rate": 4.463689861079169e-05, "loss": 0.8716, "step": 167300 }, { "epoch": 1.0688959022782158, "grad_norm": 2.211430788040161, "learning_rate": 4.463190990089329e-05, "loss": 1.2561, "step": 167310 }, { "epoch": 1.0689597894279546, "grad_norm": 0.8661367893218994, "learning_rate": 4.462692124505621e-05, "loss": 0.8686, "step": 167320 }, { "epoch": 1.0690236765776933, "grad_norm": 0.7823354005813599, "learning_rate": 4.462193264333067e-05, "loss": 0.8867, "step": 167330 }, { "epoch": 1.069087563727432, "grad_norm": 1.1050783395767212, "learning_rate": 4.4616944095766924e-05, "loss": 0.7956, "step": 167340 }, { "epoch": 1.0691514508771705, "grad_norm": 2.726477861404419, "learning_rate": 4.4611955602415215e-05, "loss": 0.9001, "step": 167350 }, { "epoch": 1.0692153380269094, "grad_norm": 0.719448447227478, "learning_rate": 4.4606967163325765e-05, "loss": 0.9276, "step": 167360 }, { "epoch": 1.0692792251766479, "grad_norm": 1.078935146331787, "learning_rate": 4.460197877854882e-05, "loss": 0.8703, "step": 167370 }, { "epoch": 1.0693431123263866, "grad_norm": 0.5898913145065308, "learning_rate": 4.4596990448134625e-05, "loss": 0.895, "step": 167380 }, { "epoch": 1.0694069994761253, "grad_norm": 0.8286154270172119, "learning_rate": 4.459200217213339e-05, "loss": 0.9249, "step": 167390 }, { "epoch": 1.069470886625864, "grad_norm": 2.352092742919922, "learning_rate": 4.45870139505954e-05, "loss": 1.0513, "step": 167400 }, { "epoch": 1.0695347737756027, "grad_norm": 1.0438710451126099, "learning_rate": 4.458202578357085e-05, "loss": 0.6509, "step": 167410 }, { "epoch": 1.0695986609253414, "grad_norm": 1.3391057252883911, "learning_rate": 4.457703767110999e-05, "loss": 0.8631, "step": 167420 }, { "epoch": 1.06966254807508, "grad_norm": 1.0806117057800293, "learning_rate": 4.4572049613263055e-05, "loss": 0.8065, "step": 167430 }, { "epoch": 1.0697264352248188, "grad_norm": 0.7838671803474426, "learning_rate": 4.456706161008027e-05, "loss": 0.8195, "step": 167440 }, { "epoch": 1.0697903223745575, "grad_norm": 0.7801133394241333, "learning_rate": 4.456207366161188e-05, "loss": 0.665, "step": 167450 }, { "epoch": 1.0698542095242962, "grad_norm": 1.2091721296310425, "learning_rate": 4.4557085767908116e-05, "loss": 1.0544, "step": 167460 }, { "epoch": 1.069918096674035, "grad_norm": 0.7456448674201965, "learning_rate": 4.45520979290192e-05, "loss": 0.8551, "step": 167470 }, { "epoch": 1.0699819838237736, "grad_norm": 0.9205488562583923, "learning_rate": 4.454711014499538e-05, "loss": 0.8446, "step": 167480 }, { "epoch": 1.0700458709735123, "grad_norm": 0.8250446319580078, "learning_rate": 4.4542122415886863e-05, "loss": 0.9594, "step": 167490 }, { "epoch": 1.070109758123251, "grad_norm": 1.068128228187561, "learning_rate": 4.45371347417439e-05, "loss": 0.8108, "step": 167500 }, { "epoch": 1.0701736452729897, "grad_norm": 0.781631350517273, "learning_rate": 4.453214712261672e-05, "loss": 0.7391, "step": 167510 }, { "epoch": 1.0702375324227285, "grad_norm": 0.6052827835083008, "learning_rate": 4.452715955855555e-05, "loss": 0.8435, "step": 167520 }, { "epoch": 1.0703014195724672, "grad_norm": 1.0644205808639526, "learning_rate": 4.4522172049610613e-05, "loss": 0.9365, "step": 167530 }, { "epoch": 1.0703653067222059, "grad_norm": 0.9585718512535095, "learning_rate": 4.451718459583215e-05, "loss": 1.0394, "step": 167540 }, { "epoch": 1.0704291938719446, "grad_norm": 0.5789369344711304, "learning_rate": 4.451219719727038e-05, "loss": 0.8574, "step": 167550 }, { "epoch": 1.0704930810216833, "grad_norm": 0.7152701020240784, "learning_rate": 4.450720985397553e-05, "loss": 1.0427, "step": 167560 }, { "epoch": 1.070556968171422, "grad_norm": 0.8098849058151245, "learning_rate": 4.4502222565997826e-05, "loss": 1.0818, "step": 167570 }, { "epoch": 1.0706208553211607, "grad_norm": 0.870178759098053, "learning_rate": 4.44972353333875e-05, "loss": 1.1725, "step": 167580 }, { "epoch": 1.0706847424708994, "grad_norm": 0.6339501142501831, "learning_rate": 4.449224815619476e-05, "loss": 0.8222, "step": 167590 }, { "epoch": 1.070748629620638, "grad_norm": 0.4935135543346405, "learning_rate": 4.448775974414488e-05, "loss": 0.9549, "step": 167600 }, { "epoch": 1.0708125167703768, "grad_norm": 1.5905778408050537, "learning_rate": 4.448277267238396e-05, "loss": 1.0304, "step": 167610 }, { "epoch": 1.0708764039201155, "grad_norm": 0.7705449461936951, "learning_rate": 4.44777856561863e-05, "loss": 0.858, "step": 167620 }, { "epoch": 1.0709402910698542, "grad_norm": 0.9918437600135803, "learning_rate": 4.447279869560211e-05, "loss": 0.8912, "step": 167630 }, { "epoch": 1.071004178219593, "grad_norm": 1.0865930318832397, "learning_rate": 4.4467811790681626e-05, "loss": 0.8048, "step": 167640 }, { "epoch": 1.0710680653693316, "grad_norm": 0.8210586905479431, "learning_rate": 4.446282494147506e-05, "loss": 0.8904, "step": 167650 }, { "epoch": 1.0711319525190703, "grad_norm": 0.9417276978492737, "learning_rate": 4.445783814803263e-05, "loss": 0.9236, "step": 167660 }, { "epoch": 1.071195839668809, "grad_norm": 1.0197242498397827, "learning_rate": 4.4452851410404575e-05, "loss": 0.7601, "step": 167670 }, { "epoch": 1.0712597268185478, "grad_norm": 1.1198569536209106, "learning_rate": 4.4447864728641106e-05, "loss": 0.7624, "step": 167680 }, { "epoch": 1.0713236139682865, "grad_norm": 0.7067335844039917, "learning_rate": 4.4442878102792436e-05, "loss": 0.8205, "step": 167690 }, { "epoch": 1.0713875011180252, "grad_norm": 0.8206075429916382, "learning_rate": 4.443789153290879e-05, "loss": 0.8195, "step": 167700 }, { "epoch": 1.0714513882677639, "grad_norm": 0.9611579775810242, "learning_rate": 4.4432905019040386e-05, "loss": 0.6071, "step": 167710 }, { "epoch": 1.0715152754175026, "grad_norm": 1.1848875284194946, "learning_rate": 4.442791856123744e-05, "loss": 0.7083, "step": 167720 }, { "epoch": 1.0715791625672413, "grad_norm": 0.7153217196464539, "learning_rate": 4.442293215955019e-05, "loss": 1.1518, "step": 167730 }, { "epoch": 1.07164304971698, "grad_norm": 1.418792963027954, "learning_rate": 4.4417945814028834e-05, "loss": 0.9059, "step": 167740 }, { "epoch": 1.0717069368667187, "grad_norm": 1.3425724506378174, "learning_rate": 4.4412959524723586e-05, "loss": 0.6204, "step": 167750 }, { "epoch": 1.0717708240164574, "grad_norm": 0.91298907995224, "learning_rate": 4.440797329168467e-05, "loss": 0.8499, "step": 167760 }, { "epoch": 1.071834711166196, "grad_norm": 0.994581937789917, "learning_rate": 4.440298711496231e-05, "loss": 1.0169, "step": 167770 }, { "epoch": 1.0718985983159348, "grad_norm": 0.8729711174964905, "learning_rate": 4.439800099460671e-05, "loss": 0.6801, "step": 167780 }, { "epoch": 1.0719624854656735, "grad_norm": 0.9728224873542786, "learning_rate": 4.4393014930668084e-05, "loss": 0.6659, "step": 167790 }, { "epoch": 1.0720263726154122, "grad_norm": 1.115736722946167, "learning_rate": 4.4388028923196645e-05, "loss": 1.0955, "step": 167800 }, { "epoch": 1.072090259765151, "grad_norm": 0.9253178834915161, "learning_rate": 4.438304297224261e-05, "loss": 0.7719, "step": 167810 }, { "epoch": 1.0721541469148896, "grad_norm": 0.7113856673240662, "learning_rate": 4.4378057077856216e-05, "loss": 0.991, "step": 167820 }, { "epoch": 1.0722180340646283, "grad_norm": 1.2730010747909546, "learning_rate": 4.4373071240087624e-05, "loss": 1.0908, "step": 167830 }, { "epoch": 1.0722819212143668, "grad_norm": 0.6992239356040955, "learning_rate": 4.4368085458987075e-05, "loss": 0.8704, "step": 167840 }, { "epoch": 1.0723458083641058, "grad_norm": 0.8266318440437317, "learning_rate": 4.436309973460478e-05, "loss": 0.8084, "step": 167850 }, { "epoch": 1.0724096955138442, "grad_norm": 0.6330587863922119, "learning_rate": 4.4358114066990944e-05, "loss": 0.8448, "step": 167860 }, { "epoch": 1.072473582663583, "grad_norm": 2.0142178535461426, "learning_rate": 4.435312845619579e-05, "loss": 0.7857, "step": 167870 }, { "epoch": 1.0725374698133217, "grad_norm": 1.0071065425872803, "learning_rate": 4.434814290226951e-05, "loss": 1.2855, "step": 167880 }, { "epoch": 1.0726013569630604, "grad_norm": 0.9862484335899353, "learning_rate": 4.434315740526232e-05, "loss": 1.083, "step": 167890 }, { "epoch": 1.072665244112799, "grad_norm": 0.7634795904159546, "learning_rate": 4.433817196522443e-05, "loss": 0.9183, "step": 167900 }, { "epoch": 1.0727291312625378, "grad_norm": 1.3216174840927124, "learning_rate": 4.433318658220605e-05, "loss": 1.0064, "step": 167910 }, { "epoch": 1.0727930184122765, "grad_norm": 1.0118058919906616, "learning_rate": 4.432820125625738e-05, "loss": 0.8958, "step": 167920 }, { "epoch": 1.0728569055620152, "grad_norm": 1.1793543100357056, "learning_rate": 4.432321598742863e-05, "loss": 1.077, "step": 167930 }, { "epoch": 1.072920792711754, "grad_norm": 0.829182505607605, "learning_rate": 4.4318230775770006e-05, "loss": 0.6589, "step": 167940 }, { "epoch": 1.0729846798614926, "grad_norm": 0.9053888916969299, "learning_rate": 4.431324562133172e-05, "loss": 0.7827, "step": 167950 }, { "epoch": 1.0730485670112313, "grad_norm": 1.006364345550537, "learning_rate": 4.430826052416396e-05, "loss": 1.0471, "step": 167960 }, { "epoch": 1.07311245416097, "grad_norm": 3.569615125656128, "learning_rate": 4.430327548431695e-05, "loss": 1.0903, "step": 167970 }, { "epoch": 1.0731763413107087, "grad_norm": 1.1795485019683838, "learning_rate": 4.429829050184088e-05, "loss": 0.8895, "step": 167980 }, { "epoch": 1.0732402284604474, "grad_norm": 1.0050119161605835, "learning_rate": 4.429330557678595e-05, "loss": 0.8886, "step": 167990 }, { "epoch": 1.0733041156101861, "grad_norm": 0.7378489971160889, "learning_rate": 4.428832070920238e-05, "loss": 0.7695, "step": 168000 }, { "epoch": 1.0733680027599248, "grad_norm": 1.542384147644043, "learning_rate": 4.428333589914036e-05, "loss": 0.6757, "step": 168010 }, { "epoch": 1.0734318899096635, "grad_norm": 0.9338440299034119, "learning_rate": 4.4278351146650086e-05, "loss": 0.8971, "step": 168020 }, { "epoch": 1.0734957770594022, "grad_norm": 0.7650867700576782, "learning_rate": 4.427336645178177e-05, "loss": 0.7302, "step": 168030 }, { "epoch": 1.073559664209141, "grad_norm": 0.7335389852523804, "learning_rate": 4.42683818145856e-05, "loss": 1.0885, "step": 168040 }, { "epoch": 1.0736235513588797, "grad_norm": 1.6143547296524048, "learning_rate": 4.4263397235111795e-05, "loss": 0.9723, "step": 168050 }, { "epoch": 1.0736874385086184, "grad_norm": 0.8721071481704712, "learning_rate": 4.425841271341055e-05, "loss": 0.9947, "step": 168060 }, { "epoch": 1.073751325658357, "grad_norm": 0.7515314221382141, "learning_rate": 4.425342824953204e-05, "loss": 1.0497, "step": 168070 }, { "epoch": 1.0738152128080958, "grad_norm": 1.048852801322937, "learning_rate": 4.424844384352649e-05, "loss": 1.0662, "step": 168080 }, { "epoch": 1.0738790999578345, "grad_norm": 0.9293131232261658, "learning_rate": 4.424345949544408e-05, "loss": 0.96, "step": 168090 }, { "epoch": 1.0739429871075732, "grad_norm": 0.540847659111023, "learning_rate": 4.4238475205335015e-05, "loss": 1.0502, "step": 168100 }, { "epoch": 1.074006874257312, "grad_norm": 0.7694253325462341, "learning_rate": 4.423349097324949e-05, "loss": 0.8383, "step": 168110 }, { "epoch": 1.0740707614070506, "grad_norm": 0.9716971516609192, "learning_rate": 4.42285067992377e-05, "loss": 1.1018, "step": 168120 }, { "epoch": 1.0741346485567893, "grad_norm": 1.4167827367782593, "learning_rate": 4.4223522683349835e-05, "loss": 0.9259, "step": 168130 }, { "epoch": 1.074198535706528, "grad_norm": 0.784041702747345, "learning_rate": 4.42185386256361e-05, "loss": 0.7515, "step": 168140 }, { "epoch": 1.0742624228562667, "grad_norm": 0.80231112241745, "learning_rate": 4.421355462614668e-05, "loss": 0.9655, "step": 168150 }, { "epoch": 1.0743263100060054, "grad_norm": 0.7538440823554993, "learning_rate": 4.420857068493178e-05, "loss": 0.7496, "step": 168160 }, { "epoch": 1.0743901971557441, "grad_norm": 2.33453106880188, "learning_rate": 4.4203586802041566e-05, "loss": 0.7772, "step": 168170 }, { "epoch": 1.0744540843054828, "grad_norm": 1.0160599946975708, "learning_rate": 4.419860297752626e-05, "loss": 0.6744, "step": 168180 }, { "epoch": 1.0745179714552215, "grad_norm": 0.8873048424720764, "learning_rate": 4.419361921143604e-05, "loss": 0.9338, "step": 168190 }, { "epoch": 1.0745818586049602, "grad_norm": 0.8100428581237793, "learning_rate": 4.4188635503821094e-05, "loss": 0.6733, "step": 168200 }, { "epoch": 1.074645745754699, "grad_norm": 0.87029629945755, "learning_rate": 4.418365185473162e-05, "loss": 0.8515, "step": 168210 }, { "epoch": 1.0747096329044377, "grad_norm": 1.0414305925369263, "learning_rate": 4.4178668264217796e-05, "loss": 1.0144, "step": 168220 }, { "epoch": 1.0747735200541764, "grad_norm": 1.6467934846878052, "learning_rate": 4.417368473232982e-05, "loss": 0.8989, "step": 168230 }, { "epoch": 1.074837407203915, "grad_norm": 1.692230224609375, "learning_rate": 4.416870125911788e-05, "loss": 0.9082, "step": 168240 }, { "epoch": 1.0749012943536538, "grad_norm": 1.0755226612091064, "learning_rate": 4.416371784463216e-05, "loss": 0.8357, "step": 168250 }, { "epoch": 1.0749651815033925, "grad_norm": 0.6711715459823608, "learning_rate": 4.415873448892286e-05, "loss": 1.0267, "step": 168260 }, { "epoch": 1.0750290686531312, "grad_norm": 0.965099036693573, "learning_rate": 4.4153751192040153e-05, "loss": 1.1305, "step": 168270 }, { "epoch": 1.07509295580287, "grad_norm": 0.9926470518112183, "learning_rate": 4.414876795403423e-05, "loss": 0.7589, "step": 168280 }, { "epoch": 1.0751568429526086, "grad_norm": 0.9975265264511108, "learning_rate": 4.4143784774955274e-05, "loss": 0.7783, "step": 168290 }, { "epoch": 1.0752207301023473, "grad_norm": 0.831061601638794, "learning_rate": 4.41388016548535e-05, "loss": 0.941, "step": 168300 }, { "epoch": 1.075284617252086, "grad_norm": 0.7290365099906921, "learning_rate": 4.413381859377904e-05, "loss": 0.7004, "step": 168310 }, { "epoch": 1.0753485044018247, "grad_norm": 1.004852294921875, "learning_rate": 4.412883559178209e-05, "loss": 1.2023, "step": 168320 }, { "epoch": 1.0754123915515632, "grad_norm": 1.3445194959640503, "learning_rate": 4.412385264891286e-05, "loss": 0.8475, "step": 168330 }, { "epoch": 1.0754762787013021, "grad_norm": 1.051191806793213, "learning_rate": 4.411886976522151e-05, "loss": 0.8842, "step": 168340 }, { "epoch": 1.0755401658510406, "grad_norm": 1.453165888786316, "learning_rate": 4.411388694075822e-05, "loss": 0.763, "step": 168350 }, { "epoch": 1.0756040530007793, "grad_norm": 1.1620047092437744, "learning_rate": 4.410890417557319e-05, "loss": 0.769, "step": 168360 }, { "epoch": 1.075667940150518, "grad_norm": 0.6074997782707214, "learning_rate": 4.410392146971659e-05, "loss": 0.808, "step": 168370 }, { "epoch": 1.0757318273002567, "grad_norm": 0.8900678157806396, "learning_rate": 4.409893882323861e-05, "loss": 0.9424, "step": 168380 }, { "epoch": 1.0757957144499954, "grad_norm": 0.6844345331192017, "learning_rate": 4.409395623618941e-05, "loss": 0.5761, "step": 168390 }, { "epoch": 1.0758596015997341, "grad_norm": 1.0848898887634277, "learning_rate": 4.408897370861919e-05, "loss": 0.7388, "step": 168400 }, { "epoch": 1.0759234887494729, "grad_norm": 1.1647626161575317, "learning_rate": 4.4083991240578115e-05, "loss": 0.9816, "step": 168410 }, { "epoch": 1.0759873758992116, "grad_norm": 1.0366533994674683, "learning_rate": 4.407900883211636e-05, "loss": 0.8788, "step": 168420 }, { "epoch": 1.0760512630489503, "grad_norm": 1.0390725135803223, "learning_rate": 4.407402648328412e-05, "loss": 0.7624, "step": 168430 }, { "epoch": 1.076115150198689, "grad_norm": 0.7953748106956482, "learning_rate": 4.406904419413155e-05, "loss": 0.8238, "step": 168440 }, { "epoch": 1.0761790373484277, "grad_norm": 0.825472354888916, "learning_rate": 4.406406196470884e-05, "loss": 1.1502, "step": 168450 }, { "epoch": 1.0762429244981664, "grad_norm": 0.642017126083374, "learning_rate": 4.4059079795066164e-05, "loss": 0.8664, "step": 168460 }, { "epoch": 1.076306811647905, "grad_norm": 1.6154723167419434, "learning_rate": 4.4054097685253694e-05, "loss": 0.7728, "step": 168470 }, { "epoch": 1.0763706987976438, "grad_norm": 1.057303786277771, "learning_rate": 4.4049115635321595e-05, "loss": 0.7316, "step": 168480 }, { "epoch": 1.0764345859473825, "grad_norm": 1.460866928100586, "learning_rate": 4.404413364532006e-05, "loss": 0.9117, "step": 168490 }, { "epoch": 1.0764984730971212, "grad_norm": 0.7985673546791077, "learning_rate": 4.403915171529925e-05, "loss": 0.857, "step": 168500 }, { "epoch": 1.07656236024686, "grad_norm": 0.9446043372154236, "learning_rate": 4.403416984530934e-05, "loss": 1.1648, "step": 168510 }, { "epoch": 1.0766262473965986, "grad_norm": 1.0071111917495728, "learning_rate": 4.402918803540049e-05, "loss": 0.7617, "step": 168520 }, { "epoch": 1.0766901345463373, "grad_norm": 1.2365188598632812, "learning_rate": 4.402420628562289e-05, "loss": 0.8075, "step": 168530 }, { "epoch": 1.076754021696076, "grad_norm": 0.8958815932273865, "learning_rate": 4.4019224596026706e-05, "loss": 0.819, "step": 168540 }, { "epoch": 1.0768179088458147, "grad_norm": 0.5266879796981812, "learning_rate": 4.40142429666621e-05, "loss": 0.796, "step": 168550 }, { "epoch": 1.0768817959955534, "grad_norm": 1.222324013710022, "learning_rate": 4.400926139757924e-05, "loss": 0.6938, "step": 168560 }, { "epoch": 1.0769456831452922, "grad_norm": 1.7840367555618286, "learning_rate": 4.40042798888283e-05, "loss": 0.8517, "step": 168570 }, { "epoch": 1.0770095702950309, "grad_norm": 0.7238796353340149, "learning_rate": 4.3999298440459455e-05, "loss": 0.834, "step": 168580 }, { "epoch": 1.0770734574447696, "grad_norm": 0.7589558362960815, "learning_rate": 4.399431705252287e-05, "loss": 0.6977, "step": 168590 }, { "epoch": 1.0771373445945083, "grad_norm": 0.9841206669807434, "learning_rate": 4.398933572506871e-05, "loss": 0.7392, "step": 168600 }, { "epoch": 1.077201231744247, "grad_norm": 1.0497477054595947, "learning_rate": 4.398435445814713e-05, "loss": 0.7858, "step": 168610 }, { "epoch": 1.0772651188939857, "grad_norm": 0.9010785818099976, "learning_rate": 4.3979373251808307e-05, "loss": 1.0822, "step": 168620 }, { "epoch": 1.0773290060437244, "grad_norm": 0.7049776315689087, "learning_rate": 4.3974392106102405e-05, "loss": 0.7629, "step": 168630 }, { "epoch": 1.077392893193463, "grad_norm": 0.7971298098564148, "learning_rate": 4.396941102107959e-05, "loss": 0.712, "step": 168640 }, { "epoch": 1.0774567803432018, "grad_norm": 0.9001279473304749, "learning_rate": 4.396442999679003e-05, "loss": 0.92, "step": 168650 }, { "epoch": 1.0775206674929405, "grad_norm": 0.7700995802879333, "learning_rate": 4.395944903328387e-05, "loss": 0.6326, "step": 168660 }, { "epoch": 1.0775845546426792, "grad_norm": 1.122528314590454, "learning_rate": 4.395446813061128e-05, "loss": 0.7356, "step": 168670 }, { "epoch": 1.077648441792418, "grad_norm": 1.0956536531448364, "learning_rate": 4.3949487288822434e-05, "loss": 0.9384, "step": 168680 }, { "epoch": 1.0777123289421566, "grad_norm": 0.9272425174713135, "learning_rate": 4.3944506507967484e-05, "loss": 0.9868, "step": 168690 }, { "epoch": 1.0777762160918953, "grad_norm": 0.9327694177627563, "learning_rate": 4.3939525788096595e-05, "loss": 1.1208, "step": 168700 }, { "epoch": 1.077840103241634, "grad_norm": 0.92158043384552, "learning_rate": 4.3934545129259925e-05, "loss": 1.16, "step": 168710 }, { "epoch": 1.0779039903913727, "grad_norm": 0.7717714905738831, "learning_rate": 4.3929564531507627e-05, "loss": 0.9948, "step": 168720 }, { "epoch": 1.0779678775411115, "grad_norm": 1.565289855003357, "learning_rate": 4.392458399488987e-05, "loss": 0.6847, "step": 168730 }, { "epoch": 1.0780317646908502, "grad_norm": 0.6341485977172852, "learning_rate": 4.3919603519456806e-05, "loss": 0.8132, "step": 168740 }, { "epoch": 1.0780956518405889, "grad_norm": 1.279700517654419, "learning_rate": 4.391462310525859e-05, "loss": 0.9369, "step": 168750 }, { "epoch": 1.0781595389903276, "grad_norm": 0.7400231957435608, "learning_rate": 4.390964275234538e-05, "loss": 1.0159, "step": 168760 }, { "epoch": 1.0782234261400663, "grad_norm": 1.025972843170166, "learning_rate": 4.3904662460767346e-05, "loss": 0.9119, "step": 168770 }, { "epoch": 1.078287313289805, "grad_norm": 0.7673271298408508, "learning_rate": 4.389968223057464e-05, "loss": 0.8859, "step": 168780 }, { "epoch": 1.0783512004395437, "grad_norm": 0.8631071448326111, "learning_rate": 4.389470206181743e-05, "loss": 0.7731, "step": 168790 }, { "epoch": 1.0784150875892824, "grad_norm": 0.7734096646308899, "learning_rate": 4.388972195454583e-05, "loss": 0.8901, "step": 168800 }, { "epoch": 1.078478974739021, "grad_norm": 1.3677899837493896, "learning_rate": 4.388474190881e-05, "loss": 1.0029, "step": 168810 }, { "epoch": 1.0785428618887596, "grad_norm": 1.3638877868652344, "learning_rate": 4.3879761924660135e-05, "loss": 0.8329, "step": 168820 }, { "epoch": 1.0786067490384985, "grad_norm": 0.7826031446456909, "learning_rate": 4.387478200214635e-05, "loss": 0.8171, "step": 168830 }, { "epoch": 1.078670636188237, "grad_norm": 1.2444953918457031, "learning_rate": 4.3869802141318804e-05, "loss": 0.9164, "step": 168840 }, { "epoch": 1.0787345233379757, "grad_norm": 0.8797706961631775, "learning_rate": 4.3864822342227664e-05, "loss": 0.9033, "step": 168850 }, { "epoch": 1.0787984104877144, "grad_norm": 0.9462030529975891, "learning_rate": 4.3859842604923065e-05, "loss": 0.8665, "step": 168860 }, { "epoch": 1.0788622976374531, "grad_norm": 0.860593318939209, "learning_rate": 4.3854862929455164e-05, "loss": 0.8267, "step": 168870 }, { "epoch": 1.0789261847871918, "grad_norm": 3.0111243724823, "learning_rate": 4.38498833158741e-05, "loss": 0.8713, "step": 168880 }, { "epoch": 1.0789900719369305, "grad_norm": 0.8164293169975281, "learning_rate": 4.384490376423004e-05, "loss": 0.981, "step": 168890 }, { "epoch": 1.0790539590866692, "grad_norm": 1.224902868270874, "learning_rate": 4.383992427457312e-05, "loss": 0.8621, "step": 168900 }, { "epoch": 1.079117846236408, "grad_norm": 1.4257479906082153, "learning_rate": 4.38349448469535e-05, "loss": 0.6887, "step": 168910 }, { "epoch": 1.0791817333861466, "grad_norm": 1.2082140445709229, "learning_rate": 4.382996548142132e-05, "loss": 0.6849, "step": 168920 }, { "epoch": 1.0792456205358854, "grad_norm": 0.7488890886306763, "learning_rate": 4.3824986178026725e-05, "loss": 1.042, "step": 168930 }, { "epoch": 1.079309507685624, "grad_norm": 1.0096524953842163, "learning_rate": 4.3820006936819856e-05, "loss": 0.8779, "step": 168940 }, { "epoch": 1.0793733948353628, "grad_norm": 1.3145537376403809, "learning_rate": 4.381502775785086e-05, "loss": 0.8794, "step": 168950 }, { "epoch": 1.0794372819851015, "grad_norm": 0.878990888595581, "learning_rate": 4.38100486411699e-05, "loss": 0.8852, "step": 168960 }, { "epoch": 1.0795011691348402, "grad_norm": 1.4810287952423096, "learning_rate": 4.380506958682709e-05, "loss": 0.7115, "step": 168970 }, { "epoch": 1.0795650562845789, "grad_norm": 2.4938902854919434, "learning_rate": 4.3800090594872594e-05, "loss": 0.9315, "step": 168980 }, { "epoch": 1.0796289434343176, "grad_norm": 0.8952536582946777, "learning_rate": 4.379511166535655e-05, "loss": 1.0401, "step": 168990 }, { "epoch": 1.0796928305840563, "grad_norm": 0.9901078939437866, "learning_rate": 4.37901327983291e-05, "loss": 0.7888, "step": 169000 }, { "epoch": 1.079756717733795, "grad_norm": 1.0167852640151978, "learning_rate": 4.3785153993840386e-05, "loss": 0.8806, "step": 169010 }, { "epoch": 1.0798206048835337, "grad_norm": 0.898930013179779, "learning_rate": 4.378017525194055e-05, "loss": 0.7056, "step": 169020 }, { "epoch": 1.0798844920332724, "grad_norm": 0.9970487356185913, "learning_rate": 4.3775196572679724e-05, "loss": 0.8798, "step": 169030 }, { "epoch": 1.0799483791830111, "grad_norm": 0.7721714377403259, "learning_rate": 4.377021795610805e-05, "loss": 0.5651, "step": 169040 }, { "epoch": 1.0800122663327498, "grad_norm": 0.7181320786476135, "learning_rate": 4.3765239402275685e-05, "loss": 0.9484, "step": 169050 }, { "epoch": 1.0800761534824885, "grad_norm": 0.7529858946800232, "learning_rate": 4.3760260911232745e-05, "loss": 0.9099, "step": 169060 }, { "epoch": 1.0801400406322272, "grad_norm": 1.4071221351623535, "learning_rate": 4.3755282483029376e-05, "loss": 0.8341, "step": 169070 }, { "epoch": 1.080203927781966, "grad_norm": 1.0312004089355469, "learning_rate": 4.3750304117715704e-05, "loss": 1.3534, "step": 169080 }, { "epoch": 1.0802678149317047, "grad_norm": 0.9659938812255859, "learning_rate": 4.3745325815341885e-05, "loss": 0.7913, "step": 169090 }, { "epoch": 1.0803317020814434, "grad_norm": 0.763280987739563, "learning_rate": 4.374034757595805e-05, "loss": 0.9172, "step": 169100 }, { "epoch": 1.080395589231182, "grad_norm": 4.690800189971924, "learning_rate": 4.373536939961433e-05, "loss": 0.8613, "step": 169110 }, { "epoch": 1.0804594763809208, "grad_norm": 1.3843324184417725, "learning_rate": 4.373039128636085e-05, "loss": 0.7185, "step": 169120 }, { "epoch": 1.0805233635306595, "grad_norm": 1.8589551448822021, "learning_rate": 4.372541323624777e-05, "loss": 1.0096, "step": 169130 }, { "epoch": 1.0805872506803982, "grad_norm": 1.5525774955749512, "learning_rate": 4.3720435249325196e-05, "loss": 1.0313, "step": 169140 }, { "epoch": 1.0806511378301369, "grad_norm": 0.9464132785797119, "learning_rate": 4.3715457325643274e-05, "loss": 0.8473, "step": 169150 }, { "epoch": 1.0807150249798756, "grad_norm": 0.9257469773292542, "learning_rate": 4.3710479465252135e-05, "loss": 0.6759, "step": 169160 }, { "epoch": 1.0807789121296143, "grad_norm": 3.840132474899292, "learning_rate": 4.370550166820191e-05, "loss": 0.8859, "step": 169170 }, { "epoch": 1.080842799279353, "grad_norm": 0.8638167977333069, "learning_rate": 4.370052393454272e-05, "loss": 0.9481, "step": 169180 }, { "epoch": 1.0809066864290917, "grad_norm": 0.8762006759643555, "learning_rate": 4.3695546264324716e-05, "loss": 1.11, "step": 169190 }, { "epoch": 1.0809705735788304, "grad_norm": 0.763314425945282, "learning_rate": 4.369056865759801e-05, "loss": 0.8666, "step": 169200 }, { "epoch": 1.0810344607285691, "grad_norm": 0.9983099102973938, "learning_rate": 4.368559111441274e-05, "loss": 0.9914, "step": 169210 }, { "epoch": 1.0810983478783078, "grad_norm": 0.7259060144424438, "learning_rate": 4.3680613634819026e-05, "loss": 1.0586, "step": 169220 }, { "epoch": 1.0811622350280465, "grad_norm": 1.0532219409942627, "learning_rate": 4.3675636218867e-05, "loss": 0.865, "step": 169230 }, { "epoch": 1.0812261221777852, "grad_norm": 2.120307207107544, "learning_rate": 4.367065886660678e-05, "loss": 0.8903, "step": 169240 }, { "epoch": 1.081290009327524, "grad_norm": 0.9013161659240723, "learning_rate": 4.366568157808851e-05, "loss": 1.0287, "step": 169250 }, { "epoch": 1.0813538964772627, "grad_norm": 1.2690575122833252, "learning_rate": 4.3660704353362316e-05, "loss": 1.1216, "step": 169260 }, { "epoch": 1.0814177836270014, "grad_norm": 0.8027593493461609, "learning_rate": 4.36557271924783e-05, "loss": 0.8628, "step": 169270 }, { "epoch": 1.08148167077674, "grad_norm": 0.7389708757400513, "learning_rate": 4.3650750095486616e-05, "loss": 0.8239, "step": 169280 }, { "epoch": 1.0815455579264786, "grad_norm": 1.0916862487792969, "learning_rate": 4.3645773062437354e-05, "loss": 0.6986, "step": 169290 }, { "epoch": 1.0816094450762175, "grad_norm": 0.9672138690948486, "learning_rate": 4.3640796093380666e-05, "loss": 0.6625, "step": 169300 }, { "epoch": 1.081673332225956, "grad_norm": 0.8081925511360168, "learning_rate": 4.3635819188366655e-05, "loss": 0.7711, "step": 169310 }, { "epoch": 1.0817372193756947, "grad_norm": 0.949112594127655, "learning_rate": 4.363084234744545e-05, "loss": 1.0129, "step": 169320 }, { "epoch": 1.0818011065254334, "grad_norm": 0.9872422218322754, "learning_rate": 4.3625865570667174e-05, "loss": 0.9821, "step": 169330 }, { "epoch": 1.081864993675172, "grad_norm": 0.7962661385536194, "learning_rate": 4.3620888858081945e-05, "loss": 1.1761, "step": 169340 }, { "epoch": 1.0819288808249108, "grad_norm": 0.5037175416946411, "learning_rate": 4.361591220973988e-05, "loss": 1.0152, "step": 169350 }, { "epoch": 1.0819927679746495, "grad_norm": 1.953133225440979, "learning_rate": 4.361093562569111e-05, "loss": 0.7962, "step": 169360 }, { "epoch": 1.0820566551243882, "grad_norm": 0.6017458438873291, "learning_rate": 4.3605959105985746e-05, "loss": 0.8448, "step": 169370 }, { "epoch": 1.082120542274127, "grad_norm": 0.8771808743476868, "learning_rate": 4.360098265067391e-05, "loss": 0.8213, "step": 169380 }, { "epoch": 1.0821844294238656, "grad_norm": 1.3374742269515991, "learning_rate": 4.35960062598057e-05, "loss": 0.7434, "step": 169390 }, { "epoch": 1.0822483165736043, "grad_norm": 0.7994939088821411, "learning_rate": 4.359102993343125e-05, "loss": 0.8355, "step": 169400 }, { "epoch": 1.082312203723343, "grad_norm": 0.8211526274681091, "learning_rate": 4.358605367160067e-05, "loss": 0.9097, "step": 169410 }, { "epoch": 1.0823760908730817, "grad_norm": 0.9132190346717834, "learning_rate": 4.3581077474364084e-05, "loss": 0.8039, "step": 169420 }, { "epoch": 1.0824399780228204, "grad_norm": 0.6380227208137512, "learning_rate": 4.35761013417716e-05, "loss": 0.8588, "step": 169430 }, { "epoch": 1.0825038651725591, "grad_norm": 1.1924580335617065, "learning_rate": 4.357112527387333e-05, "loss": 1.1468, "step": 169440 }, { "epoch": 1.0825677523222978, "grad_norm": 0.803373396396637, "learning_rate": 4.3566149270719404e-05, "loss": 0.7185, "step": 169450 }, { "epoch": 1.0826316394720366, "grad_norm": 1.080508828163147, "learning_rate": 4.356117333235992e-05, "loss": 0.7215, "step": 169460 }, { "epoch": 1.0826955266217753, "grad_norm": 0.8322065472602844, "learning_rate": 4.355619745884498e-05, "loss": 0.8798, "step": 169470 }, { "epoch": 1.082759413771514, "grad_norm": 1.1867259740829468, "learning_rate": 4.355122165022471e-05, "loss": 0.8808, "step": 169480 }, { "epoch": 1.0828233009212527, "grad_norm": 1.0579557418823242, "learning_rate": 4.354624590654922e-05, "loss": 1.1633, "step": 169490 }, { "epoch": 1.0828871880709914, "grad_norm": 0.992499828338623, "learning_rate": 4.354127022786861e-05, "loss": 0.8273, "step": 169500 }, { "epoch": 1.08295107522073, "grad_norm": 1.0000821352005005, "learning_rate": 4.3536294614233e-05, "loss": 1.0181, "step": 169510 }, { "epoch": 1.0830149623704688, "grad_norm": 1.0470143556594849, "learning_rate": 4.3531319065692494e-05, "loss": 1.181, "step": 169520 }, { "epoch": 1.0830788495202075, "grad_norm": 0.9132643938064575, "learning_rate": 4.3526343582297205e-05, "loss": 0.8465, "step": 169530 }, { "epoch": 1.0831427366699462, "grad_norm": 1.1474629640579224, "learning_rate": 4.352136816409723e-05, "loss": 0.9753, "step": 169540 }, { "epoch": 1.083206623819685, "grad_norm": 1.3476228713989258, "learning_rate": 4.351639281114269e-05, "loss": 0.8403, "step": 169550 }, { "epoch": 1.0832705109694236, "grad_norm": 0.955660879611969, "learning_rate": 4.351141752348368e-05, "loss": 0.7374, "step": 169560 }, { "epoch": 1.0833343981191623, "grad_norm": 0.7566315531730652, "learning_rate": 4.3506442301170305e-05, "loss": 1.0007, "step": 169570 }, { "epoch": 1.083398285268901, "grad_norm": 1.4220582246780396, "learning_rate": 4.3501467144252686e-05, "loss": 1.0204, "step": 169580 }, { "epoch": 1.0834621724186397, "grad_norm": 1.1132736206054688, "learning_rate": 4.3496492052780904e-05, "loss": 0.8878, "step": 169590 }, { "epoch": 1.0835260595683784, "grad_norm": 0.5826320052146912, "learning_rate": 4.349151702680507e-05, "loss": 1.0499, "step": 169600 }, { "epoch": 1.0835899467181171, "grad_norm": 0.7217887043952942, "learning_rate": 4.348654206637529e-05, "loss": 0.784, "step": 169610 }, { "epoch": 1.0836538338678559, "grad_norm": 1.1123169660568237, "learning_rate": 4.348156717154167e-05, "loss": 0.9233, "step": 169620 }, { "epoch": 1.0837177210175946, "grad_norm": 0.9296445250511169, "learning_rate": 4.347659234235431e-05, "loss": 0.9011, "step": 169630 }, { "epoch": 1.0837816081673333, "grad_norm": 0.9957021474838257, "learning_rate": 4.347161757886331e-05, "loss": 0.8949, "step": 169640 }, { "epoch": 1.083845495317072, "grad_norm": 0.7610149383544922, "learning_rate": 4.346664288111877e-05, "loss": 0.9835, "step": 169650 }, { "epoch": 1.0839093824668107, "grad_norm": 0.9562907218933105, "learning_rate": 4.346166824917079e-05, "loss": 0.7712, "step": 169660 }, { "epoch": 1.0839732696165494, "grad_norm": 1.062423586845398, "learning_rate": 4.345669368306946e-05, "loss": 1.179, "step": 169670 }, { "epoch": 1.084037156766288, "grad_norm": 1.3824454545974731, "learning_rate": 4.3451719182864894e-05, "loss": 1.1116, "step": 169680 }, { "epoch": 1.0841010439160268, "grad_norm": 0.847102701663971, "learning_rate": 4.344674474860717e-05, "loss": 1.0358, "step": 169690 }, { "epoch": 1.0841649310657655, "grad_norm": 2.5781049728393555, "learning_rate": 4.34417703803464e-05, "loss": 0.9208, "step": 169700 }, { "epoch": 1.0842288182155042, "grad_norm": 0.65672367811203, "learning_rate": 4.343679607813268e-05, "loss": 0.748, "step": 169710 }, { "epoch": 1.084292705365243, "grad_norm": 1.4862734079360962, "learning_rate": 4.3431821842016104e-05, "loss": 0.862, "step": 169720 }, { "epoch": 1.0843565925149816, "grad_norm": 1.015182375907898, "learning_rate": 4.342684767204675e-05, "loss": 1.0431, "step": 169730 }, { "epoch": 1.0844204796647203, "grad_norm": 0.8225411772727966, "learning_rate": 4.342187356827474e-05, "loss": 0.8876, "step": 169740 }, { "epoch": 1.084484366814459, "grad_norm": 0.8683805465698242, "learning_rate": 4.341689953075015e-05, "loss": 0.8132, "step": 169750 }, { "epoch": 1.0845482539641977, "grad_norm": 1.6863847970962524, "learning_rate": 4.3411925559523096e-05, "loss": 0.9326, "step": 169760 }, { "epoch": 1.0846121411139364, "grad_norm": 1.3636550903320312, "learning_rate": 4.340695165464362e-05, "loss": 0.8445, "step": 169770 }, { "epoch": 1.084676028263675, "grad_norm": 1.0071210861206055, "learning_rate": 4.340197781616186e-05, "loss": 0.8123, "step": 169780 }, { "epoch": 1.0847399154134139, "grad_norm": 0.7153182625770569, "learning_rate": 4.339700404412789e-05, "loss": 0.9171, "step": 169790 }, { "epoch": 1.0848038025631523, "grad_norm": 1.7383946180343628, "learning_rate": 4.33920303385918e-05, "loss": 0.9486, "step": 169800 }, { "epoch": 1.084867689712891, "grad_norm": 0.558637797832489, "learning_rate": 4.338705669960368e-05, "loss": 0.7979, "step": 169810 }, { "epoch": 1.0849315768626298, "grad_norm": 1.0765694379806519, "learning_rate": 4.338208312721362e-05, "loss": 0.8457, "step": 169820 }, { "epoch": 1.0849954640123685, "grad_norm": 0.681546151638031, "learning_rate": 4.337710962147171e-05, "loss": 0.6883, "step": 169830 }, { "epoch": 1.0850593511621072, "grad_norm": 1.1017876863479614, "learning_rate": 4.3372136182428037e-05, "loss": 0.9216, "step": 169840 }, { "epoch": 1.0851232383118459, "grad_norm": 1.7058666944503784, "learning_rate": 4.3367162810132685e-05, "loss": 1.0559, "step": 169850 }, { "epoch": 1.0851871254615846, "grad_norm": 1.3724285364151, "learning_rate": 4.336218950463574e-05, "loss": 0.6846, "step": 169860 }, { "epoch": 1.0852510126113233, "grad_norm": 0.880271315574646, "learning_rate": 4.335721626598729e-05, "loss": 0.9166, "step": 169870 }, { "epoch": 1.085314899761062, "grad_norm": 1.2698674201965332, "learning_rate": 4.335224309423742e-05, "loss": 0.8341, "step": 169880 }, { "epoch": 1.0853787869108007, "grad_norm": 1.0333696603775024, "learning_rate": 4.3347269989436214e-05, "loss": 1.3271, "step": 169890 }, { "epoch": 1.0854426740605394, "grad_norm": 1.2140263319015503, "learning_rate": 4.334229695163375e-05, "loss": 0.8656, "step": 169900 }, { "epoch": 1.085506561210278, "grad_norm": 0.5510571599006653, "learning_rate": 4.3337323980880123e-05, "loss": 1.0138, "step": 169910 }, { "epoch": 1.0855704483600168, "grad_norm": 0.4985210597515106, "learning_rate": 4.33323510772254e-05, "loss": 0.7532, "step": 169920 }, { "epoch": 1.0856343355097555, "grad_norm": 0.7603302597999573, "learning_rate": 4.332737824071966e-05, "loss": 0.8652, "step": 169930 }, { "epoch": 1.0856982226594942, "grad_norm": 0.925507664680481, "learning_rate": 4.3322405471413006e-05, "loss": 0.817, "step": 169940 }, { "epoch": 1.085762109809233, "grad_norm": 1.286191701889038, "learning_rate": 4.3317432769355514e-05, "loss": 0.905, "step": 169950 }, { "epoch": 1.0858259969589716, "grad_norm": 0.7807539105415344, "learning_rate": 4.331246013459724e-05, "loss": 0.9113, "step": 169960 }, { "epoch": 1.0858898841087103, "grad_norm": 0.8085950016975403, "learning_rate": 4.3307487567188294e-05, "loss": 0.7799, "step": 169970 }, { "epoch": 1.085953771258449, "grad_norm": 0.7874223589897156, "learning_rate": 4.330251506717873e-05, "loss": 0.9971, "step": 169980 }, { "epoch": 1.0860176584081878, "grad_norm": 0.9304120540618896, "learning_rate": 4.329754263461863e-05, "loss": 0.7827, "step": 169990 }, { "epoch": 1.0860815455579265, "grad_norm": 0.9361356496810913, "learning_rate": 4.329257026955808e-05, "loss": 1.0771, "step": 170000 }, { "epoch": 1.0861454327076652, "grad_norm": 1.004862666130066, "learning_rate": 4.3287597972047144e-05, "loss": 0.9004, "step": 170010 }, { "epoch": 1.0862093198574039, "grad_norm": 1.2250120639801025, "learning_rate": 4.328262574213591e-05, "loss": 0.7129, "step": 170020 }, { "epoch": 1.0862732070071426, "grad_norm": 0.5635613799095154, "learning_rate": 4.3277653579874445e-05, "loss": 0.8634, "step": 170030 }, { "epoch": 1.0863370941568813, "grad_norm": 0.9488706588745117, "learning_rate": 4.3272681485312824e-05, "loss": 1.0111, "step": 170040 }, { "epoch": 1.08640098130662, "grad_norm": 0.8792842626571655, "learning_rate": 4.326770945850111e-05, "loss": 0.6489, "step": 170050 }, { "epoch": 1.0864648684563587, "grad_norm": 0.5924078226089478, "learning_rate": 4.32627374994894e-05, "loss": 0.7982, "step": 170060 }, { "epoch": 1.0865287556060974, "grad_norm": 1.1501904726028442, "learning_rate": 4.325776560832775e-05, "loss": 0.8234, "step": 170070 }, { "epoch": 1.0865926427558361, "grad_norm": 0.9579604268074036, "learning_rate": 4.3252793785066234e-05, "loss": 0.8457, "step": 170080 }, { "epoch": 1.0866565299055748, "grad_norm": 0.599661648273468, "learning_rate": 4.3247822029754915e-05, "loss": 0.9024, "step": 170090 }, { "epoch": 1.0867204170553135, "grad_norm": 0.861957848072052, "learning_rate": 4.324285034244387e-05, "loss": 0.8308, "step": 170100 }, { "epoch": 1.0867843042050522, "grad_norm": 1.1481958627700806, "learning_rate": 4.323787872318317e-05, "loss": 0.8787, "step": 170110 }, { "epoch": 1.086848191354791, "grad_norm": 0.8969278931617737, "learning_rate": 4.323290717202289e-05, "loss": 1.1709, "step": 170120 }, { "epoch": 1.0869120785045296, "grad_norm": 0.944645345211029, "learning_rate": 4.322793568901308e-05, "loss": 0.8074, "step": 170130 }, { "epoch": 1.0869759656542684, "grad_norm": 0.8422662019729614, "learning_rate": 4.32229642742038e-05, "loss": 0.9204, "step": 170140 }, { "epoch": 1.087039852804007, "grad_norm": 1.7301738262176514, "learning_rate": 4.321799292764515e-05, "loss": 0.7814, "step": 170150 }, { "epoch": 1.0871037399537458, "grad_norm": 0.9981438517570496, "learning_rate": 4.321302164938717e-05, "loss": 0.8506, "step": 170160 }, { "epoch": 1.0871676271034845, "grad_norm": 0.6587639451026917, "learning_rate": 4.3208050439479955e-05, "loss": 0.7711, "step": 170170 }, { "epoch": 1.0872315142532232, "grad_norm": 0.6386655569076538, "learning_rate": 4.320307929797353e-05, "loss": 0.9447, "step": 170180 }, { "epoch": 1.0872954014029619, "grad_norm": 0.953752875328064, "learning_rate": 4.319810822491798e-05, "loss": 0.5579, "step": 170190 }, { "epoch": 1.0873592885527006, "grad_norm": 1.0442869663238525, "learning_rate": 4.3193137220363366e-05, "loss": 0.767, "step": 170200 }, { "epoch": 1.0874231757024393, "grad_norm": 1.0589914321899414, "learning_rate": 4.318816628435975e-05, "loss": 0.632, "step": 170210 }, { "epoch": 1.087487062852178, "grad_norm": 0.8957975506782532, "learning_rate": 4.318319541695719e-05, "loss": 1.2458, "step": 170220 }, { "epoch": 1.0875509500019167, "grad_norm": 0.7432546615600586, "learning_rate": 4.3178224618205755e-05, "loss": 0.6911, "step": 170230 }, { "epoch": 1.0876148371516554, "grad_norm": 0.9806839227676392, "learning_rate": 4.3173253888155496e-05, "loss": 0.9478, "step": 170240 }, { "epoch": 1.0876787243013941, "grad_norm": 1.5259449481964111, "learning_rate": 4.3168283226856496e-05, "loss": 0.7437, "step": 170250 }, { "epoch": 1.0877426114511328, "grad_norm": 0.9942728281021118, "learning_rate": 4.316331263435876e-05, "loss": 0.9433, "step": 170260 }, { "epoch": 1.0878064986008713, "grad_norm": 1.1904678344726562, "learning_rate": 4.31583421107124e-05, "loss": 0.8075, "step": 170270 }, { "epoch": 1.0878703857506102, "grad_norm": 1.0735795497894287, "learning_rate": 4.315337165596745e-05, "loss": 0.9411, "step": 170280 }, { "epoch": 1.0879342729003487, "grad_norm": 1.5544825792312622, "learning_rate": 4.3148401270173963e-05, "loss": 0.8414, "step": 170290 }, { "epoch": 1.0879981600500874, "grad_norm": 1.0982941389083862, "learning_rate": 4.314343095338201e-05, "loss": 1.1805, "step": 170300 }, { "epoch": 1.0880620471998261, "grad_norm": 0.6447432041168213, "learning_rate": 4.3138460705641645e-05, "loss": 0.8576, "step": 170310 }, { "epoch": 1.0881259343495648, "grad_norm": 0.844802737236023, "learning_rate": 4.313349052700291e-05, "loss": 0.8521, "step": 170320 }, { "epoch": 1.0881898214993035, "grad_norm": 1.66594660282135, "learning_rate": 4.312852041751586e-05, "loss": 1.0375, "step": 170330 }, { "epoch": 1.0882537086490423, "grad_norm": 2.11464786529541, "learning_rate": 4.312355037723056e-05, "loss": 0.9433, "step": 170340 }, { "epoch": 1.088317595798781, "grad_norm": 0.6536024808883667, "learning_rate": 4.311858040619706e-05, "loss": 0.8515, "step": 170350 }, { "epoch": 1.0883814829485197, "grad_norm": 0.7490361332893372, "learning_rate": 4.31136105044654e-05, "loss": 0.9418, "step": 170360 }, { "epoch": 1.0884453700982584, "grad_norm": 0.8330373167991638, "learning_rate": 4.310864067208564e-05, "loss": 0.7771, "step": 170370 }, { "epoch": 1.088509257247997, "grad_norm": 0.6857531666755676, "learning_rate": 4.310367090910784e-05, "loss": 0.9539, "step": 170380 }, { "epoch": 1.0885731443977358, "grad_norm": 0.7235297560691833, "learning_rate": 4.309870121558204e-05, "loss": 0.5731, "step": 170390 }, { "epoch": 1.0886370315474745, "grad_norm": 0.9833201766014099, "learning_rate": 4.3093731591558285e-05, "loss": 0.8506, "step": 170400 }, { "epoch": 1.0887009186972132, "grad_norm": 0.8931936025619507, "learning_rate": 4.308876203708662e-05, "loss": 0.7127, "step": 170410 }, { "epoch": 1.088764805846952, "grad_norm": 0.9253937602043152, "learning_rate": 4.308379255221711e-05, "loss": 0.7261, "step": 170420 }, { "epoch": 1.0888286929966906, "grad_norm": 1.6246715784072876, "learning_rate": 4.307882313699979e-05, "loss": 0.8979, "step": 170430 }, { "epoch": 1.0888925801464293, "grad_norm": 0.6399512887001038, "learning_rate": 4.307385379148471e-05, "loss": 0.7505, "step": 170440 }, { "epoch": 1.088956467296168, "grad_norm": 1.1002649068832397, "learning_rate": 4.3068884515721905e-05, "loss": 0.7275, "step": 170450 }, { "epoch": 1.0890203544459067, "grad_norm": 0.920734167098999, "learning_rate": 4.306391530976143e-05, "loss": 1.1204, "step": 170460 }, { "epoch": 1.0890842415956454, "grad_norm": 0.6019224524497986, "learning_rate": 4.3058946173653336e-05, "loss": 0.8799, "step": 170470 }, { "epoch": 1.0891481287453841, "grad_norm": 0.9627069234848022, "learning_rate": 4.3053977107447656e-05, "loss": 0.8298, "step": 170480 }, { "epoch": 1.0892120158951228, "grad_norm": 0.9707046747207642, "learning_rate": 4.3049008111194436e-05, "loss": 0.9176, "step": 170490 }, { "epoch": 1.0892759030448615, "grad_norm": 1.699053406715393, "learning_rate": 4.3044039184943725e-05, "loss": 0.9308, "step": 170500 }, { "epoch": 1.0893397901946003, "grad_norm": 2.0104095935821533, "learning_rate": 4.303907032874555e-05, "loss": 0.8551, "step": 170510 }, { "epoch": 1.089403677344339, "grad_norm": 1.3910030126571655, "learning_rate": 4.303410154264996e-05, "loss": 0.7657, "step": 170520 }, { "epoch": 1.0894675644940777, "grad_norm": 1.23843252658844, "learning_rate": 4.3029132826706994e-05, "loss": 1.1489, "step": 170530 }, { "epoch": 1.0895314516438164, "grad_norm": 1.2719831466674805, "learning_rate": 4.30241641809667e-05, "loss": 0.8545, "step": 170540 }, { "epoch": 1.089595338793555, "grad_norm": 0.8064283728599548, "learning_rate": 4.301919560547909e-05, "loss": 1.2119, "step": 170550 }, { "epoch": 1.0896592259432938, "grad_norm": 0.8944299221038818, "learning_rate": 4.301422710029423e-05, "loss": 0.8799, "step": 170560 }, { "epoch": 1.0897231130930325, "grad_norm": 1.0481112003326416, "learning_rate": 4.300925866546215e-05, "loss": 0.8182, "step": 170570 }, { "epoch": 1.0897870002427712, "grad_norm": 0.682194709777832, "learning_rate": 4.300429030103288e-05, "loss": 0.834, "step": 170580 }, { "epoch": 1.08985088739251, "grad_norm": 0.7931283712387085, "learning_rate": 4.2999322007056456e-05, "loss": 0.9273, "step": 170590 }, { "epoch": 1.0899147745422486, "grad_norm": 0.880792498588562, "learning_rate": 4.2994353783582916e-05, "loss": 0.7912, "step": 170600 }, { "epoch": 1.0899786616919873, "grad_norm": 0.9642816185951233, "learning_rate": 4.2989385630662295e-05, "loss": 0.8244, "step": 170610 }, { "epoch": 1.090042548841726, "grad_norm": 1.038432240486145, "learning_rate": 4.298441754834462e-05, "loss": 0.7977, "step": 170620 }, { "epoch": 1.0901064359914647, "grad_norm": 0.7804505228996277, "learning_rate": 4.297944953667994e-05, "loss": 0.821, "step": 170630 }, { "epoch": 1.0901703231412034, "grad_norm": 0.8476502299308777, "learning_rate": 4.297448159571827e-05, "loss": 0.9512, "step": 170640 }, { "epoch": 1.0902342102909421, "grad_norm": 0.8416975140571594, "learning_rate": 4.296951372550965e-05, "loss": 0.9175, "step": 170650 }, { "epoch": 1.0902980974406808, "grad_norm": 1.0005344152450562, "learning_rate": 4.296454592610412e-05, "loss": 0.9392, "step": 170660 }, { "epoch": 1.0903619845904196, "grad_norm": 0.8752073049545288, "learning_rate": 4.295957819755167e-05, "loss": 0.952, "step": 170670 }, { "epoch": 1.0904258717401583, "grad_norm": 1.113079309463501, "learning_rate": 4.2954610539902376e-05, "loss": 0.8772, "step": 170680 }, { "epoch": 1.090489758889897, "grad_norm": 0.7961655259132385, "learning_rate": 4.294964295320625e-05, "loss": 0.8065, "step": 170690 }, { "epoch": 1.0905536460396357, "grad_norm": 0.9878672957420349, "learning_rate": 4.294467543751332e-05, "loss": 0.6863, "step": 170700 }, { "epoch": 1.0906175331893744, "grad_norm": 1.0683012008666992, "learning_rate": 4.2939707992873614e-05, "loss": 0.8474, "step": 170710 }, { "epoch": 1.090681420339113, "grad_norm": 1.4252039194107056, "learning_rate": 4.293474061933715e-05, "loss": 0.9683, "step": 170720 }, { "epoch": 1.0907453074888518, "grad_norm": 0.6621407866477966, "learning_rate": 4.2929773316953986e-05, "loss": 0.9312, "step": 170730 }, { "epoch": 1.0908091946385905, "grad_norm": 0.9435315132141113, "learning_rate": 4.29248060857741e-05, "loss": 0.6707, "step": 170740 }, { "epoch": 1.0908730817883292, "grad_norm": 0.7621443867683411, "learning_rate": 4.291983892584754e-05, "loss": 0.7015, "step": 170750 }, { "epoch": 1.0909369689380677, "grad_norm": 0.7548527717590332, "learning_rate": 4.2914871837224325e-05, "loss": 0.8002, "step": 170760 }, { "epoch": 1.0910008560878066, "grad_norm": 1.0962146520614624, "learning_rate": 4.2909904819954474e-05, "loss": 0.9549, "step": 170770 }, { "epoch": 1.091064743237545, "grad_norm": 1.036853313446045, "learning_rate": 4.290493787408801e-05, "loss": 0.804, "step": 170780 }, { "epoch": 1.0911286303872838, "grad_norm": 0.9214669466018677, "learning_rate": 4.289997099967497e-05, "loss": 0.9074, "step": 170790 }, { "epoch": 1.0911925175370225, "grad_norm": 1.0445566177368164, "learning_rate": 4.289500419676537e-05, "loss": 1.0604, "step": 170800 }, { "epoch": 1.0912564046867612, "grad_norm": 1.1054303646087646, "learning_rate": 4.289053413532349e-05, "loss": 0.9242, "step": 170810 }, { "epoch": 1.0913202918365, "grad_norm": 0.9858868718147278, "learning_rate": 4.288556746840821e-05, "loss": 0.9317, "step": 170820 }, { "epoch": 1.0913841789862386, "grad_norm": 1.280604362487793, "learning_rate": 4.288060087314143e-05, "loss": 0.8943, "step": 170830 }, { "epoch": 1.0914480661359773, "grad_norm": 0.986193060874939, "learning_rate": 4.287563434957315e-05, "loss": 0.9631, "step": 170840 }, { "epoch": 1.091511953285716, "grad_norm": 0.850288987159729, "learning_rate": 4.28706678977534e-05, "loss": 0.8876, "step": 170850 }, { "epoch": 1.0915758404354547, "grad_norm": 0.8429303169250488, "learning_rate": 4.2865701517732194e-05, "loss": 0.9627, "step": 170860 }, { "epoch": 1.0916397275851935, "grad_norm": 1.297688364982605, "learning_rate": 4.286073520955954e-05, "loss": 1.0169, "step": 170870 }, { "epoch": 1.0917036147349322, "grad_norm": 1.3055537939071655, "learning_rate": 4.285576897328548e-05, "loss": 0.9055, "step": 170880 }, { "epoch": 1.0917675018846709, "grad_norm": 0.5656216740608215, "learning_rate": 4.285080280895999e-05, "loss": 0.8723, "step": 170890 }, { "epoch": 1.0918313890344096, "grad_norm": 1.0163016319274902, "learning_rate": 4.284583671663309e-05, "loss": 0.7025, "step": 170900 }, { "epoch": 1.0918952761841483, "grad_norm": 1.388039469718933, "learning_rate": 4.2840870696354815e-05, "loss": 0.9179, "step": 170910 }, { "epoch": 1.091959163333887, "grad_norm": 0.740880012512207, "learning_rate": 4.283590474817516e-05, "loss": 0.8821, "step": 170920 }, { "epoch": 1.0920230504836257, "grad_norm": 1.1587016582489014, "learning_rate": 4.283093887214414e-05, "loss": 0.8497, "step": 170930 }, { "epoch": 1.0920869376333644, "grad_norm": 0.5431670546531677, "learning_rate": 4.282597306831178e-05, "loss": 0.7304, "step": 170940 }, { "epoch": 1.092150824783103, "grad_norm": 0.808057427406311, "learning_rate": 4.282100733672807e-05, "loss": 0.751, "step": 170950 }, { "epoch": 1.0922147119328418, "grad_norm": 1.3623803853988647, "learning_rate": 4.281604167744303e-05, "loss": 0.9272, "step": 170960 }, { "epoch": 1.0922785990825805, "grad_norm": 0.8062224388122559, "learning_rate": 4.2811076090506665e-05, "loss": 1.1279, "step": 170970 }, { "epoch": 1.0923424862323192, "grad_norm": 0.6659354567527771, "learning_rate": 4.280611057596898e-05, "loss": 0.8028, "step": 170980 }, { "epoch": 1.092406373382058, "grad_norm": 0.9792410731315613, "learning_rate": 4.2801145133879984e-05, "loss": 0.8559, "step": 170990 }, { "epoch": 1.0924702605317966, "grad_norm": 1.0196290016174316, "learning_rate": 4.2796179764289685e-05, "loss": 0.7903, "step": 171000 }, { "epoch": 1.0925341476815353, "grad_norm": 0.648735761642456, "learning_rate": 4.279121446724809e-05, "loss": 0.6385, "step": 171010 }, { "epoch": 1.092598034831274, "grad_norm": 0.98899906873703, "learning_rate": 4.27862492428052e-05, "loss": 0.9158, "step": 171020 }, { "epoch": 1.0926619219810128, "grad_norm": 1.0701076984405518, "learning_rate": 4.278128409101102e-05, "loss": 0.8305, "step": 171030 }, { "epoch": 1.0927258091307515, "grad_norm": 0.6789625287055969, "learning_rate": 4.277631901191556e-05, "loss": 0.794, "step": 171040 }, { "epoch": 1.0927896962804902, "grad_norm": 1.3493213653564453, "learning_rate": 4.277135400556881e-05, "loss": 0.9299, "step": 171050 }, { "epoch": 1.0928535834302289, "grad_norm": 0.4776621460914612, "learning_rate": 4.2766389072020787e-05, "loss": 0.7761, "step": 171060 }, { "epoch": 1.0929174705799676, "grad_norm": 1.0845409631729126, "learning_rate": 4.276142421132148e-05, "loss": 0.8007, "step": 171070 }, { "epoch": 1.0929813577297063, "grad_norm": 1.085318684577942, "learning_rate": 4.275645942352089e-05, "loss": 0.8909, "step": 171080 }, { "epoch": 1.093045244879445, "grad_norm": 0.7613808512687683, "learning_rate": 4.275149470866902e-05, "loss": 0.9719, "step": 171090 }, { "epoch": 1.0931091320291837, "grad_norm": 1.1465986967086792, "learning_rate": 4.274653006681587e-05, "loss": 0.9457, "step": 171100 }, { "epoch": 1.0931730191789224, "grad_norm": 1.1371395587921143, "learning_rate": 4.274156549801143e-05, "loss": 0.8983, "step": 171110 }, { "epoch": 1.093236906328661, "grad_norm": 0.8858291506767273, "learning_rate": 4.2736601002305715e-05, "loss": 0.6213, "step": 171120 }, { "epoch": 1.0933007934783998, "grad_norm": 1.0316749811172485, "learning_rate": 4.2731636579748714e-05, "loss": 0.835, "step": 171130 }, { "epoch": 1.0933646806281385, "grad_norm": 1.0872924327850342, "learning_rate": 4.2726672230390416e-05, "loss": 0.9478, "step": 171140 }, { "epoch": 1.0934285677778772, "grad_norm": 1.0157170295715332, "learning_rate": 4.2721707954280824e-05, "loss": 0.9286, "step": 171150 }, { "epoch": 1.093492454927616, "grad_norm": 1.3078545331954956, "learning_rate": 4.2716743751469926e-05, "loss": 0.8732, "step": 171160 }, { "epoch": 1.0935563420773546, "grad_norm": 0.5241159796714783, "learning_rate": 4.271177962200772e-05, "loss": 0.8715, "step": 171170 }, { "epoch": 1.0936202292270933, "grad_norm": 1.0080080032348633, "learning_rate": 4.27068155659442e-05, "loss": 0.9691, "step": 171180 }, { "epoch": 1.093684116376832, "grad_norm": 0.7973745465278625, "learning_rate": 4.2701851583329356e-05, "loss": 0.7358, "step": 171190 }, { "epoch": 1.0937480035265708, "grad_norm": 0.9834219813346863, "learning_rate": 4.269688767421318e-05, "loss": 0.8833, "step": 171200 }, { "epoch": 1.0938118906763095, "grad_norm": 1.2644120454788208, "learning_rate": 4.269192383864567e-05, "loss": 1.0025, "step": 171210 }, { "epoch": 1.0938757778260482, "grad_norm": 4.959319591522217, "learning_rate": 4.2686960076676794e-05, "loss": 0.8961, "step": 171220 }, { "epoch": 1.0939396649757869, "grad_norm": 0.6554162502288818, "learning_rate": 4.268199638835657e-05, "loss": 0.8272, "step": 171230 }, { "epoch": 1.0940035521255256, "grad_norm": 1.006784439086914, "learning_rate": 4.267703277373497e-05, "loss": 1.1133, "step": 171240 }, { "epoch": 1.094067439275264, "grad_norm": 1.1141198873519897, "learning_rate": 4.2672069232861984e-05, "loss": 1.0126, "step": 171250 }, { "epoch": 1.094131326425003, "grad_norm": 1.0400590896606445, "learning_rate": 4.2667105765787604e-05, "loss": 0.772, "step": 171260 }, { "epoch": 1.0941952135747415, "grad_norm": 1.063521146774292, "learning_rate": 4.266214237256181e-05, "loss": 0.9628, "step": 171270 }, { "epoch": 1.0942591007244802, "grad_norm": 0.8146291971206665, "learning_rate": 4.265717905323459e-05, "loss": 0.687, "step": 171280 }, { "epoch": 1.0943229878742189, "grad_norm": 0.9968299269676208, "learning_rate": 4.2652215807855924e-05, "loss": 0.846, "step": 171290 }, { "epoch": 1.0943868750239576, "grad_norm": 1.3008157014846802, "learning_rate": 4.264725263647581e-05, "loss": 0.7657, "step": 171300 }, { "epoch": 1.0944507621736963, "grad_norm": 1.059985876083374, "learning_rate": 4.264228953914421e-05, "loss": 1.1265, "step": 171310 }, { "epoch": 1.094514649323435, "grad_norm": 2.1042439937591553, "learning_rate": 4.263732651591113e-05, "loss": 0.7885, "step": 171320 }, { "epoch": 1.0945785364731737, "grad_norm": 1.5627658367156982, "learning_rate": 4.263236356682654e-05, "loss": 0.9555, "step": 171330 }, { "epoch": 1.0946424236229124, "grad_norm": 1.1831436157226562, "learning_rate": 4.262740069194042e-05, "loss": 0.7085, "step": 171340 }, { "epoch": 1.0947063107726511, "grad_norm": 0.998292863368988, "learning_rate": 4.262243789130276e-05, "loss": 0.8089, "step": 171350 }, { "epoch": 1.0947701979223898, "grad_norm": 0.8351050615310669, "learning_rate": 4.261747516496353e-05, "loss": 0.7589, "step": 171360 }, { "epoch": 1.0948340850721285, "grad_norm": 0.9817091226577759, "learning_rate": 4.261251251297273e-05, "loss": 1.1004, "step": 171370 }, { "epoch": 1.0948979722218672, "grad_norm": 0.8968245387077332, "learning_rate": 4.260754993538031e-05, "loss": 0.7585, "step": 171380 }, { "epoch": 1.094961859371606, "grad_norm": 0.994646430015564, "learning_rate": 4.260258743223625e-05, "loss": 1.0548, "step": 171390 }, { "epoch": 1.0950257465213447, "grad_norm": 0.6734488010406494, "learning_rate": 4.2597625003590534e-05, "loss": 1.1092, "step": 171400 }, { "epoch": 1.0950896336710834, "grad_norm": 1.889375925064087, "learning_rate": 4.259266264949314e-05, "loss": 0.7207, "step": 171410 }, { "epoch": 1.095153520820822, "grad_norm": 1.042815923690796, "learning_rate": 4.258770036999404e-05, "loss": 0.7736, "step": 171420 }, { "epoch": 1.0952174079705608, "grad_norm": 0.9770397543907166, "learning_rate": 4.25827381651432e-05, "loss": 0.841, "step": 171430 }, { "epoch": 1.0952812951202995, "grad_norm": 0.7401404976844788, "learning_rate": 4.2577776034990604e-05, "loss": 0.642, "step": 171440 }, { "epoch": 1.0953451822700382, "grad_norm": 0.7844344973564148, "learning_rate": 4.257281397958624e-05, "loss": 0.8013, "step": 171450 }, { "epoch": 1.095409069419777, "grad_norm": 0.5132720470428467, "learning_rate": 4.2567851998980055e-05, "loss": 0.9082, "step": 171460 }, { "epoch": 1.0954729565695156, "grad_norm": 0.7843263745307922, "learning_rate": 4.256289009322204e-05, "loss": 0.5077, "step": 171470 }, { "epoch": 1.0955368437192543, "grad_norm": 1.0947779417037964, "learning_rate": 4.2557928262362145e-05, "loss": 0.9435, "step": 171480 }, { "epoch": 1.095600730868993, "grad_norm": 0.9761757850646973, "learning_rate": 4.2552966506450355e-05, "loss": 0.6885, "step": 171490 }, { "epoch": 1.0956646180187317, "grad_norm": 1.1741174459457397, "learning_rate": 4.254800482553664e-05, "loss": 0.7751, "step": 171500 }, { "epoch": 1.0957285051684704, "grad_norm": 0.848971962928772, "learning_rate": 4.2543043219670954e-05, "loss": 0.8029, "step": 171510 }, { "epoch": 1.0957923923182091, "grad_norm": 1.3921595811843872, "learning_rate": 4.253808168890327e-05, "loss": 0.9036, "step": 171520 }, { "epoch": 1.0958562794679478, "grad_norm": 1.0362133979797363, "learning_rate": 4.2533120233283576e-05, "loss": 1.1471, "step": 171530 }, { "epoch": 1.0959201666176865, "grad_norm": 0.9512225985527039, "learning_rate": 4.252815885286181e-05, "loss": 0.7721, "step": 171540 }, { "epoch": 1.0959840537674252, "grad_norm": 1.5971604585647583, "learning_rate": 4.252319754768795e-05, "loss": 0.7549, "step": 171550 }, { "epoch": 1.096047940917164, "grad_norm": 1.265650749206543, "learning_rate": 4.251823631781195e-05, "loss": 0.8241, "step": 171560 }, { "epoch": 1.0961118280669027, "grad_norm": 1.4117658138275146, "learning_rate": 4.25132751632838e-05, "loss": 1.1055, "step": 171570 }, { "epoch": 1.0961757152166414, "grad_norm": 0.8779417276382446, "learning_rate": 4.2508314084153434e-05, "loss": 1.0223, "step": 171580 }, { "epoch": 1.09623960236638, "grad_norm": 1.1261694431304932, "learning_rate": 4.250335308047083e-05, "loss": 1.1164, "step": 171590 }, { "epoch": 1.0963034895161188, "grad_norm": 0.9011442065238953, "learning_rate": 4.249839215228595e-05, "loss": 0.7676, "step": 171600 }, { "epoch": 1.0963673766658575, "grad_norm": 0.8143396377563477, "learning_rate": 4.249343129964875e-05, "loss": 0.9507, "step": 171610 }, { "epoch": 1.0964312638155962, "grad_norm": 1.3709062337875366, "learning_rate": 4.248847052260919e-05, "loss": 0.9756, "step": 171620 }, { "epoch": 1.096495150965335, "grad_norm": 1.1121571063995361, "learning_rate": 4.248350982121722e-05, "loss": 0.942, "step": 171630 }, { "epoch": 1.0965590381150736, "grad_norm": 0.9342554807662964, "learning_rate": 4.247854919552281e-05, "loss": 0.8853, "step": 171640 }, { "epoch": 1.0966229252648123, "grad_norm": 0.7645041942596436, "learning_rate": 4.2473588645575925e-05, "loss": 0.9555, "step": 171650 }, { "epoch": 1.096686812414551, "grad_norm": 0.7210363149642944, "learning_rate": 4.246862817142651e-05, "loss": 0.9715, "step": 171660 }, { "epoch": 1.0967506995642897, "grad_norm": 0.8885617256164551, "learning_rate": 4.2463667773124526e-05, "loss": 0.8339, "step": 171670 }, { "epoch": 1.0968145867140284, "grad_norm": 1.1784389019012451, "learning_rate": 4.245870745071993e-05, "loss": 1.0082, "step": 171680 }, { "epoch": 1.0968784738637671, "grad_norm": 0.9615663290023804, "learning_rate": 4.245374720426267e-05, "loss": 1.1509, "step": 171690 }, { "epoch": 1.0969423610135058, "grad_norm": 1.2281625270843506, "learning_rate": 4.244878703380271e-05, "loss": 0.9602, "step": 171700 }, { "epoch": 1.0970062481632445, "grad_norm": 0.9366394281387329, "learning_rate": 4.244382693939e-05, "loss": 0.9398, "step": 171710 }, { "epoch": 1.097070135312983, "grad_norm": 0.8655837774276733, "learning_rate": 4.243886692107448e-05, "loss": 0.7933, "step": 171720 }, { "epoch": 1.097134022462722, "grad_norm": 0.7708938717842102, "learning_rate": 4.2433906978906114e-05, "loss": 0.9384, "step": 171730 }, { "epoch": 1.0971979096124604, "grad_norm": 1.2257397174835205, "learning_rate": 4.2428947112934854e-05, "loss": 1.0811, "step": 171740 }, { "epoch": 1.0972617967621991, "grad_norm": 1.1159895658493042, "learning_rate": 4.2423987323210646e-05, "loss": 0.8552, "step": 171750 }, { "epoch": 1.0973256839119379, "grad_norm": 0.725980818271637, "learning_rate": 4.241902760978343e-05, "loss": 0.9862, "step": 171760 }, { "epoch": 1.0973895710616766, "grad_norm": 1.0240581035614014, "learning_rate": 4.241406797270318e-05, "loss": 0.7955, "step": 171770 }, { "epoch": 1.0974534582114153, "grad_norm": 0.8556148409843445, "learning_rate": 4.240910841201982e-05, "loss": 1.0128, "step": 171780 }, { "epoch": 1.097517345361154, "grad_norm": 0.7460873126983643, "learning_rate": 4.24041489277833e-05, "loss": 0.8961, "step": 171790 }, { "epoch": 1.0975812325108927, "grad_norm": 0.9523458480834961, "learning_rate": 4.239918952004358e-05, "loss": 0.9272, "step": 171800 }, { "epoch": 1.0976451196606314, "grad_norm": 0.7124657034873962, "learning_rate": 4.23942301888506e-05, "loss": 0.7402, "step": 171810 }, { "epoch": 1.09770900681037, "grad_norm": 0.6065206527709961, "learning_rate": 4.23892709342543e-05, "loss": 0.8897, "step": 171820 }, { "epoch": 1.0977728939601088, "grad_norm": 1.2093256711959839, "learning_rate": 4.2384311756304616e-05, "loss": 0.6684, "step": 171830 }, { "epoch": 1.0978367811098475, "grad_norm": 0.9949159026145935, "learning_rate": 4.23793526550515e-05, "loss": 0.8771, "step": 171840 }, { "epoch": 1.0979006682595862, "grad_norm": 0.8249183893203735, "learning_rate": 4.2374393630544925e-05, "loss": 1.0064, "step": 171850 }, { "epoch": 1.097964555409325, "grad_norm": 1.299425482749939, "learning_rate": 4.236943468283478e-05, "loss": 0.9749, "step": 171860 }, { "epoch": 1.0980284425590636, "grad_norm": 0.5668662190437317, "learning_rate": 4.236447581197103e-05, "loss": 0.7784, "step": 171870 }, { "epoch": 1.0980923297088023, "grad_norm": 0.7093889117240906, "learning_rate": 4.235951701800362e-05, "loss": 1.1923, "step": 171880 }, { "epoch": 1.098156216858541, "grad_norm": 0.6421130299568176, "learning_rate": 4.235455830098248e-05, "loss": 0.5921, "step": 171890 }, { "epoch": 1.0982201040082797, "grad_norm": 1.3373634815216064, "learning_rate": 4.2349599660957545e-05, "loss": 1.1682, "step": 171900 }, { "epoch": 1.0982839911580184, "grad_norm": 0.7919142246246338, "learning_rate": 4.234464109797877e-05, "loss": 0.7321, "step": 171910 }, { "epoch": 1.0983478783077572, "grad_norm": 1.1525673866271973, "learning_rate": 4.2339682612096075e-05, "loss": 0.9742, "step": 171920 }, { "epoch": 1.0984117654574959, "grad_norm": 0.77737957239151, "learning_rate": 4.23347242033594e-05, "loss": 0.8141, "step": 171930 }, { "epoch": 1.0984756526072346, "grad_norm": 2.1227810382843018, "learning_rate": 4.23297658718187e-05, "loss": 0.8464, "step": 171940 }, { "epoch": 1.0985395397569733, "grad_norm": 0.8301504850387573, "learning_rate": 4.2324807617523865e-05, "loss": 0.7757, "step": 171950 }, { "epoch": 1.098603426906712, "grad_norm": 0.7667549252510071, "learning_rate": 4.2319849440524877e-05, "loss": 0.8712, "step": 171960 }, { "epoch": 1.0986673140564507, "grad_norm": 1.3217402696609497, "learning_rate": 4.231489134087165e-05, "loss": 0.9119, "step": 171970 }, { "epoch": 1.0987312012061894, "grad_norm": 0.9311858415603638, "learning_rate": 4.2309933318614116e-05, "loss": 0.7721, "step": 171980 }, { "epoch": 1.098795088355928, "grad_norm": 0.8711034059524536, "learning_rate": 4.23049753738022e-05, "loss": 0.7246, "step": 171990 }, { "epoch": 1.0988589755056668, "grad_norm": 1.1722370386123657, "learning_rate": 4.230001750648584e-05, "loss": 1.0147, "step": 172000 }, { "epoch": 1.0989228626554055, "grad_norm": 0.976512610912323, "learning_rate": 4.2295059716714965e-05, "loss": 0.9354, "step": 172010 }, { "epoch": 1.0989867498051442, "grad_norm": 1.198662281036377, "learning_rate": 4.229010200453951e-05, "loss": 0.8933, "step": 172020 }, { "epoch": 1.099050636954883, "grad_norm": 1.7006891965866089, "learning_rate": 4.22851443700094e-05, "loss": 1.0453, "step": 172030 }, { "epoch": 1.0991145241046216, "grad_norm": 0.9124136567115784, "learning_rate": 4.228018681317456e-05, "loss": 0.6503, "step": 172040 }, { "epoch": 1.0991784112543603, "grad_norm": 0.9206166863441467, "learning_rate": 4.227522933408491e-05, "loss": 0.8653, "step": 172050 }, { "epoch": 1.099242298404099, "grad_norm": 0.5899572968482971, "learning_rate": 4.2270271932790386e-05, "loss": 0.794, "step": 172060 }, { "epoch": 1.0993061855538377, "grad_norm": 1.1387134790420532, "learning_rate": 4.2265314609340915e-05, "loss": 0.7775, "step": 172070 }, { "epoch": 1.0993700727035765, "grad_norm": 1.3362581729888916, "learning_rate": 4.226035736378641e-05, "loss": 0.7042, "step": 172080 }, { "epoch": 1.0994339598533152, "grad_norm": 0.7027170658111572, "learning_rate": 4.225540019617681e-05, "loss": 0.7286, "step": 172090 }, { "epoch": 1.0994978470030539, "grad_norm": 0.8913945555686951, "learning_rate": 4.225044310656202e-05, "loss": 1.0044, "step": 172100 }, { "epoch": 1.0995617341527926, "grad_norm": 1.0845966339111328, "learning_rate": 4.224548609499198e-05, "loss": 0.8733, "step": 172110 }, { "epoch": 1.0996256213025313, "grad_norm": 0.90619957447052, "learning_rate": 4.22405291615166e-05, "loss": 0.7557, "step": 172120 }, { "epoch": 1.09968950845227, "grad_norm": 1.1762818098068237, "learning_rate": 4.2235572306185805e-05, "loss": 1.0823, "step": 172130 }, { "epoch": 1.0997533956020087, "grad_norm": 0.9015527367591858, "learning_rate": 4.223061552904952e-05, "loss": 0.8001, "step": 172140 }, { "epoch": 1.0998172827517474, "grad_norm": 1.2849770784378052, "learning_rate": 4.222565883015765e-05, "loss": 0.9836, "step": 172150 }, { "epoch": 1.099881169901486, "grad_norm": 0.5528566837310791, "learning_rate": 4.222070220956012e-05, "loss": 0.7608, "step": 172160 }, { "epoch": 1.0999450570512248, "grad_norm": 1.3537547588348389, "learning_rate": 4.2215745667306846e-05, "loss": 0.956, "step": 172170 }, { "epoch": 1.1000089442009635, "grad_norm": 0.8691318035125732, "learning_rate": 4.2210789203447755e-05, "loss": 0.8627, "step": 172180 }, { "epoch": 1.1000728313507022, "grad_norm": 1.3839830160140991, "learning_rate": 4.220583281803275e-05, "loss": 0.8302, "step": 172190 }, { "epoch": 1.100136718500441, "grad_norm": 0.724374532699585, "learning_rate": 4.220087651111176e-05, "loss": 0.6618, "step": 172200 }, { "epoch": 1.1002006056501794, "grad_norm": 0.858903706073761, "learning_rate": 4.2195920282734694e-05, "loss": 0.7735, "step": 172210 }, { "epoch": 1.1002644927999183, "grad_norm": 1.1366229057312012, "learning_rate": 4.219096413295145e-05, "loss": 0.9598, "step": 172220 }, { "epoch": 1.1003283799496568, "grad_norm": 1.0029979944229126, "learning_rate": 4.218600806181196e-05, "loss": 0.8373, "step": 172230 }, { "epoch": 1.1003922670993955, "grad_norm": 0.6481642127037048, "learning_rate": 4.218105206936613e-05, "loss": 1.1082, "step": 172240 }, { "epoch": 1.1004561542491342, "grad_norm": 0.9797278046607971, "learning_rate": 4.2176096155663866e-05, "loss": 0.6281, "step": 172250 }, { "epoch": 1.100520041398873, "grad_norm": 0.7582735419273376, "learning_rate": 4.217114032075508e-05, "loss": 0.9182, "step": 172260 }, { "epoch": 1.1005839285486116, "grad_norm": 0.9007226824760437, "learning_rate": 4.216618456468969e-05, "loss": 0.7436, "step": 172270 }, { "epoch": 1.1006478156983504, "grad_norm": 1.785007357597351, "learning_rate": 4.2161228887517594e-05, "loss": 0.9624, "step": 172280 }, { "epoch": 1.100711702848089, "grad_norm": 0.7608382105827332, "learning_rate": 4.215627328928871e-05, "loss": 0.7186, "step": 172290 }, { "epoch": 1.1007755899978278, "grad_norm": 1.9284123182296753, "learning_rate": 4.215131777005294e-05, "loss": 0.8328, "step": 172300 }, { "epoch": 1.1008394771475665, "grad_norm": 0.9450188279151917, "learning_rate": 4.2146362329860186e-05, "loss": 0.9594, "step": 172310 }, { "epoch": 1.1009033642973052, "grad_norm": 0.9953072667121887, "learning_rate": 4.2141406968760356e-05, "loss": 0.9788, "step": 172320 }, { "epoch": 1.1009672514470439, "grad_norm": 1.1051782369613647, "learning_rate": 4.2136451686803355e-05, "loss": 0.8349, "step": 172330 }, { "epoch": 1.1010311385967826, "grad_norm": 0.7446302771568298, "learning_rate": 4.213149648403911e-05, "loss": 1.0095, "step": 172340 }, { "epoch": 1.1010950257465213, "grad_norm": 0.9063861966133118, "learning_rate": 4.212654136051748e-05, "loss": 1.2343, "step": 172350 }, { "epoch": 1.10115891289626, "grad_norm": 1.1501379013061523, "learning_rate": 4.21215863162884e-05, "loss": 0.8306, "step": 172360 }, { "epoch": 1.1012228000459987, "grad_norm": 1.0388190746307373, "learning_rate": 4.2116631351401756e-05, "loss": 0.9503, "step": 172370 }, { "epoch": 1.1012866871957374, "grad_norm": 2.0269041061401367, "learning_rate": 4.211167646590746e-05, "loss": 0.8782, "step": 172380 }, { "epoch": 1.1013505743454761, "grad_norm": 0.9322276711463928, "learning_rate": 4.2106721659855395e-05, "loss": 0.9964, "step": 172390 }, { "epoch": 1.1014144614952148, "grad_norm": 0.815617024898529, "learning_rate": 4.210176693329548e-05, "loss": 1.1831, "step": 172400 }, { "epoch": 1.1014783486449535, "grad_norm": 0.8040037155151367, "learning_rate": 4.20968122862776e-05, "loss": 0.829, "step": 172410 }, { "epoch": 1.1015422357946922, "grad_norm": 0.7182419896125793, "learning_rate": 4.209185771885166e-05, "loss": 0.6466, "step": 172420 }, { "epoch": 1.101606122944431, "grad_norm": 0.6640510559082031, "learning_rate": 4.208690323106755e-05, "loss": 1.0263, "step": 172430 }, { "epoch": 1.1016700100941696, "grad_norm": 1.316969394683838, "learning_rate": 4.2081948822975184e-05, "loss": 0.8435, "step": 172440 }, { "epoch": 1.1017338972439084, "grad_norm": 1.1966426372528076, "learning_rate": 4.2076994494624436e-05, "loss": 0.8266, "step": 172450 }, { "epoch": 1.101797784393647, "grad_norm": 0.8291551470756531, "learning_rate": 4.20720402460652e-05, "loss": 0.947, "step": 172460 }, { "epoch": 1.1018616715433858, "grad_norm": 0.6528846025466919, "learning_rate": 4.206708607734739e-05, "loss": 1.0032, "step": 172470 }, { "epoch": 1.1019255586931245, "grad_norm": 0.9250202178955078, "learning_rate": 4.2062131988520866e-05, "loss": 0.9916, "step": 172480 }, { "epoch": 1.1019894458428632, "grad_norm": 0.8550183773040771, "learning_rate": 4.2057177979635554e-05, "loss": 0.6554, "step": 172490 }, { "epoch": 1.1020533329926019, "grad_norm": 1.0084480047225952, "learning_rate": 4.205222405074133e-05, "loss": 1.3241, "step": 172500 }, { "epoch": 1.1021172201423406, "grad_norm": 0.6729216575622559, "learning_rate": 4.204727020188809e-05, "loss": 0.6531, "step": 172510 }, { "epoch": 1.1021811072920793, "grad_norm": 1.1924982070922852, "learning_rate": 4.204231643312571e-05, "loss": 0.8125, "step": 172520 }, { "epoch": 1.102244994441818, "grad_norm": 1.1781163215637207, "learning_rate": 4.2037362744504096e-05, "loss": 0.8448, "step": 172530 }, { "epoch": 1.1023088815915567, "grad_norm": 1.3087345361709595, "learning_rate": 4.2032409136073125e-05, "loss": 0.8734, "step": 172540 }, { "epoch": 1.1023727687412954, "grad_norm": 0.7616584300994873, "learning_rate": 4.202745560788269e-05, "loss": 1.1044, "step": 172550 }, { "epoch": 1.1024366558910341, "grad_norm": 1.276070237159729, "learning_rate": 4.202250215998267e-05, "loss": 0.9824, "step": 172560 }, { "epoch": 1.1025005430407728, "grad_norm": 0.9662360548973083, "learning_rate": 4.201754879242296e-05, "loss": 0.849, "step": 172570 }, { "epoch": 1.1025644301905115, "grad_norm": 1.022019863128662, "learning_rate": 4.201259550525343e-05, "loss": 0.8815, "step": 172580 }, { "epoch": 1.1026283173402502, "grad_norm": 0.5287925601005554, "learning_rate": 4.200764229852398e-05, "loss": 0.8442, "step": 172590 }, { "epoch": 1.102692204489989, "grad_norm": 1.201217532157898, "learning_rate": 4.200268917228449e-05, "loss": 0.8891, "step": 172600 }, { "epoch": 1.1027560916397277, "grad_norm": 0.9720419645309448, "learning_rate": 4.199773612658483e-05, "loss": 0.7276, "step": 172610 }, { "epoch": 1.1028199787894664, "grad_norm": 0.7095947861671448, "learning_rate": 4.1992783161474894e-05, "loss": 0.838, "step": 172620 }, { "epoch": 1.102883865939205, "grad_norm": 0.8162873983383179, "learning_rate": 4.198783027700456e-05, "loss": 0.8075, "step": 172630 }, { "epoch": 1.1029477530889438, "grad_norm": 0.8493092656135559, "learning_rate": 4.1982877473223706e-05, "loss": 1.0478, "step": 172640 }, { "epoch": 1.1030116402386825, "grad_norm": 1.2920939922332764, "learning_rate": 4.197792475018221e-05, "loss": 0.9082, "step": 172650 }, { "epoch": 1.1030755273884212, "grad_norm": 1.1556893587112427, "learning_rate": 4.197297210792996e-05, "loss": 0.9246, "step": 172660 }, { "epoch": 1.10313941453816, "grad_norm": 0.756718635559082, "learning_rate": 4.196801954651682e-05, "loss": 1.0195, "step": 172670 }, { "epoch": 1.1032033016878986, "grad_norm": 1.058808445930481, "learning_rate": 4.196306706599267e-05, "loss": 0.8777, "step": 172680 }, { "epoch": 1.1032671888376373, "grad_norm": 0.9461612105369568, "learning_rate": 4.195811466640738e-05, "loss": 0.7936, "step": 172690 }, { "epoch": 1.1033310759873758, "grad_norm": 0.7724422812461853, "learning_rate": 4.195316234781084e-05, "loss": 0.7225, "step": 172700 }, { "epoch": 1.1033949631371147, "grad_norm": 0.7925411462783813, "learning_rate": 4.194821011025291e-05, "loss": 1.0131, "step": 172710 }, { "epoch": 1.1034588502868532, "grad_norm": 1.2644563913345337, "learning_rate": 4.194325795378348e-05, "loss": 0.7148, "step": 172720 }, { "epoch": 1.103522737436592, "grad_norm": 0.794563889503479, "learning_rate": 4.193830587845241e-05, "loss": 0.8343, "step": 172730 }, { "epoch": 1.1035866245863306, "grad_norm": 0.9380400776863098, "learning_rate": 4.193335388430957e-05, "loss": 0.81, "step": 172740 }, { "epoch": 1.1036505117360693, "grad_norm": 0.8075729608535767, "learning_rate": 4.192840197140484e-05, "loss": 0.9544, "step": 172750 }, { "epoch": 1.103714398885808, "grad_norm": 0.8002738356590271, "learning_rate": 4.192345013978809e-05, "loss": 0.9816, "step": 172760 }, { "epoch": 1.1037782860355467, "grad_norm": 1.1612515449523926, "learning_rate": 4.1918498389509175e-05, "loss": 0.9024, "step": 172770 }, { "epoch": 1.1038421731852854, "grad_norm": 0.9753612875938416, "learning_rate": 4.191354672061798e-05, "loss": 0.8175, "step": 172780 }, { "epoch": 1.1039060603350241, "grad_norm": 1.0018128156661987, "learning_rate": 4.190859513316436e-05, "loss": 0.944, "step": 172790 }, { "epoch": 1.1039699474847628, "grad_norm": 0.9988775849342346, "learning_rate": 4.1903643627198184e-05, "loss": 0.855, "step": 172800 }, { "epoch": 1.1040338346345016, "grad_norm": 0.9023102521896362, "learning_rate": 4.189869220276933e-05, "loss": 0.8005, "step": 172810 }, { "epoch": 1.1040977217842403, "grad_norm": 0.9544625878334045, "learning_rate": 4.189374085992766e-05, "loss": 0.6235, "step": 172820 }, { "epoch": 1.104161608933979, "grad_norm": 0.8562326431274414, "learning_rate": 4.1888789598723024e-05, "loss": 0.7493, "step": 172830 }, { "epoch": 1.1042254960837177, "grad_norm": 0.857276976108551, "learning_rate": 4.188383841920529e-05, "loss": 0.8467, "step": 172840 }, { "epoch": 1.1042893832334564, "grad_norm": 0.8983993530273438, "learning_rate": 4.1878887321424325e-05, "loss": 0.8552, "step": 172850 }, { "epoch": 1.104353270383195, "grad_norm": 2.2336559295654297, "learning_rate": 4.1873936305429995e-05, "loss": 1.1215, "step": 172860 }, { "epoch": 1.1044171575329338, "grad_norm": 0.9345127940177917, "learning_rate": 4.186898537127215e-05, "loss": 0.8787, "step": 172870 }, { "epoch": 1.1044810446826725, "grad_norm": 1.2393178939819336, "learning_rate": 4.186403451900066e-05, "loss": 1.0144, "step": 172880 }, { "epoch": 1.1045449318324112, "grad_norm": 1.1470755338668823, "learning_rate": 4.1859083748665385e-05, "loss": 0.7146, "step": 172890 }, { "epoch": 1.10460881898215, "grad_norm": 0.8653610944747925, "learning_rate": 4.185413306031617e-05, "loss": 1.0216, "step": 172900 }, { "epoch": 1.1046727061318886, "grad_norm": 0.784228503704071, "learning_rate": 4.184918245400289e-05, "loss": 0.8824, "step": 172910 }, { "epoch": 1.1047365932816273, "grad_norm": 0.7353566884994507, "learning_rate": 4.1844231929775394e-05, "loss": 0.7958, "step": 172920 }, { "epoch": 1.104800480431366, "grad_norm": 0.8124344944953918, "learning_rate": 4.1839281487683535e-05, "loss": 0.815, "step": 172930 }, { "epoch": 1.1048643675811047, "grad_norm": 1.401458501815796, "learning_rate": 4.183433112777717e-05, "loss": 0.7634, "step": 172940 }, { "epoch": 1.1049282547308434, "grad_norm": 1.014285922050476, "learning_rate": 4.182938085010616e-05, "loss": 0.9408, "step": 172950 }, { "epoch": 1.1049921418805821, "grad_norm": 1.2659311294555664, "learning_rate": 4.182443065472035e-05, "loss": 0.897, "step": 172960 }, { "epoch": 1.1050560290303209, "grad_norm": 0.4442936182022095, "learning_rate": 4.18194805416696e-05, "loss": 0.7383, "step": 172970 }, { "epoch": 1.1051199161800596, "grad_norm": 1.057563304901123, "learning_rate": 4.1814530511003755e-05, "loss": 0.8815, "step": 172980 }, { "epoch": 1.1051838033297983, "grad_norm": 0.4912680387496948, "learning_rate": 4.1809580562772674e-05, "loss": 0.8234, "step": 172990 }, { "epoch": 1.105247690479537, "grad_norm": 3.1906774044036865, "learning_rate": 4.1804630697026196e-05, "loss": 0.9966, "step": 173000 }, { "epoch": 1.1053115776292757, "grad_norm": 1.059396743774414, "learning_rate": 4.179968091381417e-05, "loss": 0.8505, "step": 173010 }, { "epoch": 1.1053754647790144, "grad_norm": 0.7790278792381287, "learning_rate": 4.1794731213186456e-05, "loss": 0.8695, "step": 173020 }, { "epoch": 1.105439351928753, "grad_norm": 0.581296980381012, "learning_rate": 4.17897815951929e-05, "loss": 0.7661, "step": 173030 }, { "epoch": 1.1055032390784918, "grad_norm": 1.0370402336120605, "learning_rate": 4.1784832059883347e-05, "loss": 0.689, "step": 173040 }, { "epoch": 1.1055671262282305, "grad_norm": 1.0399209260940552, "learning_rate": 4.177988260730765e-05, "loss": 1.1618, "step": 173050 }, { "epoch": 1.1056310133779692, "grad_norm": 2.4747142791748047, "learning_rate": 4.177493323751564e-05, "loss": 0.9111, "step": 173060 }, { "epoch": 1.105694900527708, "grad_norm": 0.7076548337936401, "learning_rate": 4.176998395055716e-05, "loss": 0.8245, "step": 173070 }, { "epoch": 1.1057587876774466, "grad_norm": 0.9863085746765137, "learning_rate": 4.1765034746482076e-05, "loss": 0.9501, "step": 173080 }, { "epoch": 1.1058226748271853, "grad_norm": 0.8387857675552368, "learning_rate": 4.1760085625340206e-05, "loss": 0.7514, "step": 173090 }, { "epoch": 1.105886561976924, "grad_norm": 0.7635478973388672, "learning_rate": 4.175513658718141e-05, "loss": 0.9059, "step": 173100 }, { "epoch": 1.1059504491266627, "grad_norm": 0.7215414643287659, "learning_rate": 4.1750187632055514e-05, "loss": 0.9656, "step": 173110 }, { "epoch": 1.1060143362764014, "grad_norm": 1.6596556901931763, "learning_rate": 4.1745238760012366e-05, "loss": 0.8594, "step": 173120 }, { "epoch": 1.1060782234261402, "grad_norm": 1.1065661907196045, "learning_rate": 4.174028997110181e-05, "loss": 0.9072, "step": 173130 }, { "epoch": 1.1061421105758789, "grad_norm": 0.6142051219940186, "learning_rate": 4.173534126537368e-05, "loss": 0.6434, "step": 173140 }, { "epoch": 1.1062059977256176, "grad_norm": 6.143871307373047, "learning_rate": 4.173039264287781e-05, "loss": 1.0416, "step": 173150 }, { "epoch": 1.1062698848753563, "grad_norm": 1.4489151239395142, "learning_rate": 4.172544410366404e-05, "loss": 0.8264, "step": 173160 }, { "epoch": 1.106333772025095, "grad_norm": 1.5547164678573608, "learning_rate": 4.172049564778221e-05, "loss": 1.1625, "step": 173170 }, { "epoch": 1.1063976591748337, "grad_norm": 1.0034717321395874, "learning_rate": 4.171554727528215e-05, "loss": 1.0096, "step": 173180 }, { "epoch": 1.1064615463245722, "grad_norm": 0.7144385576248169, "learning_rate": 4.1710598986213696e-05, "loss": 0.8016, "step": 173190 }, { "epoch": 1.106525433474311, "grad_norm": 1.2244892120361328, "learning_rate": 4.170565078062668e-05, "loss": 0.8115, "step": 173200 }, { "epoch": 1.1065893206240496, "grad_norm": 0.885942280292511, "learning_rate": 4.170070265857092e-05, "loss": 0.9264, "step": 173210 }, { "epoch": 1.1066532077737883, "grad_norm": 0.5482710003852844, "learning_rate": 4.169575462009628e-05, "loss": 1.0419, "step": 173220 }, { "epoch": 1.106717094923527, "grad_norm": 0.9318670630455017, "learning_rate": 4.169080666525258e-05, "loss": 0.6748, "step": 173230 }, { "epoch": 1.1067809820732657, "grad_norm": 0.8965703845024109, "learning_rate": 4.1685858794089646e-05, "loss": 0.8237, "step": 173240 }, { "epoch": 1.1068448692230044, "grad_norm": 2.319718599319458, "learning_rate": 4.1680911006657306e-05, "loss": 1.3752, "step": 173250 }, { "epoch": 1.106908756372743, "grad_norm": 1.0713149309158325, "learning_rate": 4.167596330300538e-05, "loss": 0.8723, "step": 173260 }, { "epoch": 1.1069726435224818, "grad_norm": 0.9905885457992554, "learning_rate": 4.167101568318371e-05, "loss": 0.9566, "step": 173270 }, { "epoch": 1.1070365306722205, "grad_norm": 0.9312426447868347, "learning_rate": 4.166606814724212e-05, "loss": 0.7414, "step": 173280 }, { "epoch": 1.1071004178219592, "grad_norm": 0.8389294147491455, "learning_rate": 4.1661120695230435e-05, "loss": 1.1426, "step": 173290 }, { "epoch": 1.107164304971698, "grad_norm": 0.496663898229599, "learning_rate": 4.165617332719847e-05, "loss": 0.7483, "step": 173300 }, { "epoch": 1.1072281921214366, "grad_norm": 1.2054636478424072, "learning_rate": 4.165122604319609e-05, "loss": 0.701, "step": 173310 }, { "epoch": 1.1072920792711753, "grad_norm": 1.9126182794570923, "learning_rate": 4.164627884327306e-05, "loss": 0.8618, "step": 173320 }, { "epoch": 1.107355966420914, "grad_norm": 1.3022087812423706, "learning_rate": 4.1641331727479216e-05, "loss": 0.9053, "step": 173330 }, { "epoch": 1.1074198535706528, "grad_norm": 1.2964845895767212, "learning_rate": 4.16363846958644e-05, "loss": 0.8173, "step": 173340 }, { "epoch": 1.1074837407203915, "grad_norm": 0.7168669104576111, "learning_rate": 4.163143774847844e-05, "loss": 0.9235, "step": 173350 }, { "epoch": 1.1075476278701302, "grad_norm": 0.8237465023994446, "learning_rate": 4.1626490885371134e-05, "loss": 0.8374, "step": 173360 }, { "epoch": 1.1076115150198689, "grad_norm": 0.9421668648719788, "learning_rate": 4.162154410659231e-05, "loss": 0.804, "step": 173370 }, { "epoch": 1.1076754021696076, "grad_norm": 0.8502464890480042, "learning_rate": 4.161659741219178e-05, "loss": 0.7407, "step": 173380 }, { "epoch": 1.1077392893193463, "grad_norm": 0.7700791954994202, "learning_rate": 4.161165080221937e-05, "loss": 1.105, "step": 173390 }, { "epoch": 1.107803176469085, "grad_norm": 0.670958936214447, "learning_rate": 4.160670427672489e-05, "loss": 1.1197, "step": 173400 }, { "epoch": 1.1078670636188237, "grad_norm": 1.0116523504257202, "learning_rate": 4.160175783575817e-05, "loss": 0.779, "step": 173410 }, { "epoch": 1.1079309507685624, "grad_norm": 1.674210548400879, "learning_rate": 4.1596811479369004e-05, "loss": 0.9408, "step": 173420 }, { "epoch": 1.1079948379183011, "grad_norm": 0.8221597075462341, "learning_rate": 4.1591865207607215e-05, "loss": 1.0986, "step": 173430 }, { "epoch": 1.1080587250680398, "grad_norm": 1.2803705930709839, "learning_rate": 4.1586919020522624e-05, "loss": 0.9425, "step": 173440 }, { "epoch": 1.1081226122177785, "grad_norm": 0.7996966242790222, "learning_rate": 4.158197291816503e-05, "loss": 0.8781, "step": 173450 }, { "epoch": 1.1081864993675172, "grad_norm": 1.3771693706512451, "learning_rate": 4.157702690058426e-05, "loss": 0.9906, "step": 173460 }, { "epoch": 1.108250386517256, "grad_norm": 1.1042697429656982, "learning_rate": 4.157208096783011e-05, "loss": 0.795, "step": 173470 }, { "epoch": 1.1083142736669946, "grad_norm": 1.042994499206543, "learning_rate": 4.156713511995241e-05, "loss": 0.741, "step": 173480 }, { "epoch": 1.1083781608167333, "grad_norm": 0.7262585163116455, "learning_rate": 4.156218935700094e-05, "loss": 0.7013, "step": 173490 }, { "epoch": 1.108442047966472, "grad_norm": 0.6869814395904541, "learning_rate": 4.155724367902552e-05, "loss": 0.7797, "step": 173500 }, { "epoch": 1.1085059351162108, "grad_norm": 0.9764127731323242, "learning_rate": 4.155229808607596e-05, "loss": 0.9213, "step": 173510 }, { "epoch": 1.1085698222659495, "grad_norm": 0.9626973271369934, "learning_rate": 4.1547352578202074e-05, "loss": 1.0226, "step": 173520 }, { "epoch": 1.1086337094156882, "grad_norm": 1.117993712425232, "learning_rate": 4.154240715545366e-05, "loss": 0.7672, "step": 173530 }, { "epoch": 1.1086975965654269, "grad_norm": 1.0907342433929443, "learning_rate": 4.153746181788051e-05, "loss": 0.8026, "step": 173540 }, { "epoch": 1.1087614837151656, "grad_norm": 1.1601186990737915, "learning_rate": 4.153251656553246e-05, "loss": 0.8438, "step": 173550 }, { "epoch": 1.1088253708649043, "grad_norm": 0.5594744086265564, "learning_rate": 4.152757139845928e-05, "loss": 1.0255, "step": 173560 }, { "epoch": 1.108889258014643, "grad_norm": 0.759391725063324, "learning_rate": 4.152262631671079e-05, "loss": 0.8922, "step": 173570 }, { "epoch": 1.1089531451643817, "grad_norm": 1.0028988122940063, "learning_rate": 4.151768132033679e-05, "loss": 0.6874, "step": 173580 }, { "epoch": 1.1090170323141204, "grad_norm": 1.068331003189087, "learning_rate": 4.1512736409387075e-05, "loss": 0.8646, "step": 173590 }, { "epoch": 1.1090809194638591, "grad_norm": 0.9323468208312988, "learning_rate": 4.150779158391145e-05, "loss": 0.6483, "step": 173600 }, { "epoch": 1.1091448066135978, "grad_norm": 1.2181113958358765, "learning_rate": 4.1502846843959706e-05, "loss": 0.9429, "step": 173610 }, { "epoch": 1.1092086937633365, "grad_norm": 1.4081296920776367, "learning_rate": 4.149790218958165e-05, "loss": 0.9354, "step": 173620 }, { "epoch": 1.1092725809130752, "grad_norm": 0.8330966234207153, "learning_rate": 4.1492957620827066e-05, "loss": 0.814, "step": 173630 }, { "epoch": 1.109336468062814, "grad_norm": 0.9767903089523315, "learning_rate": 4.148801313774576e-05, "loss": 0.8311, "step": 173640 }, { "epoch": 1.1094003552125526, "grad_norm": 1.104324460029602, "learning_rate": 4.148306874038753e-05, "loss": 1.0736, "step": 173650 }, { "epoch": 1.1094642423622911, "grad_norm": 1.6907215118408203, "learning_rate": 4.147812442880217e-05, "loss": 0.8524, "step": 173660 }, { "epoch": 1.10952812951203, "grad_norm": 1.7395610809326172, "learning_rate": 4.147318020303946e-05, "loss": 1.1916, "step": 173670 }, { "epoch": 1.1095920166617685, "grad_norm": 0.7708711624145508, "learning_rate": 4.1468236063149216e-05, "loss": 0.9255, "step": 173680 }, { "epoch": 1.1096559038115075, "grad_norm": 1.1579172611236572, "learning_rate": 4.14632920091812e-05, "loss": 0.9762, "step": 173690 }, { "epoch": 1.109719790961246, "grad_norm": 1.0927681922912598, "learning_rate": 4.145834804118522e-05, "loss": 1.127, "step": 173700 }, { "epoch": 1.1097836781109847, "grad_norm": 1.0229488611221313, "learning_rate": 4.1453404159211074e-05, "loss": 1.0284, "step": 173710 }, { "epoch": 1.1098475652607234, "grad_norm": 1.7790634632110596, "learning_rate": 4.144846036330854e-05, "loss": 0.6832, "step": 173720 }, { "epoch": 1.109911452410462, "grad_norm": 0.9170469641685486, "learning_rate": 4.144351665352741e-05, "loss": 0.7688, "step": 173730 }, { "epoch": 1.1099753395602008, "grad_norm": 0.9874303936958313, "learning_rate": 4.1438573029917454e-05, "loss": 0.7773, "step": 173740 }, { "epoch": 1.1100392267099395, "grad_norm": 0.5128178000450134, "learning_rate": 4.1433629492528485e-05, "loss": 0.8497, "step": 173750 }, { "epoch": 1.1101031138596782, "grad_norm": 0.9220598340034485, "learning_rate": 4.142868604141028e-05, "loss": 0.9081, "step": 173760 }, { "epoch": 1.110167001009417, "grad_norm": 0.7820585370063782, "learning_rate": 4.142374267661262e-05, "loss": 0.8426, "step": 173770 }, { "epoch": 1.1102308881591556, "grad_norm": 0.7808403372764587, "learning_rate": 4.141879939818529e-05, "loss": 0.9227, "step": 173780 }, { "epoch": 1.1102947753088943, "grad_norm": 0.9865910410881042, "learning_rate": 4.141385620617808e-05, "loss": 0.7624, "step": 173790 }, { "epoch": 1.110358662458633, "grad_norm": 1.2182927131652832, "learning_rate": 4.140891310064079e-05, "loss": 0.8516, "step": 173800 }, { "epoch": 1.1104225496083717, "grad_norm": 0.6205449104309082, "learning_rate": 4.140397008162315e-05, "loss": 0.8439, "step": 173810 }, { "epoch": 1.1104864367581104, "grad_norm": 0.9484419822692871, "learning_rate": 4.1399027149174965e-05, "loss": 0.7673, "step": 173820 }, { "epoch": 1.1105503239078491, "grad_norm": 0.9140626192092896, "learning_rate": 4.139408430334601e-05, "loss": 1.0648, "step": 173830 }, { "epoch": 1.1106142110575878, "grad_norm": 0.9677501916885376, "learning_rate": 4.138914154418609e-05, "loss": 0.9232, "step": 173840 }, { "epoch": 1.1106780982073265, "grad_norm": 0.8527592420578003, "learning_rate": 4.138419887174495e-05, "loss": 0.8107, "step": 173850 }, { "epoch": 1.1107419853570653, "grad_norm": 0.6409138441085815, "learning_rate": 4.137925628607238e-05, "loss": 0.7725, "step": 173860 }, { "epoch": 1.110805872506804, "grad_norm": 0.6374943852424622, "learning_rate": 4.137431378721816e-05, "loss": 0.8226, "step": 173870 }, { "epoch": 1.1108697596565427, "grad_norm": 1.587991714477539, "learning_rate": 4.136937137523207e-05, "loss": 0.7838, "step": 173880 }, { "epoch": 1.1109336468062814, "grad_norm": 1.2672752141952515, "learning_rate": 4.136442905016387e-05, "loss": 0.918, "step": 173890 }, { "epoch": 1.11099753395602, "grad_norm": 0.7447906732559204, "learning_rate": 4.135948681206334e-05, "loss": 0.9646, "step": 173900 }, { "epoch": 1.1110614211057588, "grad_norm": 1.1759493350982666, "learning_rate": 4.135454466098026e-05, "loss": 1.2743, "step": 173910 }, { "epoch": 1.1111253082554975, "grad_norm": 0.9131328463554382, "learning_rate": 4.1349602596964386e-05, "loss": 0.8198, "step": 173920 }, { "epoch": 1.1111891954052362, "grad_norm": 0.665424108505249, "learning_rate": 4.13446606200655e-05, "loss": 0.7456, "step": 173930 }, { "epoch": 1.111253082554975, "grad_norm": 0.6461379528045654, "learning_rate": 4.133971873033338e-05, "loss": 0.8839, "step": 173940 }, { "epoch": 1.1113169697047136, "grad_norm": 0.6741138100624084, "learning_rate": 4.1334776927817776e-05, "loss": 0.8698, "step": 173950 }, { "epoch": 1.1113808568544523, "grad_norm": 1.0578811168670654, "learning_rate": 4.132983521256846e-05, "loss": 0.822, "step": 173960 }, { "epoch": 1.111444744004191, "grad_norm": 1.079478144645691, "learning_rate": 4.1324893584635214e-05, "loss": 0.7061, "step": 173970 }, { "epoch": 1.1115086311539297, "grad_norm": 1.0450830459594727, "learning_rate": 4.131995204406779e-05, "loss": 0.6351, "step": 173980 }, { "epoch": 1.1115725183036684, "grad_norm": 0.7627521753311157, "learning_rate": 4.131501059091596e-05, "loss": 0.7369, "step": 173990 }, { "epoch": 1.1116364054534071, "grad_norm": 1.1520981788635254, "learning_rate": 4.131006922522948e-05, "loss": 0.8487, "step": 174000 }, { "epoch": 1.1117002926031458, "grad_norm": 0.9615451097488403, "learning_rate": 4.130512794705813e-05, "loss": 0.9746, "step": 174010 }, { "epoch": 1.1117641797528846, "grad_norm": 1.2906620502471924, "learning_rate": 4.130018675645166e-05, "loss": 1.0011, "step": 174020 }, { "epoch": 1.1118280669026233, "grad_norm": 1.2265547513961792, "learning_rate": 4.129524565345984e-05, "loss": 0.8273, "step": 174030 }, { "epoch": 1.111891954052362, "grad_norm": 0.739889919757843, "learning_rate": 4.1290304638132414e-05, "loss": 0.8474, "step": 174040 }, { "epoch": 1.1119558412021007, "grad_norm": 0.7667902708053589, "learning_rate": 4.128536371051916e-05, "loss": 0.7887, "step": 174050 }, { "epoch": 1.1120197283518394, "grad_norm": 0.8716595768928528, "learning_rate": 4.1280422870669834e-05, "loss": 0.7622, "step": 174060 }, { "epoch": 1.112083615501578, "grad_norm": 1.0395444631576538, "learning_rate": 4.127548211863419e-05, "loss": 0.856, "step": 174070 }, { "epoch": 1.1121475026513168, "grad_norm": 0.9293608069419861, "learning_rate": 4.1270541454462e-05, "loss": 1.0687, "step": 174080 }, { "epoch": 1.1122113898010555, "grad_norm": 0.7219346165657043, "learning_rate": 4.1265600878203e-05, "loss": 0.8219, "step": 174090 }, { "epoch": 1.1122752769507942, "grad_norm": 0.8477901220321655, "learning_rate": 4.126066038990696e-05, "loss": 0.7718, "step": 174100 }, { "epoch": 1.112339164100533, "grad_norm": 0.8595911860466003, "learning_rate": 4.125571998962363e-05, "loss": 0.8496, "step": 174110 }, { "epoch": 1.1124030512502716, "grad_norm": 0.6253750920295715, "learning_rate": 4.125077967740276e-05, "loss": 0.808, "step": 174120 }, { "epoch": 1.1124669384000103, "grad_norm": 1.5641717910766602, "learning_rate": 4.124583945329412e-05, "loss": 0.8895, "step": 174130 }, { "epoch": 1.112530825549749, "grad_norm": 1.0902210474014282, "learning_rate": 4.124089931734744e-05, "loss": 0.787, "step": 174140 }, { "epoch": 1.1125947126994875, "grad_norm": 0.6443929076194763, "learning_rate": 4.123595926961248e-05, "loss": 0.7237, "step": 174150 }, { "epoch": 1.1126585998492264, "grad_norm": 0.9357183575630188, "learning_rate": 4.1231019310139e-05, "loss": 0.8374, "step": 174160 }, { "epoch": 1.112722486998965, "grad_norm": 1.554679036140442, "learning_rate": 4.122607943897674e-05, "loss": 0.8504, "step": 174170 }, { "epoch": 1.1127863741487036, "grad_norm": 0.9971223473548889, "learning_rate": 4.122113965617544e-05, "loss": 0.9429, "step": 174180 }, { "epoch": 1.1128502612984423, "grad_norm": 0.8027485013008118, "learning_rate": 4.1216199961784876e-05, "loss": 0.8253, "step": 174190 }, { "epoch": 1.112914148448181, "grad_norm": 0.7266373038291931, "learning_rate": 4.1211260355854764e-05, "loss": 0.813, "step": 174200 }, { "epoch": 1.1129780355979197, "grad_norm": 1.004325270652771, "learning_rate": 4.120632083843487e-05, "loss": 0.9, "step": 174210 }, { "epoch": 1.1130419227476585, "grad_norm": 2.024078130722046, "learning_rate": 4.120138140957493e-05, "loss": 0.8187, "step": 174220 }, { "epoch": 1.1131058098973972, "grad_norm": 1.0069423913955688, "learning_rate": 4.119644206932469e-05, "loss": 0.6752, "step": 174230 }, { "epoch": 1.1131696970471359, "grad_norm": 0.9048756957054138, "learning_rate": 4.1191502817733894e-05, "loss": 0.8194, "step": 174240 }, { "epoch": 1.1132335841968746, "grad_norm": 0.8194555044174194, "learning_rate": 4.1186563654852286e-05, "loss": 0.8253, "step": 174250 }, { "epoch": 1.1132974713466133, "grad_norm": 6.5662665367126465, "learning_rate": 4.118162458072961e-05, "loss": 1.0693, "step": 174260 }, { "epoch": 1.113361358496352, "grad_norm": 1.075524926185608, "learning_rate": 4.117668559541559e-05, "loss": 0.8809, "step": 174270 }, { "epoch": 1.1134252456460907, "grad_norm": 0.49496862292289734, "learning_rate": 4.117174669896001e-05, "loss": 0.6506, "step": 174280 }, { "epoch": 1.1134891327958294, "grad_norm": 1.0560492277145386, "learning_rate": 4.116680789141256e-05, "loss": 0.7415, "step": 174290 }, { "epoch": 1.113553019945568, "grad_norm": 0.9809279441833496, "learning_rate": 4.1161869172823e-05, "loss": 0.8428, "step": 174300 }, { "epoch": 1.1136169070953068, "grad_norm": 0.8138776421546936, "learning_rate": 4.115693054324106e-05, "loss": 1.0035, "step": 174310 }, { "epoch": 1.1136807942450455, "grad_norm": 1.1911145448684692, "learning_rate": 4.1151992002716475e-05, "loss": 0.8993, "step": 174320 }, { "epoch": 1.1137446813947842, "grad_norm": 0.5131652355194092, "learning_rate": 4.114705355129899e-05, "loss": 0.7444, "step": 174330 }, { "epoch": 1.113808568544523, "grad_norm": 1.192893624305725, "learning_rate": 4.1142115189038334e-05, "loss": 0.7713, "step": 174340 }, { "epoch": 1.1138724556942616, "grad_norm": 1.0480468273162842, "learning_rate": 4.1137176915984246e-05, "loss": 1.0559, "step": 174350 }, { "epoch": 1.1139363428440003, "grad_norm": 1.0468538999557495, "learning_rate": 4.113223873218644e-05, "loss": 0.7441, "step": 174360 }, { "epoch": 1.114000229993739, "grad_norm": 0.9152908325195312, "learning_rate": 4.112730063769468e-05, "loss": 0.8123, "step": 174370 }, { "epoch": 1.1140641171434778, "grad_norm": 1.3621870279312134, "learning_rate": 4.112236263255866e-05, "loss": 0.8506, "step": 174380 }, { "epoch": 1.1141280042932165, "grad_norm": 0.8971595168113708, "learning_rate": 4.1117424716828126e-05, "loss": 0.7592, "step": 174390 }, { "epoch": 1.1141918914429552, "grad_norm": 3.1121487617492676, "learning_rate": 4.111248689055283e-05, "loss": 0.7638, "step": 174400 }, { "epoch": 1.1142557785926939, "grad_norm": 0.7368488311767578, "learning_rate": 4.1107549153782463e-05, "loss": 0.8275, "step": 174410 }, { "epoch": 1.1143196657424326, "grad_norm": 2.520725965499878, "learning_rate": 4.110261150656678e-05, "loss": 0.9942, "step": 174420 }, { "epoch": 1.1143835528921713, "grad_norm": 0.9537742733955383, "learning_rate": 4.10976739489555e-05, "loss": 0.8468, "step": 174430 }, { "epoch": 1.11444744004191, "grad_norm": 1.5037137269973755, "learning_rate": 4.1093230223758204e-05, "loss": 0.9547, "step": 174440 }, { "epoch": 1.1145113271916487, "grad_norm": 0.5854917764663696, "learning_rate": 4.108829283653227e-05, "loss": 1.0519, "step": 174450 }, { "epoch": 1.1145752143413874, "grad_norm": 0.5292088985443115, "learning_rate": 4.1083355539054936e-05, "loss": 0.7183, "step": 174460 }, { "epoch": 1.114639101491126, "grad_norm": 1.0242040157318115, "learning_rate": 4.1078418331375924e-05, "loss": 0.8832, "step": 174470 }, { "epoch": 1.1147029886408648, "grad_norm": 1.240865707397461, "learning_rate": 4.107348121354496e-05, "loss": 0.8545, "step": 174480 }, { "epoch": 1.1147668757906035, "grad_norm": 3.079923391342163, "learning_rate": 4.106854418561176e-05, "loss": 0.7876, "step": 174490 }, { "epoch": 1.1148307629403422, "grad_norm": 1.0026297569274902, "learning_rate": 4.106360724762604e-05, "loss": 0.7126, "step": 174500 }, { "epoch": 1.114894650090081, "grad_norm": 1.4164525270462036, "learning_rate": 4.1058670399637536e-05, "loss": 0.7584, "step": 174510 }, { "epoch": 1.1149585372398196, "grad_norm": 0.6686666011810303, "learning_rate": 4.105373364169596e-05, "loss": 0.9105, "step": 174520 }, { "epoch": 1.1150224243895583, "grad_norm": 0.8393816947937012, "learning_rate": 4.104879697385102e-05, "loss": 1.1391, "step": 174530 }, { "epoch": 1.115086311539297, "grad_norm": 0.927527129650116, "learning_rate": 4.1043860396152436e-05, "loss": 0.7634, "step": 174540 }, { "epoch": 1.1151501986890358, "grad_norm": 1.2626625299453735, "learning_rate": 4.1038923908649926e-05, "loss": 0.8502, "step": 174550 }, { "epoch": 1.1152140858387745, "grad_norm": 1.2957005500793457, "learning_rate": 4.103398751139321e-05, "loss": 1.0296, "step": 174560 }, { "epoch": 1.1152779729885132, "grad_norm": 0.6817342042922974, "learning_rate": 4.1029051204432e-05, "loss": 0.7997, "step": 174570 }, { "epoch": 1.1153418601382519, "grad_norm": 1.2032334804534912, "learning_rate": 4.1024114987816e-05, "loss": 0.905, "step": 174580 }, { "epoch": 1.1154057472879906, "grad_norm": 1.033623218536377, "learning_rate": 4.101917886159492e-05, "loss": 0.8813, "step": 174590 }, { "epoch": 1.1154696344377293, "grad_norm": 0.7177959084510803, "learning_rate": 4.101424282581849e-05, "loss": 0.7331, "step": 174600 }, { "epoch": 1.115533521587468, "grad_norm": 1.2658313512802124, "learning_rate": 4.100930688053641e-05, "loss": 0.7928, "step": 174610 }, { "epoch": 1.1155974087372067, "grad_norm": 1.4184327125549316, "learning_rate": 4.100437102579838e-05, "loss": 0.878, "step": 174620 }, { "epoch": 1.1156612958869454, "grad_norm": 0.9174264669418335, "learning_rate": 4.099943526165412e-05, "loss": 0.8099, "step": 174630 }, { "epoch": 1.1157251830366839, "grad_norm": 1.151061773300171, "learning_rate": 4.099449958815333e-05, "loss": 0.9451, "step": 174640 }, { "epoch": 1.1157890701864228, "grad_norm": 0.7230824828147888, "learning_rate": 4.098956400534572e-05, "loss": 0.912, "step": 174650 }, { "epoch": 1.1158529573361613, "grad_norm": 0.8960369229316711, "learning_rate": 4.0984628513281e-05, "loss": 1.0227, "step": 174660 }, { "epoch": 1.1159168444859, "grad_norm": 0.6633699536323547, "learning_rate": 4.097969311200886e-05, "loss": 0.9389, "step": 174670 }, { "epoch": 1.1159807316356387, "grad_norm": 1.3627066612243652, "learning_rate": 4.097475780157903e-05, "loss": 1.0452, "step": 174680 }, { "epoch": 1.1160446187853774, "grad_norm": 1.2317612171173096, "learning_rate": 4.0969822582041186e-05, "loss": 0.817, "step": 174690 }, { "epoch": 1.1161085059351161, "grad_norm": 1.1265528202056885, "learning_rate": 4.0964887453445044e-05, "loss": 0.8126, "step": 174700 }, { "epoch": 1.1161723930848548, "grad_norm": 1.0905567407608032, "learning_rate": 4.095995241584029e-05, "loss": 0.9245, "step": 174710 }, { "epoch": 1.1162362802345935, "grad_norm": 1.2128629684448242, "learning_rate": 4.0955017469276646e-05, "loss": 0.9359, "step": 174720 }, { "epoch": 1.1163001673843322, "grad_norm": 0.9822705388069153, "learning_rate": 4.0950082613803804e-05, "loss": 1.0899, "step": 174730 }, { "epoch": 1.116364054534071, "grad_norm": 0.9940265417098999, "learning_rate": 4.094514784947146e-05, "loss": 0.8863, "step": 174740 }, { "epoch": 1.1164279416838097, "grad_norm": 2.056150436401367, "learning_rate": 4.094021317632931e-05, "loss": 0.7975, "step": 174750 }, { "epoch": 1.1164918288335484, "grad_norm": 1.5072576999664307, "learning_rate": 4.093527859442705e-05, "loss": 0.9716, "step": 174760 }, { "epoch": 1.116555715983287, "grad_norm": 0.8445620536804199, "learning_rate": 4.0930344103814374e-05, "loss": 1.021, "step": 174770 }, { "epoch": 1.1166196031330258, "grad_norm": 1.3397151231765747, "learning_rate": 4.0925409704540976e-05, "loss": 0.8359, "step": 174780 }, { "epoch": 1.1166834902827645, "grad_norm": 1.194464921951294, "learning_rate": 4.092047539665656e-05, "loss": 0.9008, "step": 174790 }, { "epoch": 1.1167473774325032, "grad_norm": 1.0016111135482788, "learning_rate": 4.091554118021082e-05, "loss": 0.7577, "step": 174800 }, { "epoch": 1.116811264582242, "grad_norm": 0.9145414233207703, "learning_rate": 4.0910607055253416e-05, "loss": 1.0052, "step": 174810 }, { "epoch": 1.1168751517319806, "grad_norm": 0.8176177144050598, "learning_rate": 4.090567302183408e-05, "loss": 0.6904, "step": 174820 }, { "epoch": 1.1169390388817193, "grad_norm": 0.9763671159744263, "learning_rate": 4.090073908000248e-05, "loss": 0.7303, "step": 174830 }, { "epoch": 1.117002926031458, "grad_norm": 0.9583315849304199, "learning_rate": 4.089580522980831e-05, "loss": 0.8352, "step": 174840 }, { "epoch": 1.1170668131811967, "grad_norm": 1.0675028562545776, "learning_rate": 4.089087147130126e-05, "loss": 0.8125, "step": 174850 }, { "epoch": 1.1171307003309354, "grad_norm": 0.8443053364753723, "learning_rate": 4.088593780453101e-05, "loss": 0.8599, "step": 174860 }, { "epoch": 1.1171945874806741, "grad_norm": 0.7804033160209656, "learning_rate": 4.088100422954725e-05, "loss": 0.8323, "step": 174870 }, { "epoch": 1.1172584746304128, "grad_norm": 1.0660284757614136, "learning_rate": 4.087607074639968e-05, "loss": 0.8467, "step": 174880 }, { "epoch": 1.1173223617801515, "grad_norm": 0.8337397575378418, "learning_rate": 4.0871137355137954e-05, "loss": 0.971, "step": 174890 }, { "epoch": 1.1173862489298902, "grad_norm": 1.1970064640045166, "learning_rate": 4.086620405581178e-05, "loss": 0.7649, "step": 174900 }, { "epoch": 1.117450136079629, "grad_norm": 0.8727056384086609, "learning_rate": 4.086127084847084e-05, "loss": 0.8041, "step": 174910 }, { "epoch": 1.1175140232293677, "grad_norm": 0.9415563941001892, "learning_rate": 4.085633773316481e-05, "loss": 0.851, "step": 174920 }, { "epoch": 1.1175779103791064, "grad_norm": 1.1210389137268066, "learning_rate": 4.085140470994335e-05, "loss": 0.9958, "step": 174930 }, { "epoch": 1.117641797528845, "grad_norm": 2.0388622283935547, "learning_rate": 4.084647177885617e-05, "loss": 0.7574, "step": 174940 }, { "epoch": 1.1177056846785838, "grad_norm": 1.0090299844741821, "learning_rate": 4.084153893995294e-05, "loss": 0.7524, "step": 174950 }, { "epoch": 1.1177695718283225, "grad_norm": 1.3186482191085815, "learning_rate": 4.0836606193283335e-05, "loss": 1.0176, "step": 174960 }, { "epoch": 1.1178334589780612, "grad_norm": 1.435909390449524, "learning_rate": 4.083167353889703e-05, "loss": 1.243, "step": 174970 }, { "epoch": 1.1178973461278, "grad_norm": 1.2158678770065308, "learning_rate": 4.082674097684371e-05, "loss": 0.7206, "step": 174980 }, { "epoch": 1.1179612332775386, "grad_norm": 1.0424692630767822, "learning_rate": 4.082180850717304e-05, "loss": 0.9083, "step": 174990 }, { "epoch": 1.1180251204272773, "grad_norm": 0.7622018456459045, "learning_rate": 4.081687612993469e-05, "loss": 1.0366, "step": 175000 }, { "epoch": 1.118089007577016, "grad_norm": 1.2907741069793701, "learning_rate": 4.081194384517836e-05, "loss": 0.8439, "step": 175010 }, { "epoch": 1.1181528947267547, "grad_norm": 0.8142081499099731, "learning_rate": 4.080701165295369e-05, "loss": 0.9369, "step": 175020 }, { "epoch": 1.1182167818764934, "grad_norm": 0.7973591685295105, "learning_rate": 4.0802079553310364e-05, "loss": 0.8274, "step": 175030 }, { "epoch": 1.1182806690262321, "grad_norm": 1.1246249675750732, "learning_rate": 4.079714754629806e-05, "loss": 0.7711, "step": 175040 }, { "epoch": 1.1183445561759708, "grad_norm": 1.2581672668457031, "learning_rate": 4.0792215631966444e-05, "loss": 1.0281, "step": 175050 }, { "epoch": 1.1184084433257095, "grad_norm": 0.6276068687438965, "learning_rate": 4.078728381036518e-05, "loss": 0.9746, "step": 175060 }, { "epoch": 1.1184723304754483, "grad_norm": 0.6815016865730286, "learning_rate": 4.078235208154394e-05, "loss": 0.9694, "step": 175070 }, { "epoch": 1.118536217625187, "grad_norm": 0.8250168561935425, "learning_rate": 4.077742044555238e-05, "loss": 1.0307, "step": 175080 }, { "epoch": 1.1186001047749257, "grad_norm": 0.7040315866470337, "learning_rate": 4.077248890244019e-05, "loss": 0.7931, "step": 175090 }, { "epoch": 1.1186639919246644, "grad_norm": 0.6185616850852966, "learning_rate": 4.076755745225701e-05, "loss": 0.8425, "step": 175100 }, { "epoch": 1.118727879074403, "grad_norm": 0.7982765436172485, "learning_rate": 4.076262609505252e-05, "loss": 0.987, "step": 175110 }, { "epoch": 1.1187917662241418, "grad_norm": 1.0827354192733765, "learning_rate": 4.075769483087637e-05, "loss": 0.8767, "step": 175120 }, { "epoch": 1.1188556533738803, "grad_norm": 0.7010329961776733, "learning_rate": 4.0752763659778234e-05, "loss": 0.9916, "step": 175130 }, { "epoch": 1.1189195405236192, "grad_norm": 1.2009533643722534, "learning_rate": 4.0747832581807765e-05, "loss": 1.1279, "step": 175140 }, { "epoch": 1.1189834276733577, "grad_norm": 0.8541277050971985, "learning_rate": 4.074290159701463e-05, "loss": 0.9996, "step": 175150 }, { "epoch": 1.1190473148230964, "grad_norm": 0.8169267177581787, "learning_rate": 4.073797070544848e-05, "loss": 1.2023, "step": 175160 }, { "epoch": 1.119111201972835, "grad_norm": 0.9204958081245422, "learning_rate": 4.0733039907158976e-05, "loss": 0.9974, "step": 175170 }, { "epoch": 1.1191750891225738, "grad_norm": 0.9314907789230347, "learning_rate": 4.072810920219578e-05, "loss": 0.7827, "step": 175180 }, { "epoch": 1.1192389762723125, "grad_norm": 0.9987381100654602, "learning_rate": 4.0723178590608545e-05, "loss": 0.6706, "step": 175190 }, { "epoch": 1.1193028634220512, "grad_norm": 1.2689613103866577, "learning_rate": 4.071824807244693e-05, "loss": 1.0809, "step": 175200 }, { "epoch": 1.11936675057179, "grad_norm": 0.9223312139511108, "learning_rate": 4.071331764776059e-05, "loss": 0.7422, "step": 175210 }, { "epoch": 1.1194306377215286, "grad_norm": 0.9146457314491272, "learning_rate": 4.0708387316599166e-05, "loss": 0.7824, "step": 175220 }, { "epoch": 1.1194945248712673, "grad_norm": 0.8546509146690369, "learning_rate": 4.070345707901233e-05, "loss": 1.1368, "step": 175230 }, { "epoch": 1.119558412021006, "grad_norm": 1.138922095298767, "learning_rate": 4.06985269350497e-05, "loss": 1.0918, "step": 175240 }, { "epoch": 1.1196222991707447, "grad_norm": 0.8561988472938538, "learning_rate": 4.0693596884760976e-05, "loss": 0.8103, "step": 175250 }, { "epoch": 1.1196861863204834, "grad_norm": 2.1585047245025635, "learning_rate": 4.0688666928195776e-05, "loss": 0.8861, "step": 175260 }, { "epoch": 1.1197500734702222, "grad_norm": 0.9140286445617676, "learning_rate": 4.068373706540376e-05, "loss": 0.9865, "step": 175270 }, { "epoch": 1.1198139606199609, "grad_norm": 1.0838178396224976, "learning_rate": 4.067880729643456e-05, "loss": 0.6893, "step": 175280 }, { "epoch": 1.1198778477696996, "grad_norm": 0.9828647971153259, "learning_rate": 4.067387762133784e-05, "loss": 0.7995, "step": 175290 }, { "epoch": 1.1199417349194383, "grad_norm": 1.4637941122055054, "learning_rate": 4.0668948040163244e-05, "loss": 0.8851, "step": 175300 }, { "epoch": 1.120005622069177, "grad_norm": 0.7903958559036255, "learning_rate": 4.0664018552960406e-05, "loss": 0.7814, "step": 175310 }, { "epoch": 1.1200695092189157, "grad_norm": 0.8866510987281799, "learning_rate": 4.0659089159778984e-05, "loss": 1.0234, "step": 175320 }, { "epoch": 1.1201333963686544, "grad_norm": 0.8908731937408447, "learning_rate": 4.0654159860668615e-05, "loss": 0.6416, "step": 175330 }, { "epoch": 1.120197283518393, "grad_norm": 1.0165271759033203, "learning_rate": 4.064923065567894e-05, "loss": 0.6595, "step": 175340 }, { "epoch": 1.1202611706681318, "grad_norm": 0.7236544489860535, "learning_rate": 4.064430154485961e-05, "loss": 0.8758, "step": 175350 }, { "epoch": 1.1203250578178705, "grad_norm": 1.364446997642517, "learning_rate": 4.063937252826024e-05, "loss": 0.9554, "step": 175360 }, { "epoch": 1.1203889449676092, "grad_norm": 1.0828992128372192, "learning_rate": 4.0634443605930504e-05, "loss": 0.7889, "step": 175370 }, { "epoch": 1.120452832117348, "grad_norm": 0.9547019600868225, "learning_rate": 4.062951477792002e-05, "loss": 0.6543, "step": 175380 }, { "epoch": 1.1205167192670866, "grad_norm": 0.7458175420761108, "learning_rate": 4.062458604427842e-05, "loss": 0.9494, "step": 175390 }, { "epoch": 1.1205806064168253, "grad_norm": 0.7044423818588257, "learning_rate": 4.0619657405055366e-05, "loss": 0.7833, "step": 175400 }, { "epoch": 1.120644493566564, "grad_norm": 0.512610912322998, "learning_rate": 4.0614728860300464e-05, "loss": 0.7563, "step": 175410 }, { "epoch": 1.1207083807163027, "grad_norm": 0.7722542881965637, "learning_rate": 4.0609800410063366e-05, "loss": 0.8186, "step": 175420 }, { "epoch": 1.1207722678660414, "grad_norm": 0.9468956589698792, "learning_rate": 4.06048720543937e-05, "loss": 1.1304, "step": 175430 }, { "epoch": 1.1208361550157802, "grad_norm": 0.7915295362472534, "learning_rate": 4.05999437933411e-05, "loss": 0.8595, "step": 175440 }, { "epoch": 1.1209000421655189, "grad_norm": 0.7079314589500427, "learning_rate": 4.0595015626955195e-05, "loss": 0.7979, "step": 175450 }, { "epoch": 1.1209639293152576, "grad_norm": 1.3249633312225342, "learning_rate": 4.059008755528562e-05, "loss": 0.8842, "step": 175460 }, { "epoch": 1.1210278164649963, "grad_norm": 1.0822068452835083, "learning_rate": 4.058515957838201e-05, "loss": 0.836, "step": 175470 }, { "epoch": 1.121091703614735, "grad_norm": 1.2300411462783813, "learning_rate": 4.058023169629398e-05, "loss": 0.6829, "step": 175480 }, { "epoch": 1.1211555907644737, "grad_norm": 0.7818104028701782, "learning_rate": 4.057530390907117e-05, "loss": 0.9587, "step": 175490 }, { "epoch": 1.1212194779142124, "grad_norm": 0.9931803345680237, "learning_rate": 4.057037621676321e-05, "loss": 0.935, "step": 175500 }, { "epoch": 1.121283365063951, "grad_norm": 0.8156737685203552, "learning_rate": 4.056544861941971e-05, "loss": 1.1708, "step": 175510 }, { "epoch": 1.1213472522136898, "grad_norm": 0.7829884886741638, "learning_rate": 4.056052111709031e-05, "loss": 0.8434, "step": 175520 }, { "epoch": 1.1214111393634285, "grad_norm": 0.9422668218612671, "learning_rate": 4.055559370982462e-05, "loss": 0.9067, "step": 175530 }, { "epoch": 1.1214750265131672, "grad_norm": 1.0434776544570923, "learning_rate": 4.055066639767228e-05, "loss": 0.9479, "step": 175540 }, { "epoch": 1.121538913662906, "grad_norm": 1.8336058855056763, "learning_rate": 4.0545739180682896e-05, "loss": 1.2378, "step": 175550 }, { "epoch": 1.1216028008126446, "grad_norm": 1.4612547159194946, "learning_rate": 4.0540812058906096e-05, "loss": 0.822, "step": 175560 }, { "epoch": 1.1216666879623833, "grad_norm": 0.8069930672645569, "learning_rate": 4.053588503239151e-05, "loss": 0.7872, "step": 175570 }, { "epoch": 1.121730575112122, "grad_norm": 0.6849136352539062, "learning_rate": 4.0530958101188745e-05, "loss": 0.6795, "step": 175580 }, { "epoch": 1.1217944622618607, "grad_norm": 0.5357284545898438, "learning_rate": 4.052603126534743e-05, "loss": 0.7397, "step": 175590 }, { "epoch": 1.1218583494115995, "grad_norm": 1.2236217260360718, "learning_rate": 4.052110452491717e-05, "loss": 0.7363, "step": 175600 }, { "epoch": 1.1219222365613382, "grad_norm": 1.0828033685684204, "learning_rate": 4.051617787994759e-05, "loss": 0.9291, "step": 175610 }, { "epoch": 1.1219861237110766, "grad_norm": 1.3939331769943237, "learning_rate": 4.051125133048831e-05, "loss": 1.0402, "step": 175620 }, { "epoch": 1.1220500108608156, "grad_norm": 1.7905160188674927, "learning_rate": 4.050632487658893e-05, "loss": 0.8458, "step": 175630 }, { "epoch": 1.122113898010554, "grad_norm": 1.2075114250183105, "learning_rate": 4.0501398518299074e-05, "loss": 0.8363, "step": 175640 }, { "epoch": 1.1221777851602928, "grad_norm": 0.7596954703330994, "learning_rate": 4.049647225566835e-05, "loss": 1.0533, "step": 175650 }, { "epoch": 1.1222416723100315, "grad_norm": 0.8684565424919128, "learning_rate": 4.049154608874638e-05, "loss": 0.6895, "step": 175660 }, { "epoch": 1.1223055594597702, "grad_norm": 0.6642537117004395, "learning_rate": 4.048662001758276e-05, "loss": 0.9142, "step": 175670 }, { "epoch": 1.1223694466095089, "grad_norm": 0.7803688645362854, "learning_rate": 4.0481694042227106e-05, "loss": 0.8982, "step": 175680 }, { "epoch": 1.1224333337592476, "grad_norm": 1.0256503820419312, "learning_rate": 4.0476768162729026e-05, "loss": 0.5992, "step": 175690 }, { "epoch": 1.1224972209089863, "grad_norm": 0.797667920589447, "learning_rate": 4.0471842379138137e-05, "loss": 1.0248, "step": 175700 }, { "epoch": 1.122561108058725, "grad_norm": 1.0702816247940063, "learning_rate": 4.046691669150404e-05, "loss": 0.9816, "step": 175710 }, { "epoch": 1.1226249952084637, "grad_norm": 1.9437769651412964, "learning_rate": 4.0461991099876327e-05, "loss": 1.401, "step": 175720 }, { "epoch": 1.1226888823582024, "grad_norm": 0.9341719150543213, "learning_rate": 4.0457065604304625e-05, "loss": 0.8363, "step": 175730 }, { "epoch": 1.1227527695079411, "grad_norm": 1.0677189826965332, "learning_rate": 4.045214020483852e-05, "loss": 0.8378, "step": 175740 }, { "epoch": 1.1228166566576798, "grad_norm": 0.9307116866111755, "learning_rate": 4.044721490152764e-05, "loss": 0.7788, "step": 175750 }, { "epoch": 1.1228805438074185, "grad_norm": 0.7231166958808899, "learning_rate": 4.0442289694421545e-05, "loss": 0.9322, "step": 175760 }, { "epoch": 1.1229444309571572, "grad_norm": 0.9440060257911682, "learning_rate": 4.043736458356987e-05, "loss": 0.7776, "step": 175770 }, { "epoch": 1.123008318106896, "grad_norm": 0.6914847493171692, "learning_rate": 4.0432439569022215e-05, "loss": 0.8959, "step": 175780 }, { "epoch": 1.1230722052566346, "grad_norm": 0.8245557546615601, "learning_rate": 4.0427514650828164e-05, "loss": 0.9195, "step": 175790 }, { "epoch": 1.1231360924063734, "grad_norm": 1.162238359451294, "learning_rate": 4.042258982903733e-05, "loss": 0.7828, "step": 175800 }, { "epoch": 1.123199979556112, "grad_norm": 0.9539982080459595, "learning_rate": 4.041766510369929e-05, "loss": 0.9306, "step": 175810 }, { "epoch": 1.1232638667058508, "grad_norm": 2.9992008209228516, "learning_rate": 4.041274047486366e-05, "loss": 0.7617, "step": 175820 }, { "epoch": 1.1233277538555895, "grad_norm": 0.7411594986915588, "learning_rate": 4.040781594258003e-05, "loss": 0.8345, "step": 175830 }, { "epoch": 1.1233916410053282, "grad_norm": 3.4469544887542725, "learning_rate": 4.040289150689799e-05, "loss": 0.9663, "step": 175840 }, { "epoch": 1.1234555281550669, "grad_norm": 0.5798484683036804, "learning_rate": 4.0397967167867136e-05, "loss": 0.8329, "step": 175850 }, { "epoch": 1.1235194153048056, "grad_norm": 0.6647492051124573, "learning_rate": 4.039304292553706e-05, "loss": 0.8047, "step": 175860 }, { "epoch": 1.1235833024545443, "grad_norm": 0.9796327948570251, "learning_rate": 4.0388118779957346e-05, "loss": 1.3402, "step": 175870 }, { "epoch": 1.123647189604283, "grad_norm": 0.9039154648780823, "learning_rate": 4.038319473117759e-05, "loss": 0.7715, "step": 175880 }, { "epoch": 1.1237110767540217, "grad_norm": 1.2593525648117065, "learning_rate": 4.0378270779247405e-05, "loss": 0.7266, "step": 175890 }, { "epoch": 1.1237749639037604, "grad_norm": 0.7207663655281067, "learning_rate": 4.037334692421634e-05, "loss": 0.6984, "step": 175900 }, { "epoch": 1.1238388510534991, "grad_norm": 1.0210093259811401, "learning_rate": 4.0368423166134e-05, "loss": 0.6854, "step": 175910 }, { "epoch": 1.1239027382032378, "grad_norm": 1.0901857614517212, "learning_rate": 4.036349950504997e-05, "loss": 0.7311, "step": 175920 }, { "epoch": 1.1239666253529765, "grad_norm": 0.6734563112258911, "learning_rate": 4.035857594101384e-05, "loss": 0.8838, "step": 175930 }, { "epoch": 1.1240305125027152, "grad_norm": 1.24734365940094, "learning_rate": 4.035365247407519e-05, "loss": 0.6681, "step": 175940 }, { "epoch": 1.124094399652454, "grad_norm": 1.328225016593933, "learning_rate": 4.034872910428361e-05, "loss": 0.6492, "step": 175950 }, { "epoch": 1.1241582868021927, "grad_norm": 0.9848608374595642, "learning_rate": 4.0343805831688666e-05, "loss": 0.8751, "step": 175960 }, { "epoch": 1.1242221739519314, "grad_norm": 3.334869861602783, "learning_rate": 4.033888265633996e-05, "loss": 0.878, "step": 175970 }, { "epoch": 1.12428606110167, "grad_norm": 0.7569032311439514, "learning_rate": 4.0333959578287064e-05, "loss": 1.1384, "step": 175980 }, { "epoch": 1.1243499482514088, "grad_norm": 0.623479425907135, "learning_rate": 4.0329036597579554e-05, "loss": 0.8313, "step": 175990 }, { "epoch": 1.1244138354011475, "grad_norm": 0.9155558347702026, "learning_rate": 4.032411371426701e-05, "loss": 0.6329, "step": 176000 }, { "epoch": 1.1244777225508862, "grad_norm": 0.5530851483345032, "learning_rate": 4.031919092839901e-05, "loss": 0.9966, "step": 176010 }, { "epoch": 1.124541609700625, "grad_norm": 1.3361395597457886, "learning_rate": 4.0314268240025136e-05, "loss": 0.7737, "step": 176020 }, { "epoch": 1.1246054968503636, "grad_norm": 0.7976097464561462, "learning_rate": 4.0309345649194965e-05, "loss": 0.9081, "step": 176030 }, { "epoch": 1.1246693840001023, "grad_norm": 1.1982431411743164, "learning_rate": 4.030442315595806e-05, "loss": 0.8035, "step": 176040 }, { "epoch": 1.124733271149841, "grad_norm": 1.0385111570358276, "learning_rate": 4.0299500760364003e-05, "loss": 0.9468, "step": 176050 }, { "epoch": 1.1247971582995797, "grad_norm": 0.796828031539917, "learning_rate": 4.029457846246237e-05, "loss": 0.9623, "step": 176060 }, { "epoch": 1.1248610454493184, "grad_norm": 0.6143119931221008, "learning_rate": 4.028965626230272e-05, "loss": 0.7577, "step": 176070 }, { "epoch": 1.1249249325990571, "grad_norm": 0.8207269906997681, "learning_rate": 4.028473415993464e-05, "loss": 1.015, "step": 176080 }, { "epoch": 1.1249888197487956, "grad_norm": 1.3695751428604126, "learning_rate": 4.027981215540768e-05, "loss": 0.8617, "step": 176090 }, { "epoch": 1.1250527068985345, "grad_norm": 0.821844220161438, "learning_rate": 4.027489024877143e-05, "loss": 0.8861, "step": 176100 }, { "epoch": 1.125116594048273, "grad_norm": 0.7588791847229004, "learning_rate": 4.0269968440075444e-05, "loss": 0.6437, "step": 176110 }, { "epoch": 1.125180481198012, "grad_norm": 1.1480047702789307, "learning_rate": 4.0265046729369304e-05, "loss": 1.0675, "step": 176120 }, { "epoch": 1.1252443683477504, "grad_norm": 0.909865140914917, "learning_rate": 4.026012511670256e-05, "loss": 1.0902, "step": 176130 }, { "epoch": 1.1253082554974891, "grad_norm": 0.5706981420516968, "learning_rate": 4.025520360212478e-05, "loss": 0.886, "step": 176140 }, { "epoch": 1.1253721426472278, "grad_norm": 0.8094174265861511, "learning_rate": 4.0250282185685527e-05, "loss": 0.8935, "step": 176150 }, { "epoch": 1.1254360297969666, "grad_norm": 0.8232213258743286, "learning_rate": 4.0245360867434376e-05, "loss": 1.084, "step": 176160 }, { "epoch": 1.1254999169467053, "grad_norm": 0.9300898313522339, "learning_rate": 4.0240439647420873e-05, "loss": 0.9735, "step": 176170 }, { "epoch": 1.125563804096444, "grad_norm": 1.0743767023086548, "learning_rate": 4.0235518525694594e-05, "loss": 0.7813, "step": 176180 }, { "epoch": 1.1256276912461827, "grad_norm": 0.9083045721054077, "learning_rate": 4.0230597502305085e-05, "loss": 0.7308, "step": 176190 }, { "epoch": 1.1256915783959214, "grad_norm": 0.641772985458374, "learning_rate": 4.022567657730191e-05, "loss": 0.9756, "step": 176200 }, { "epoch": 1.12575546554566, "grad_norm": 1.0198135375976562, "learning_rate": 4.022075575073463e-05, "loss": 1.1072, "step": 176210 }, { "epoch": 1.1258193526953988, "grad_norm": 0.8631082773208618, "learning_rate": 4.0215835022652796e-05, "loss": 0.8153, "step": 176220 }, { "epoch": 1.1258832398451375, "grad_norm": 1.0434458255767822, "learning_rate": 4.0210914393105975e-05, "loss": 1.0284, "step": 176230 }, { "epoch": 1.1259471269948762, "grad_norm": 1.107435703277588, "learning_rate": 4.020599386214371e-05, "loss": 0.7435, "step": 176240 }, { "epoch": 1.126011014144615, "grad_norm": 1.2904818058013916, "learning_rate": 4.020107342981556e-05, "loss": 0.8234, "step": 176250 }, { "epoch": 1.1260749012943536, "grad_norm": 0.8774335980415344, "learning_rate": 4.019615309617108e-05, "loss": 0.9961, "step": 176260 }, { "epoch": 1.1261387884440923, "grad_norm": 0.841364860534668, "learning_rate": 4.019123286125982e-05, "loss": 0.7548, "step": 176270 }, { "epoch": 1.126202675593831, "grad_norm": 1.181922435760498, "learning_rate": 4.0186312725131324e-05, "loss": 0.8136, "step": 176280 }, { "epoch": 1.1262665627435697, "grad_norm": 1.0423723459243774, "learning_rate": 4.0181392687835144e-05, "loss": 0.9189, "step": 176290 }, { "epoch": 1.1263304498933084, "grad_norm": 1.5770231485366821, "learning_rate": 4.0176472749420844e-05, "loss": 0.7019, "step": 176300 }, { "epoch": 1.1263943370430471, "grad_norm": 0.641741931438446, "learning_rate": 4.0171552909937966e-05, "loss": 0.7976, "step": 176310 }, { "epoch": 1.1264582241927859, "grad_norm": 1.0063591003417969, "learning_rate": 4.0166633169436045e-05, "loss": 0.7588, "step": 176320 }, { "epoch": 1.1265221113425246, "grad_norm": 0.9964459538459778, "learning_rate": 4.016171352796464e-05, "loss": 0.7928, "step": 176330 }, { "epoch": 1.1265859984922633, "grad_norm": 0.7890756726264954, "learning_rate": 4.015679398557329e-05, "loss": 0.8646, "step": 176340 }, { "epoch": 1.126649885642002, "grad_norm": 0.8053073883056641, "learning_rate": 4.015187454231154e-05, "loss": 0.732, "step": 176350 }, { "epoch": 1.1267137727917407, "grad_norm": 0.5603983402252197, "learning_rate": 4.0146955198228936e-05, "loss": 0.7962, "step": 176360 }, { "epoch": 1.1267776599414794, "grad_norm": 0.9688702821731567, "learning_rate": 4.014203595337503e-05, "loss": 0.8995, "step": 176370 }, { "epoch": 1.126841547091218, "grad_norm": 0.8661218285560608, "learning_rate": 4.013711680779934e-05, "loss": 0.7945, "step": 176380 }, { "epoch": 1.1269054342409568, "grad_norm": 0.7414273619651794, "learning_rate": 4.013219776155141e-05, "loss": 0.8952, "step": 176390 }, { "epoch": 1.1269693213906955, "grad_norm": 0.9069517254829407, "learning_rate": 4.012727881468079e-05, "loss": 0.8427, "step": 176400 }, { "epoch": 1.1270332085404342, "grad_norm": 1.473358392715454, "learning_rate": 4.0122359967237e-05, "loss": 1.0556, "step": 176410 }, { "epoch": 1.127097095690173, "grad_norm": 0.7204602360725403, "learning_rate": 4.0117441219269605e-05, "loss": 0.8077, "step": 176420 }, { "epoch": 1.1271609828399116, "grad_norm": 0.7743983864784241, "learning_rate": 4.011252257082812e-05, "loss": 0.8541, "step": 176430 }, { "epoch": 1.1272248699896503, "grad_norm": 0.9365828037261963, "learning_rate": 4.010760402196209e-05, "loss": 1.0954, "step": 176440 }, { "epoch": 1.127288757139389, "grad_norm": 0.8942638039588928, "learning_rate": 4.0102685572721046e-05, "loss": 0.8843, "step": 176450 }, { "epoch": 1.1273526442891277, "grad_norm": 0.8413757085800171, "learning_rate": 4.0097767223154513e-05, "loss": 0.9711, "step": 176460 }, { "epoch": 1.1274165314388664, "grad_norm": 0.9489029049873352, "learning_rate": 4.009284897331204e-05, "loss": 0.9959, "step": 176470 }, { "epoch": 1.1274804185886051, "grad_norm": 1.105385661125183, "learning_rate": 4.008793082324315e-05, "loss": 0.9225, "step": 176480 }, { "epoch": 1.1275443057383439, "grad_norm": 1.2501875162124634, "learning_rate": 4.0083012772997363e-05, "loss": 0.87, "step": 176490 }, { "epoch": 1.1276081928880826, "grad_norm": 2.948787212371826, "learning_rate": 4.0078094822624224e-05, "loss": 0.9624, "step": 176500 }, { "epoch": 1.1276720800378213, "grad_norm": 0.6365622282028198, "learning_rate": 4.007317697217325e-05, "loss": 0.8106, "step": 176510 }, { "epoch": 1.12773596718756, "grad_norm": 0.8782965540885925, "learning_rate": 4.006825922169397e-05, "loss": 0.9458, "step": 176520 }, { "epoch": 1.1277998543372987, "grad_norm": 3.0257232189178467, "learning_rate": 4.006334157123592e-05, "loss": 0.6978, "step": 176530 }, { "epoch": 1.1278637414870374, "grad_norm": 0.8372243046760559, "learning_rate": 4.005842402084861e-05, "loss": 1.2384, "step": 176540 }, { "epoch": 1.127927628636776, "grad_norm": 1.0190279483795166, "learning_rate": 4.0053506570581575e-05, "loss": 0.9559, "step": 176550 }, { "epoch": 1.1279915157865146, "grad_norm": 0.8211161494255066, "learning_rate": 4.004858922048433e-05, "loss": 0.8927, "step": 176560 }, { "epoch": 1.1280554029362535, "grad_norm": 0.9208956956863403, "learning_rate": 4.00436719706064e-05, "loss": 1.0404, "step": 176570 }, { "epoch": 1.128119290085992, "grad_norm": 0.8615883588790894, "learning_rate": 4.003875482099731e-05, "loss": 0.9103, "step": 176580 }, { "epoch": 1.128183177235731, "grad_norm": 1.2518213987350464, "learning_rate": 4.0033837771706576e-05, "loss": 0.8344, "step": 176590 }, { "epoch": 1.1282470643854694, "grad_norm": 2.0138180255889893, "learning_rate": 4.0028920822783716e-05, "loss": 0.7134, "step": 176600 }, { "epoch": 1.1283109515352083, "grad_norm": 1.0518046617507935, "learning_rate": 4.002400397427824e-05, "loss": 0.8504, "step": 176610 }, { "epoch": 1.1283748386849468, "grad_norm": 1.0148245096206665, "learning_rate": 4.0019087226239684e-05, "loss": 0.8871, "step": 176620 }, { "epoch": 1.1284387258346855, "grad_norm": 0.7396321296691895, "learning_rate": 4.001417057871756e-05, "loss": 0.7334, "step": 176630 }, { "epoch": 1.1285026129844242, "grad_norm": 0.7813929915428162, "learning_rate": 4.000925403176137e-05, "loss": 0.8969, "step": 176640 }, { "epoch": 1.128566500134163, "grad_norm": 0.9283764958381653, "learning_rate": 4.0004337585420635e-05, "loss": 0.6189, "step": 176650 }, { "epoch": 1.1286303872839016, "grad_norm": 0.765900194644928, "learning_rate": 3.999942123974487e-05, "loss": 0.9674, "step": 176660 }, { "epoch": 1.1286942744336403, "grad_norm": 2.070537805557251, "learning_rate": 3.999450499478359e-05, "loss": 1.0555, "step": 176670 }, { "epoch": 1.128758161583379, "grad_norm": 1.2113842964172363, "learning_rate": 3.99895888505863e-05, "loss": 1.0827, "step": 176680 }, { "epoch": 1.1288220487331178, "grad_norm": 0.9803450703620911, "learning_rate": 3.99846728072025e-05, "loss": 1.0341, "step": 176690 }, { "epoch": 1.1288859358828565, "grad_norm": 2.890547513961792, "learning_rate": 3.997975686468172e-05, "loss": 0.8549, "step": 176700 }, { "epoch": 1.1289498230325952, "grad_norm": 0.7865378260612488, "learning_rate": 3.997484102307345e-05, "loss": 0.7177, "step": 176710 }, { "epoch": 1.1290137101823339, "grad_norm": 0.8870431184768677, "learning_rate": 3.9969925282427205e-05, "loss": 1.0428, "step": 176720 }, { "epoch": 1.1290775973320726, "grad_norm": 0.9200987219810486, "learning_rate": 3.996500964279249e-05, "loss": 0.7899, "step": 176730 }, { "epoch": 1.1291414844818113, "grad_norm": 0.9197986721992493, "learning_rate": 3.996009410421881e-05, "loss": 0.8757, "step": 176740 }, { "epoch": 1.12920537163155, "grad_norm": 1.4534109830856323, "learning_rate": 3.995517866675568e-05, "loss": 0.8529, "step": 176750 }, { "epoch": 1.1292692587812887, "grad_norm": 1.1534450054168701, "learning_rate": 3.995026333045257e-05, "loss": 0.7974, "step": 176760 }, { "epoch": 1.1293331459310274, "grad_norm": 0.5616369843482971, "learning_rate": 3.994534809535901e-05, "loss": 0.9461, "step": 176770 }, { "epoch": 1.129397033080766, "grad_norm": 0.7991965413093567, "learning_rate": 3.99404329615245e-05, "loss": 0.7453, "step": 176780 }, { "epoch": 1.1294609202305048, "grad_norm": 0.9583500027656555, "learning_rate": 3.9935517928998534e-05, "loss": 0.9574, "step": 176790 }, { "epoch": 1.1295248073802435, "grad_norm": 0.9706417322158813, "learning_rate": 3.99306029978306e-05, "loss": 0.9761, "step": 176800 }, { "epoch": 1.1295886945299822, "grad_norm": 1.1579259634017944, "learning_rate": 3.9925688168070205e-05, "loss": 0.8768, "step": 176810 }, { "epoch": 1.129652581679721, "grad_norm": 0.9929895401000977, "learning_rate": 3.992077343976685e-05, "loss": 1.0702, "step": 176820 }, { "epoch": 1.1297164688294596, "grad_norm": 0.6023370027542114, "learning_rate": 3.991585881297002e-05, "loss": 0.921, "step": 176830 }, { "epoch": 1.1297803559791983, "grad_norm": 1.0571560859680176, "learning_rate": 3.991094428772922e-05, "loss": 0.8577, "step": 176840 }, { "epoch": 1.129844243128937, "grad_norm": 2.328549861907959, "learning_rate": 3.990602986409394e-05, "loss": 0.7989, "step": 176850 }, { "epoch": 1.1299081302786758, "grad_norm": 2.132355213165283, "learning_rate": 3.99011155421137e-05, "loss": 1.0796, "step": 176860 }, { "epoch": 1.1299720174284145, "grad_norm": 0.7115640640258789, "learning_rate": 3.9896201321837936e-05, "loss": 0.91, "step": 176870 }, { "epoch": 1.1300359045781532, "grad_norm": 0.7301086783409119, "learning_rate": 3.9891287203316164e-05, "loss": 0.8216, "step": 176880 }, { "epoch": 1.1300997917278919, "grad_norm": 0.8626973032951355, "learning_rate": 3.988637318659788e-05, "loss": 0.7884, "step": 176890 }, { "epoch": 1.1301636788776306, "grad_norm": 1.646178126335144, "learning_rate": 3.988145927173256e-05, "loss": 1.0285, "step": 176900 }, { "epoch": 1.1302275660273693, "grad_norm": 0.8210130333900452, "learning_rate": 3.987654545876971e-05, "loss": 0.6995, "step": 176910 }, { "epoch": 1.130291453177108, "grad_norm": 1.1210635900497437, "learning_rate": 3.98716317477588e-05, "loss": 0.9967, "step": 176920 }, { "epoch": 1.1303553403268467, "grad_norm": 0.7595154643058777, "learning_rate": 3.98667181387493e-05, "loss": 0.9252, "step": 176930 }, { "epoch": 1.1304192274765854, "grad_norm": 0.8323392868041992, "learning_rate": 3.986180463179074e-05, "loss": 0.9651, "step": 176940 }, { "epoch": 1.1304831146263241, "grad_norm": 1.6904256343841553, "learning_rate": 3.985689122693257e-05, "loss": 0.8427, "step": 176950 }, { "epoch": 1.1305470017760628, "grad_norm": 0.7488362789154053, "learning_rate": 3.985197792422428e-05, "loss": 0.9075, "step": 176960 }, { "epoch": 1.1306108889258015, "grad_norm": 0.6929590702056885, "learning_rate": 3.984706472371535e-05, "loss": 0.9052, "step": 176970 }, { "epoch": 1.1306747760755402, "grad_norm": 1.0957715511322021, "learning_rate": 3.984215162545527e-05, "loss": 0.8558, "step": 176980 }, { "epoch": 1.130738663225279, "grad_norm": 1.0007911920547485, "learning_rate": 3.983723862949351e-05, "loss": 0.9489, "step": 176990 }, { "epoch": 1.1308025503750176, "grad_norm": 2.169706344604492, "learning_rate": 3.983232573587955e-05, "loss": 1.0108, "step": 177000 }, { "epoch": 1.1308664375247564, "grad_norm": 0.8061663508415222, "learning_rate": 3.9827412944662856e-05, "loss": 0.7875, "step": 177010 }, { "epoch": 1.130930324674495, "grad_norm": 0.8821120262145996, "learning_rate": 3.982250025589292e-05, "loss": 0.9757, "step": 177020 }, { "epoch": 1.1309942118242338, "grad_norm": 1.5471516847610474, "learning_rate": 3.9817587669619214e-05, "loss": 1.2905, "step": 177030 }, { "epoch": 1.1310580989739725, "grad_norm": 0.76194828748703, "learning_rate": 3.981267518589121e-05, "loss": 0.7426, "step": 177040 }, { "epoch": 1.131121986123711, "grad_norm": 1.5704962015151978, "learning_rate": 3.980776280475838e-05, "loss": 1.1536, "step": 177050 }, { "epoch": 1.1311858732734499, "grad_norm": 0.8537958860397339, "learning_rate": 3.9802850526270184e-05, "loss": 1.0459, "step": 177060 }, { "epoch": 1.1312497604231884, "grad_norm": 0.8242719173431396, "learning_rate": 3.9797938350476116e-05, "loss": 0.6936, "step": 177070 }, { "epoch": 1.1313136475729273, "grad_norm": 0.935931384563446, "learning_rate": 3.979302627742564e-05, "loss": 0.7973, "step": 177080 }, { "epoch": 1.1313775347226658, "grad_norm": 0.7640109062194824, "learning_rate": 3.978811430716821e-05, "loss": 0.7046, "step": 177090 }, { "epoch": 1.1314414218724047, "grad_norm": 1.0798890590667725, "learning_rate": 3.9783202439753303e-05, "loss": 1.0424, "step": 177100 }, { "epoch": 1.1315053090221432, "grad_norm": 0.8156636953353882, "learning_rate": 3.977829067523039e-05, "loss": 0.6593, "step": 177110 }, { "epoch": 1.131569196171882, "grad_norm": 0.8849560022354126, "learning_rate": 3.977337901364893e-05, "loss": 1.1453, "step": 177120 }, { "epoch": 1.1316330833216206, "grad_norm": 0.9597598910331726, "learning_rate": 3.9768467455058395e-05, "loss": 0.7509, "step": 177130 }, { "epoch": 1.1316969704713593, "grad_norm": 0.8824740648269653, "learning_rate": 3.9763555999508226e-05, "loss": 0.6749, "step": 177140 }, { "epoch": 1.131760857621098, "grad_norm": 0.6336933970451355, "learning_rate": 3.975864464704793e-05, "loss": 0.7753, "step": 177150 }, { "epoch": 1.1318247447708367, "grad_norm": 1.0209406614303589, "learning_rate": 3.9753733397726925e-05, "loss": 0.8533, "step": 177160 }, { "epoch": 1.1318886319205754, "grad_norm": 1.1303939819335938, "learning_rate": 3.97488222515947e-05, "loss": 0.8434, "step": 177170 }, { "epoch": 1.1319525190703141, "grad_norm": 0.7389097213745117, "learning_rate": 3.97439112087007e-05, "loss": 0.9985, "step": 177180 }, { "epoch": 1.1320164062200528, "grad_norm": 0.8295130729675293, "learning_rate": 3.9739000269094385e-05, "loss": 1.0372, "step": 177190 }, { "epoch": 1.1320802933697915, "grad_norm": 0.9382843375205994, "learning_rate": 3.9734089432825216e-05, "loss": 0.8728, "step": 177200 }, { "epoch": 1.1321441805195303, "grad_norm": 1.4782503843307495, "learning_rate": 3.9729178699942646e-05, "loss": 0.8717, "step": 177210 }, { "epoch": 1.132208067669269, "grad_norm": 0.9647573828697205, "learning_rate": 3.972426807049614e-05, "loss": 0.9883, "step": 177220 }, { "epoch": 1.1322719548190077, "grad_norm": 0.8529700636863708, "learning_rate": 3.9719357544535134e-05, "loss": 0.9342, "step": 177230 }, { "epoch": 1.1323358419687464, "grad_norm": 0.7517978549003601, "learning_rate": 3.97144471221091e-05, "loss": 0.9383, "step": 177240 }, { "epoch": 1.132399729118485, "grad_norm": 0.7577384114265442, "learning_rate": 3.9709536803267475e-05, "loss": 0.7, "step": 177250 }, { "epoch": 1.1324636162682238, "grad_norm": 0.7614976763725281, "learning_rate": 3.9704626588059715e-05, "loss": 0.9475, "step": 177260 }, { "epoch": 1.1325275034179625, "grad_norm": 0.9894033670425415, "learning_rate": 3.969971647653528e-05, "loss": 0.7899, "step": 177270 }, { "epoch": 1.1325913905677012, "grad_norm": 1.0283654928207397, "learning_rate": 3.969480646874361e-05, "loss": 0.8041, "step": 177280 }, { "epoch": 1.13265527771744, "grad_norm": 0.8264014720916748, "learning_rate": 3.968989656473415e-05, "loss": 0.8589, "step": 177290 }, { "epoch": 1.1327191648671786, "grad_norm": 0.9640235900878906, "learning_rate": 3.968498676455635e-05, "loss": 0.7731, "step": 177300 }, { "epoch": 1.1327830520169173, "grad_norm": 1.018362283706665, "learning_rate": 3.968007706825966e-05, "loss": 0.9082, "step": 177310 }, { "epoch": 1.132846939166656, "grad_norm": 0.6184595227241516, "learning_rate": 3.967516747589352e-05, "loss": 1.0333, "step": 177320 }, { "epoch": 1.1329108263163947, "grad_norm": 0.9920672178268433, "learning_rate": 3.967025798750738e-05, "loss": 0.8722, "step": 177330 }, { "epoch": 1.1329747134661334, "grad_norm": 1.114719271659851, "learning_rate": 3.966534860315069e-05, "loss": 0.7463, "step": 177340 }, { "epoch": 1.1330386006158721, "grad_norm": 0.7361113429069519, "learning_rate": 3.966043932287286e-05, "loss": 0.8014, "step": 177350 }, { "epoch": 1.1331024877656108, "grad_norm": 0.8126183748245239, "learning_rate": 3.965553014672336e-05, "loss": 1.0529, "step": 177360 }, { "epoch": 1.1331663749153496, "grad_norm": 0.9899440407752991, "learning_rate": 3.965062107475161e-05, "loss": 0.6825, "step": 177370 }, { "epoch": 1.1332302620650883, "grad_norm": 1.2756019830703735, "learning_rate": 3.964571210700708e-05, "loss": 0.7693, "step": 177380 }, { "epoch": 1.133294149214827, "grad_norm": 0.8842296600341797, "learning_rate": 3.9640803243539174e-05, "loss": 0.7919, "step": 177390 }, { "epoch": 1.1333580363645657, "grad_norm": 1.163802981376648, "learning_rate": 3.963589448439734e-05, "loss": 1.0671, "step": 177400 }, { "epoch": 1.1334219235143044, "grad_norm": 1.0410068035125732, "learning_rate": 3.9630985829631014e-05, "loss": 0.9584, "step": 177410 }, { "epoch": 1.133485810664043, "grad_norm": 0.9730801582336426, "learning_rate": 3.962607727928963e-05, "loss": 1.0008, "step": 177420 }, { "epoch": 1.1335496978137818, "grad_norm": 0.867691159248352, "learning_rate": 3.962116883342263e-05, "loss": 0.7916, "step": 177430 }, { "epoch": 1.1336135849635205, "grad_norm": 0.998274028301239, "learning_rate": 3.961626049207943e-05, "loss": 0.7744, "step": 177440 }, { "epoch": 1.1336774721132592, "grad_norm": 1.0011632442474365, "learning_rate": 3.961135225530947e-05, "loss": 1.0658, "step": 177450 }, { "epoch": 1.133741359262998, "grad_norm": 0.8723202347755432, "learning_rate": 3.9606444123162176e-05, "loss": 0.8927, "step": 177460 }, { "epoch": 1.1338052464127366, "grad_norm": 1.3066819906234741, "learning_rate": 3.9601536095687e-05, "loss": 1.3038, "step": 177470 }, { "epoch": 1.1338691335624753, "grad_norm": 0.928521454334259, "learning_rate": 3.959662817293334e-05, "loss": 0.6839, "step": 177480 }, { "epoch": 1.133933020712214, "grad_norm": 0.982753574848175, "learning_rate": 3.959172035495064e-05, "loss": 0.7935, "step": 177490 }, { "epoch": 1.1339969078619527, "grad_norm": 0.7935505509376526, "learning_rate": 3.958681264178831e-05, "loss": 0.9875, "step": 177500 }, { "epoch": 1.1340607950116914, "grad_norm": 0.9873987436294556, "learning_rate": 3.958190503349579e-05, "loss": 0.9934, "step": 177510 }, { "epoch": 1.1341246821614301, "grad_norm": 0.8302233219146729, "learning_rate": 3.9576997530122505e-05, "loss": 1.0712, "step": 177520 }, { "epoch": 1.1341885693111688, "grad_norm": 0.8561579585075378, "learning_rate": 3.9572090131717865e-05, "loss": 1.0051, "step": 177530 }, { "epoch": 1.1342524564609073, "grad_norm": 0.733059287071228, "learning_rate": 3.956718283833131e-05, "loss": 0.8051, "step": 177540 }, { "epoch": 1.1343163436106463, "grad_norm": 0.8287229537963867, "learning_rate": 3.9562275650012234e-05, "loss": 1.0124, "step": 177550 }, { "epoch": 1.1343802307603847, "grad_norm": 3.3876659870147705, "learning_rate": 3.955736856681008e-05, "loss": 0.8488, "step": 177560 }, { "epoch": 1.1344441179101237, "grad_norm": 1.316805124282837, "learning_rate": 3.955246158877426e-05, "loss": 0.9535, "step": 177570 }, { "epoch": 1.1345080050598622, "grad_norm": 0.6540564894676208, "learning_rate": 3.954755471595419e-05, "loss": 0.6977, "step": 177580 }, { "epoch": 1.134571892209601, "grad_norm": 0.8143235445022583, "learning_rate": 3.954264794839929e-05, "loss": 0.8273, "step": 177590 }, { "epoch": 1.1346357793593396, "grad_norm": 2.201139211654663, "learning_rate": 3.9537741286158966e-05, "loss": 0.8558, "step": 177600 }, { "epoch": 1.1346996665090783, "grad_norm": 1.5337715148925781, "learning_rate": 3.953283472928264e-05, "loss": 0.8518, "step": 177610 }, { "epoch": 1.134763553658817, "grad_norm": 0.6330432295799255, "learning_rate": 3.952792827781972e-05, "loss": 0.9595, "step": 177620 }, { "epoch": 1.1348274408085557, "grad_norm": 0.9442708492279053, "learning_rate": 3.952302193181963e-05, "loss": 0.7393, "step": 177630 }, { "epoch": 1.1348913279582944, "grad_norm": 0.9224326014518738, "learning_rate": 3.951811569133176e-05, "loss": 1.1248, "step": 177640 }, { "epoch": 1.134955215108033, "grad_norm": 0.8346001505851746, "learning_rate": 3.9513209556405546e-05, "loss": 0.8125, "step": 177650 }, { "epoch": 1.1350191022577718, "grad_norm": 0.6194186210632324, "learning_rate": 3.950830352709038e-05, "loss": 0.6402, "step": 177660 }, { "epoch": 1.1350829894075105, "grad_norm": 0.8308387994766235, "learning_rate": 3.950339760343566e-05, "loss": 0.667, "step": 177670 }, { "epoch": 1.1351468765572492, "grad_norm": 3.698460817337036, "learning_rate": 3.949849178549082e-05, "loss": 0.9149, "step": 177680 }, { "epoch": 1.135210763706988, "grad_norm": 0.8807551860809326, "learning_rate": 3.949358607330525e-05, "loss": 0.9277, "step": 177690 }, { "epoch": 1.1352746508567266, "grad_norm": 0.4799681305885315, "learning_rate": 3.948868046692837e-05, "loss": 0.8688, "step": 177700 }, { "epoch": 1.1353385380064653, "grad_norm": 1.0262006521224976, "learning_rate": 3.948377496640956e-05, "loss": 0.8444, "step": 177710 }, { "epoch": 1.135402425156204, "grad_norm": 1.2053029537200928, "learning_rate": 3.947886957179824e-05, "loss": 0.8337, "step": 177720 }, { "epoch": 1.1354663123059427, "grad_norm": 0.8215839266777039, "learning_rate": 3.94739642831438e-05, "loss": 1.1505, "step": 177730 }, { "epoch": 1.1355301994556815, "grad_norm": 0.8201315402984619, "learning_rate": 3.946905910049564e-05, "loss": 0.5706, "step": 177740 }, { "epoch": 1.1355940866054202, "grad_norm": 1.062843918800354, "learning_rate": 3.9464154023903176e-05, "loss": 0.728, "step": 177750 }, { "epoch": 1.1356579737551589, "grad_norm": 0.9273681640625, "learning_rate": 3.94592490534158e-05, "loss": 0.8214, "step": 177760 }, { "epoch": 1.1357218609048976, "grad_norm": 0.9091299176216125, "learning_rate": 3.9454344189082893e-05, "loss": 0.7926, "step": 177770 }, { "epoch": 1.1357857480546363, "grad_norm": 0.6764705181121826, "learning_rate": 3.944943943095387e-05, "loss": 0.7392, "step": 177780 }, { "epoch": 1.135849635204375, "grad_norm": 1.0174916982650757, "learning_rate": 3.944453477907812e-05, "loss": 1.0032, "step": 177790 }, { "epoch": 1.1359135223541137, "grad_norm": 1.1083827018737793, "learning_rate": 3.943963023350503e-05, "loss": 0.7908, "step": 177800 }, { "epoch": 1.1359774095038524, "grad_norm": 1.1159414052963257, "learning_rate": 3.9434725794284e-05, "loss": 0.8939, "step": 177810 }, { "epoch": 1.136041296653591, "grad_norm": 0.6911755800247192, "learning_rate": 3.9429821461464435e-05, "loss": 1.0674, "step": 177820 }, { "epoch": 1.1361051838033298, "grad_norm": 0.6208989024162292, "learning_rate": 3.942491723509571e-05, "loss": 0.9803, "step": 177830 }, { "epoch": 1.1361690709530685, "grad_norm": 1.1319308280944824, "learning_rate": 3.942001311522721e-05, "loss": 0.7391, "step": 177840 }, { "epoch": 1.1362329581028072, "grad_norm": 0.943028450012207, "learning_rate": 3.941510910190833e-05, "loss": 0.8243, "step": 177850 }, { "epoch": 1.136296845252546, "grad_norm": 0.8729805946350098, "learning_rate": 3.941020519518846e-05, "loss": 0.8554, "step": 177860 }, { "epoch": 1.1363607324022846, "grad_norm": 2.452819585800171, "learning_rate": 3.940530139511699e-05, "loss": 1.1151, "step": 177870 }, { "epoch": 1.1364246195520233, "grad_norm": 0.8181469440460205, "learning_rate": 3.940039770174329e-05, "loss": 0.7646, "step": 177880 }, { "epoch": 1.136488506701762, "grad_norm": 0.7301978468894958, "learning_rate": 3.939549411511676e-05, "loss": 0.8143, "step": 177890 }, { "epoch": 1.1365523938515008, "grad_norm": 1.052147388458252, "learning_rate": 3.939059063528678e-05, "loss": 0.9841, "step": 177900 }, { "epoch": 1.1366162810012395, "grad_norm": 1.142288327217102, "learning_rate": 3.938568726230273e-05, "loss": 0.9373, "step": 177910 }, { "epoch": 1.1366801681509782, "grad_norm": 3.7667288780212402, "learning_rate": 3.9380783996214e-05, "loss": 1.0897, "step": 177920 }, { "epoch": 1.1367440553007169, "grad_norm": 0.8383669853210449, "learning_rate": 3.9375880837069945e-05, "loss": 0.9187, "step": 177930 }, { "epoch": 1.1368079424504556, "grad_norm": 0.7958370447158813, "learning_rate": 3.937097778491997e-05, "loss": 0.6835, "step": 177940 }, { "epoch": 1.1368718296001943, "grad_norm": 0.8150979280471802, "learning_rate": 3.9366074839813446e-05, "loss": 1.0067, "step": 177950 }, { "epoch": 1.136935716749933, "grad_norm": 0.7327728867530823, "learning_rate": 3.9361172001799744e-05, "loss": 0.7047, "step": 177960 }, { "epoch": 1.1369996038996717, "grad_norm": 0.9451047778129578, "learning_rate": 3.935626927092825e-05, "loss": 0.9296, "step": 177970 }, { "epoch": 1.1370634910494104, "grad_norm": 1.3871461153030396, "learning_rate": 3.9351366647248325e-05, "loss": 0.9513, "step": 177980 }, { "epoch": 1.137127378199149, "grad_norm": 1.4654449224472046, "learning_rate": 3.934646413080934e-05, "loss": 1.026, "step": 177990 }, { "epoch": 1.1371912653488878, "grad_norm": 1.2127928733825684, "learning_rate": 3.934156172166069e-05, "loss": 0.8672, "step": 178000 }, { "epoch": 1.1372551524986265, "grad_norm": 1.219888687133789, "learning_rate": 3.933665941985174e-05, "loss": 0.7495, "step": 178010 }, { "epoch": 1.1373190396483652, "grad_norm": 0.9994648098945618, "learning_rate": 3.933175722543185e-05, "loss": 1.1691, "step": 178020 }, { "epoch": 1.1373829267981037, "grad_norm": 1.248658537864685, "learning_rate": 3.9326855138450396e-05, "loss": 0.8557, "step": 178030 }, { "epoch": 1.1374468139478426, "grad_norm": 0.9416319727897644, "learning_rate": 3.932195315895674e-05, "loss": 0.7892, "step": 178040 }, { "epoch": 1.1375107010975811, "grad_norm": 1.1696947813034058, "learning_rate": 3.9317051287000264e-05, "loss": 0.8266, "step": 178050 }, { "epoch": 1.13757458824732, "grad_norm": 1.167878270149231, "learning_rate": 3.931214952263031e-05, "loss": 1.0848, "step": 178060 }, { "epoch": 1.1376384753970585, "grad_norm": 1.0277177095413208, "learning_rate": 3.930724786589626e-05, "loss": 0.9707, "step": 178070 }, { "epoch": 1.1377023625467972, "grad_norm": 1.651376724243164, "learning_rate": 3.9302346316847484e-05, "loss": 1.1477, "step": 178080 }, { "epoch": 1.137766249696536, "grad_norm": 1.2855274677276611, "learning_rate": 3.9297444875533324e-05, "loss": 0.7331, "step": 178090 }, { "epoch": 1.1378301368462747, "grad_norm": 1.2985841035842896, "learning_rate": 3.929254354200316e-05, "loss": 0.8664, "step": 178100 }, { "epoch": 1.1378940239960134, "grad_norm": 0.9524058103561401, "learning_rate": 3.928764231630634e-05, "loss": 0.7921, "step": 178110 }, { "epoch": 1.137957911145752, "grad_norm": 0.7414030432701111, "learning_rate": 3.928274119849223e-05, "loss": 0.8939, "step": 178120 }, { "epoch": 1.1380217982954908, "grad_norm": 0.6210494637489319, "learning_rate": 3.9277840188610197e-05, "loss": 0.9458, "step": 178130 }, { "epoch": 1.1380856854452295, "grad_norm": 0.8775937557220459, "learning_rate": 3.927293928670958e-05, "loss": 0.9972, "step": 178140 }, { "epoch": 1.1381495725949682, "grad_norm": 0.8039231896400452, "learning_rate": 3.926803849283975e-05, "loss": 0.7796, "step": 178150 }, { "epoch": 1.138213459744707, "grad_norm": 1.4437992572784424, "learning_rate": 3.926313780705005e-05, "loss": 0.8891, "step": 178160 }, { "epoch": 1.1382773468944456, "grad_norm": 1.6523823738098145, "learning_rate": 3.925823722938985e-05, "loss": 1.0061, "step": 178170 }, { "epoch": 1.1383412340441843, "grad_norm": 0.7780923843383789, "learning_rate": 3.925333675990849e-05, "loss": 0.9331, "step": 178180 }, { "epoch": 1.138405121193923, "grad_norm": 1.6156859397888184, "learning_rate": 3.924843639865531e-05, "loss": 0.6508, "step": 178190 }, { "epoch": 1.1384690083436617, "grad_norm": 3.4931466579437256, "learning_rate": 3.92435361456797e-05, "loss": 0.9637, "step": 178200 }, { "epoch": 1.1385328954934004, "grad_norm": 1.5247029066085815, "learning_rate": 3.9238636001030974e-05, "loss": 0.9032, "step": 178210 }, { "epoch": 1.1385967826431391, "grad_norm": 1.0243054628372192, "learning_rate": 3.92337359647585e-05, "loss": 0.928, "step": 178220 }, { "epoch": 1.1386606697928778, "grad_norm": 0.9085797071456909, "learning_rate": 3.922883603691162e-05, "loss": 0.9142, "step": 178230 }, { "epoch": 1.1387245569426165, "grad_norm": 0.8062956929206848, "learning_rate": 3.922393621753968e-05, "loss": 0.7967, "step": 178240 }, { "epoch": 1.1387884440923552, "grad_norm": 0.6442756056785583, "learning_rate": 3.921903650669202e-05, "loss": 1.1538, "step": 178250 }, { "epoch": 1.138852331242094, "grad_norm": 0.8777545094490051, "learning_rate": 3.9214136904417986e-05, "loss": 0.8731, "step": 178260 }, { "epoch": 1.1389162183918327, "grad_norm": 1.089011311531067, "learning_rate": 3.920923741076693e-05, "loss": 0.6851, "step": 178270 }, { "epoch": 1.1389801055415714, "grad_norm": 0.9241474866867065, "learning_rate": 3.920433802578819e-05, "loss": 0.9525, "step": 178280 }, { "epoch": 1.13904399269131, "grad_norm": 1.746314287185669, "learning_rate": 3.91994387495311e-05, "loss": 1.2368, "step": 178290 }, { "epoch": 1.1391078798410488, "grad_norm": 0.8153021931648254, "learning_rate": 3.919453958204502e-05, "loss": 0.7242, "step": 178300 }, { "epoch": 1.1391717669907875, "grad_norm": 0.8815270662307739, "learning_rate": 3.918964052337927e-05, "loss": 0.7458, "step": 178310 }, { "epoch": 1.1392356541405262, "grad_norm": 0.9482192993164062, "learning_rate": 3.918474157358318e-05, "loss": 0.8227, "step": 178320 }, { "epoch": 1.139299541290265, "grad_norm": 0.9908492565155029, "learning_rate": 3.9179842732706114e-05, "loss": 0.9984, "step": 178330 }, { "epoch": 1.1393634284400036, "grad_norm": 0.5842317342758179, "learning_rate": 3.917494400079738e-05, "loss": 0.7083, "step": 178340 }, { "epoch": 1.1394273155897423, "grad_norm": 0.8298249840736389, "learning_rate": 3.917004537790633e-05, "loss": 0.816, "step": 178350 }, { "epoch": 1.139491202739481, "grad_norm": 1.1871623992919922, "learning_rate": 3.916514686408229e-05, "loss": 0.7508, "step": 178360 }, { "epoch": 1.1395550898892197, "grad_norm": 0.6309062242507935, "learning_rate": 3.916024845937459e-05, "loss": 0.7599, "step": 178370 }, { "epoch": 1.1396189770389584, "grad_norm": 0.6612233519554138, "learning_rate": 3.9155350163832575e-05, "loss": 0.8779, "step": 178380 }, { "epoch": 1.1396828641886971, "grad_norm": 1.1284621953964233, "learning_rate": 3.915045197750556e-05, "loss": 1.2675, "step": 178390 }, { "epoch": 1.1397467513384358, "grad_norm": 1.0002460479736328, "learning_rate": 3.9145553900442886e-05, "loss": 0.8455, "step": 178400 }, { "epoch": 1.1398106384881745, "grad_norm": 0.8611626625061035, "learning_rate": 3.914065593269387e-05, "loss": 0.8965, "step": 178410 }, { "epoch": 1.1398745256379132, "grad_norm": 1.169734001159668, "learning_rate": 3.9135758074307846e-05, "loss": 0.8153, "step": 178420 }, { "epoch": 1.139938412787652, "grad_norm": 0.885779857635498, "learning_rate": 3.913086032533413e-05, "loss": 0.8775, "step": 178430 }, { "epoch": 1.1400022999373907, "grad_norm": 0.981346607208252, "learning_rate": 3.912596268582206e-05, "loss": 0.8037, "step": 178440 }, { "epoch": 1.1400661870871294, "grad_norm": 1.3203126192092896, "learning_rate": 3.9121554903891656e-05, "loss": 0.8425, "step": 178450 }, { "epoch": 1.140130074236868, "grad_norm": 0.7057384848594666, "learning_rate": 3.911665747249259e-05, "loss": 0.7483, "step": 178460 }, { "epoch": 1.1401939613866068, "grad_norm": 0.6951310634613037, "learning_rate": 3.91117601506982e-05, "loss": 1.031, "step": 178470 }, { "epoch": 1.1402578485363455, "grad_norm": 0.8375203013420105, "learning_rate": 3.91068629385578e-05, "loss": 0.7288, "step": 178480 }, { "epoch": 1.1403217356860842, "grad_norm": 0.8579121232032776, "learning_rate": 3.910196583612071e-05, "loss": 0.919, "step": 178490 }, { "epoch": 1.140385622835823, "grad_norm": 1.0421398878097534, "learning_rate": 3.909706884343625e-05, "loss": 1.0143, "step": 178500 }, { "epoch": 1.1404495099855616, "grad_norm": 0.5818277597427368, "learning_rate": 3.9092171960553745e-05, "loss": 1.0266, "step": 178510 }, { "epoch": 1.1405133971353, "grad_norm": 1.5515040159225464, "learning_rate": 3.908727518752251e-05, "loss": 0.9996, "step": 178520 }, { "epoch": 1.140577284285039, "grad_norm": 0.9544771909713745, "learning_rate": 3.908237852439185e-05, "loss": 0.8596, "step": 178530 }, { "epoch": 1.1406411714347775, "grad_norm": 1.0081593990325928, "learning_rate": 3.9077481971211075e-05, "loss": 0.6763, "step": 178540 }, { "epoch": 1.1407050585845164, "grad_norm": 0.8507857918739319, "learning_rate": 3.907258552802951e-05, "loss": 1.3678, "step": 178550 }, { "epoch": 1.140768945734255, "grad_norm": 0.8438236713409424, "learning_rate": 3.9067689194896476e-05, "loss": 0.8784, "step": 178560 }, { "epoch": 1.1408328328839936, "grad_norm": 0.8789088129997253, "learning_rate": 3.906279297186125e-05, "loss": 0.7002, "step": 178570 }, { "epoch": 1.1408967200337323, "grad_norm": 0.8186583518981934, "learning_rate": 3.905789685897318e-05, "loss": 0.866, "step": 178580 }, { "epoch": 1.140960607183471, "grad_norm": 0.7446259260177612, "learning_rate": 3.905300085628154e-05, "loss": 0.8398, "step": 178590 }, { "epoch": 1.1410244943332097, "grad_norm": 1.0921506881713867, "learning_rate": 3.9048104963835654e-05, "loss": 1.1518, "step": 178600 }, { "epoch": 1.1410883814829484, "grad_norm": 1.0900102853775024, "learning_rate": 3.904320918168483e-05, "loss": 0.8006, "step": 178610 }, { "epoch": 1.1411522686326872, "grad_norm": 0.7984510660171509, "learning_rate": 3.9038313509878365e-05, "loss": 0.8165, "step": 178620 }, { "epoch": 1.1412161557824259, "grad_norm": 0.9651395678520203, "learning_rate": 3.9033417948465554e-05, "loss": 1.1725, "step": 178630 }, { "epoch": 1.1412800429321646, "grad_norm": 0.9538650512695312, "learning_rate": 3.902852249749573e-05, "loss": 0.7891, "step": 178640 }, { "epoch": 1.1413439300819033, "grad_norm": 0.9638974070549011, "learning_rate": 3.9023627157018174e-05, "loss": 0.7188, "step": 178650 }, { "epoch": 1.141407817231642, "grad_norm": 0.8554356694221497, "learning_rate": 3.901873192708219e-05, "loss": 1.0519, "step": 178660 }, { "epoch": 1.1414717043813807, "grad_norm": 0.9228730797767639, "learning_rate": 3.9013836807737064e-05, "loss": 1.0421, "step": 178670 }, { "epoch": 1.1415355915311194, "grad_norm": 0.788002610206604, "learning_rate": 3.9008941799032116e-05, "loss": 0.8416, "step": 178680 }, { "epoch": 1.141599478680858, "grad_norm": 0.4219997823238373, "learning_rate": 3.9004046901016634e-05, "loss": 0.843, "step": 178690 }, { "epoch": 1.1416633658305968, "grad_norm": 0.9031209945678711, "learning_rate": 3.899915211373991e-05, "loss": 0.9158, "step": 178700 }, { "epoch": 1.1417272529803355, "grad_norm": 0.7229623794555664, "learning_rate": 3.899425743725124e-05, "loss": 0.9196, "step": 178710 }, { "epoch": 1.1417911401300742, "grad_norm": 0.8259024620056152, "learning_rate": 3.898936287159992e-05, "loss": 0.9001, "step": 178720 }, { "epoch": 1.141855027279813, "grad_norm": 1.3349802494049072, "learning_rate": 3.898446841683523e-05, "loss": 0.8477, "step": 178730 }, { "epoch": 1.1419189144295516, "grad_norm": 0.7066223621368408, "learning_rate": 3.8979574073006485e-05, "loss": 0.8269, "step": 178740 }, { "epoch": 1.1419828015792903, "grad_norm": 1.2465709447860718, "learning_rate": 3.897467984016296e-05, "loss": 1.0771, "step": 178750 }, { "epoch": 1.142046688729029, "grad_norm": 0.7932277321815491, "learning_rate": 3.896978571835395e-05, "loss": 0.7609, "step": 178760 }, { "epoch": 1.1421105758787677, "grad_norm": 1.180996298789978, "learning_rate": 3.8964891707628745e-05, "loss": 0.7902, "step": 178770 }, { "epoch": 1.1421744630285064, "grad_norm": 1.8655349016189575, "learning_rate": 3.895999780803662e-05, "loss": 0.6203, "step": 178780 }, { "epoch": 1.1422383501782452, "grad_norm": 0.9006361365318298, "learning_rate": 3.8955104019626865e-05, "loss": 0.9835, "step": 178790 }, { "epoch": 1.1423022373279839, "grad_norm": 1.117305040359497, "learning_rate": 3.895021034244878e-05, "loss": 0.8953, "step": 178800 }, { "epoch": 1.1423661244777226, "grad_norm": 1.0132900476455688, "learning_rate": 3.894531677655162e-05, "loss": 0.9854, "step": 178810 }, { "epoch": 1.1424300116274613, "grad_norm": 1.055531620979309, "learning_rate": 3.89404233219847e-05, "loss": 0.551, "step": 178820 }, { "epoch": 1.1424938987772, "grad_norm": 1.010424256324768, "learning_rate": 3.893552997879727e-05, "loss": 1.0174, "step": 178830 }, { "epoch": 1.1425577859269387, "grad_norm": 0.947930097579956, "learning_rate": 3.893063674703862e-05, "loss": 0.9078, "step": 178840 }, { "epoch": 1.1426216730766774, "grad_norm": 1.0680005550384521, "learning_rate": 3.892574362675805e-05, "loss": 0.9613, "step": 178850 }, { "epoch": 1.142685560226416, "grad_norm": 1.8149521350860596, "learning_rate": 3.8920850618004825e-05, "loss": 0.7425, "step": 178860 }, { "epoch": 1.1427494473761548, "grad_norm": 1.3183022737503052, "learning_rate": 3.891595772082821e-05, "loss": 1.0288, "step": 178870 }, { "epoch": 1.1428133345258935, "grad_norm": 0.9927735924720764, "learning_rate": 3.891106493527749e-05, "loss": 0.6818, "step": 178880 }, { "epoch": 1.1428772216756322, "grad_norm": 0.6876515746116638, "learning_rate": 3.8906172261401944e-05, "loss": 1.0061, "step": 178890 }, { "epoch": 1.142941108825371, "grad_norm": 0.6178897619247437, "learning_rate": 3.8901279699250833e-05, "loss": 0.7599, "step": 178900 }, { "epoch": 1.1430049959751096, "grad_norm": 0.7962726950645447, "learning_rate": 3.889638724887345e-05, "loss": 0.9557, "step": 178910 }, { "epoch": 1.1430688831248483, "grad_norm": 1.6612660884857178, "learning_rate": 3.889149491031905e-05, "loss": 0.9919, "step": 178920 }, { "epoch": 1.143132770274587, "grad_norm": 2.1932833194732666, "learning_rate": 3.88866026836369e-05, "loss": 0.9959, "step": 178930 }, { "epoch": 1.1431966574243257, "grad_norm": 1.0773873329162598, "learning_rate": 3.888171056887628e-05, "loss": 0.9211, "step": 178940 }, { "epoch": 1.1432605445740645, "grad_norm": 0.9403598308563232, "learning_rate": 3.887681856608646e-05, "loss": 0.7464, "step": 178950 }, { "epoch": 1.1433244317238032, "grad_norm": 0.6172276139259338, "learning_rate": 3.8871926675316696e-05, "loss": 0.8862, "step": 178960 }, { "epoch": 1.1433883188735419, "grad_norm": 0.8478199243545532, "learning_rate": 3.886703489661625e-05, "loss": 1.1241, "step": 178970 }, { "epoch": 1.1434522060232806, "grad_norm": 1.059793472290039, "learning_rate": 3.8862143230034395e-05, "loss": 0.8413, "step": 178980 }, { "epoch": 1.143516093173019, "grad_norm": 1.063503384590149, "learning_rate": 3.88572516756204e-05, "loss": 0.6765, "step": 178990 }, { "epoch": 1.143579980322758, "grad_norm": 1.4878591299057007, "learning_rate": 3.8852360233423515e-05, "loss": 1.0485, "step": 179000 }, { "epoch": 1.1436438674724965, "grad_norm": 0.5734277367591858, "learning_rate": 3.884746890349301e-05, "loss": 0.9841, "step": 179010 }, { "epoch": 1.1437077546222354, "grad_norm": 1.1828947067260742, "learning_rate": 3.8842577685878136e-05, "loss": 1.053, "step": 179020 }, { "epoch": 1.1437716417719739, "grad_norm": 0.7959204316139221, "learning_rate": 3.883768658062816e-05, "loss": 0.8519, "step": 179030 }, { "epoch": 1.1438355289217128, "grad_norm": 1.2506985664367676, "learning_rate": 3.883279558779234e-05, "loss": 0.8219, "step": 179040 }, { "epoch": 1.1438994160714513, "grad_norm": 0.72293621301651, "learning_rate": 3.882790470741993e-05, "loss": 1.0947, "step": 179050 }, { "epoch": 1.14396330322119, "grad_norm": 1.0498420000076294, "learning_rate": 3.8823013939560177e-05, "loss": 0.8723, "step": 179060 }, { "epoch": 1.1440271903709287, "grad_norm": 0.7280313372612, "learning_rate": 3.881812328426234e-05, "loss": 0.8269, "step": 179070 }, { "epoch": 1.1440910775206674, "grad_norm": 0.8474599719047546, "learning_rate": 3.881323274157569e-05, "loss": 1.0128, "step": 179080 }, { "epoch": 1.1441549646704061, "grad_norm": 0.9039817452430725, "learning_rate": 3.880834231154946e-05, "loss": 0.8717, "step": 179090 }, { "epoch": 1.1442188518201448, "grad_norm": 2.04731822013855, "learning_rate": 3.8803451994232896e-05, "loss": 0.754, "step": 179100 }, { "epoch": 1.1442827389698835, "grad_norm": 0.7595462203025818, "learning_rate": 3.879856178967526e-05, "loss": 0.7114, "step": 179110 }, { "epoch": 1.1443466261196222, "grad_norm": 1.808081030845642, "learning_rate": 3.87936716979258e-05, "loss": 0.7817, "step": 179120 }, { "epoch": 1.144410513269361, "grad_norm": 0.9245400428771973, "learning_rate": 3.8788781719033765e-05, "loss": 0.7758, "step": 179130 }, { "epoch": 1.1444744004190996, "grad_norm": 0.8909382224082947, "learning_rate": 3.8783891853048394e-05, "loss": 1.1851, "step": 179140 }, { "epoch": 1.1445382875688384, "grad_norm": 0.8799207210540771, "learning_rate": 3.877900210001893e-05, "loss": 0.8381, "step": 179150 }, { "epoch": 1.144602174718577, "grad_norm": 2.0935261249542236, "learning_rate": 3.877411245999462e-05, "loss": 0.5351, "step": 179160 }, { "epoch": 1.1446660618683158, "grad_norm": 0.7175489664077759, "learning_rate": 3.8769222933024716e-05, "loss": 0.8042, "step": 179170 }, { "epoch": 1.1447299490180545, "grad_norm": 0.9167541861534119, "learning_rate": 3.8764333519158455e-05, "loss": 0.919, "step": 179180 }, { "epoch": 1.1447938361677932, "grad_norm": 0.929903507232666, "learning_rate": 3.8759444218445075e-05, "loss": 1.0032, "step": 179190 }, { "epoch": 1.1448577233175319, "grad_norm": 0.9965102076530457, "learning_rate": 3.875455503093381e-05, "loss": 0.9498, "step": 179200 }, { "epoch": 1.1449216104672706, "grad_norm": 0.677137553691864, "learning_rate": 3.874966595667391e-05, "loss": 0.8057, "step": 179210 }, { "epoch": 1.1449854976170093, "grad_norm": 1.0251044034957886, "learning_rate": 3.874477699571461e-05, "loss": 0.7497, "step": 179220 }, { "epoch": 1.145049384766748, "grad_norm": 0.7787938117980957, "learning_rate": 3.873988814810514e-05, "loss": 0.8835, "step": 179230 }, { "epoch": 1.1451132719164867, "grad_norm": 0.9648923277854919, "learning_rate": 3.873499941389474e-05, "loss": 0.874, "step": 179240 }, { "epoch": 1.1451771590662254, "grad_norm": 0.9109137058258057, "learning_rate": 3.8730110793132634e-05, "loss": 1.028, "step": 179250 }, { "epoch": 1.1452410462159641, "grad_norm": 1.3188549280166626, "learning_rate": 3.872522228586807e-05, "loss": 0.9197, "step": 179260 }, { "epoch": 1.1453049333657028, "grad_norm": 0.8881723284721375, "learning_rate": 3.8720333892150265e-05, "loss": 0.7404, "step": 179270 }, { "epoch": 1.1453688205154415, "grad_norm": 0.8248503804206848, "learning_rate": 3.871544561202846e-05, "loss": 0.8751, "step": 179280 }, { "epoch": 1.1454327076651802, "grad_norm": 1.1585686206817627, "learning_rate": 3.8710557445551884e-05, "loss": 0.8279, "step": 179290 }, { "epoch": 1.145496594814919, "grad_norm": 0.8697220087051392, "learning_rate": 3.870566939276975e-05, "loss": 0.9178, "step": 179300 }, { "epoch": 1.1455604819646577, "grad_norm": 0.8634340167045593, "learning_rate": 3.870078145373131e-05, "loss": 0.8718, "step": 179310 }, { "epoch": 1.1456243691143964, "grad_norm": 0.6686280965805054, "learning_rate": 3.8695893628485766e-05, "loss": 0.7065, "step": 179320 }, { "epoch": 1.145688256264135, "grad_norm": 0.8824241161346436, "learning_rate": 3.8691005917082355e-05, "loss": 0.835, "step": 179330 }, { "epoch": 1.1457521434138738, "grad_norm": 1.1661261320114136, "learning_rate": 3.86861183195703e-05, "loss": 0.8727, "step": 179340 }, { "epoch": 1.1458160305636125, "grad_norm": 2.5761728286743164, "learning_rate": 3.868123083599882e-05, "loss": 0.7874, "step": 179350 }, { "epoch": 1.1458799177133512, "grad_norm": 1.544651985168457, "learning_rate": 3.867634346641713e-05, "loss": 0.8582, "step": 179360 }, { "epoch": 1.1459438048630899, "grad_norm": 0.9821982979774475, "learning_rate": 3.867145621087447e-05, "loss": 0.9806, "step": 179370 }, { "epoch": 1.1460076920128286, "grad_norm": 1.0123443603515625, "learning_rate": 3.866656906942004e-05, "loss": 1.0326, "step": 179380 }, { "epoch": 1.1460715791625673, "grad_norm": 0.7034146785736084, "learning_rate": 3.866168204210307e-05, "loss": 0.9303, "step": 179390 }, { "epoch": 1.146135466312306, "grad_norm": 0.9089710712432861, "learning_rate": 3.865679512897276e-05, "loss": 0.9674, "step": 179400 }, { "epoch": 1.1461993534620447, "grad_norm": 0.6943764686584473, "learning_rate": 3.865190833007835e-05, "loss": 0.6687, "step": 179410 }, { "epoch": 1.1462632406117834, "grad_norm": 0.9293021559715271, "learning_rate": 3.8647021645469025e-05, "loss": 0.9211, "step": 179420 }, { "epoch": 1.1463271277615221, "grad_norm": 1.3763326406478882, "learning_rate": 3.8642135075194045e-05, "loss": 0.9414, "step": 179430 }, { "epoch": 1.1463910149112608, "grad_norm": 0.9421727657318115, "learning_rate": 3.863724861930258e-05, "loss": 0.9874, "step": 179440 }, { "epoch": 1.1464549020609995, "grad_norm": 1.2408033609390259, "learning_rate": 3.863236227784383e-05, "loss": 0.9334, "step": 179450 }, { "epoch": 1.1465187892107382, "grad_norm": 0.9744552373886108, "learning_rate": 3.862747605086704e-05, "loss": 0.8964, "step": 179460 }, { "epoch": 1.146582676360477, "grad_norm": 0.6820455193519592, "learning_rate": 3.86225899384214e-05, "loss": 0.6913, "step": 179470 }, { "epoch": 1.1466465635102154, "grad_norm": 0.6710923910140991, "learning_rate": 3.8617703940556114e-05, "loss": 0.9485, "step": 179480 }, { "epoch": 1.1467104506599544, "grad_norm": 0.6139732003211975, "learning_rate": 3.861281805732041e-05, "loss": 0.8372, "step": 179490 }, { "epoch": 1.1467743378096928, "grad_norm": 0.9873855113983154, "learning_rate": 3.8607932288763473e-05, "loss": 0.98, "step": 179500 }, { "epoch": 1.1468382249594318, "grad_norm": 0.5885666608810425, "learning_rate": 3.860353519515323e-05, "loss": 1.0115, "step": 179510 }, { "epoch": 1.1469021121091703, "grad_norm": 1.1420519351959229, "learning_rate": 3.859864964462152e-05, "loss": 0.8411, "step": 179520 }, { "epoch": 1.1469659992589092, "grad_norm": 1.1546262502670288, "learning_rate": 3.859376420891128e-05, "loss": 0.8978, "step": 179530 }, { "epoch": 1.1470298864086477, "grad_norm": 1.3471343517303467, "learning_rate": 3.858887888807169e-05, "loss": 0.9926, "step": 179540 }, { "epoch": 1.1470937735583864, "grad_norm": 0.7748019695281982, "learning_rate": 3.858399368215197e-05, "loss": 1.0838, "step": 179550 }, { "epoch": 1.147157660708125, "grad_norm": 0.711402416229248, "learning_rate": 3.857910859120131e-05, "loss": 0.804, "step": 179560 }, { "epoch": 1.1472215478578638, "grad_norm": 0.8038139939308167, "learning_rate": 3.85742236152689e-05, "loss": 0.733, "step": 179570 }, { "epoch": 1.1472854350076025, "grad_norm": 2.674290895462036, "learning_rate": 3.8569338754403966e-05, "loss": 1.0378, "step": 179580 }, { "epoch": 1.1473493221573412, "grad_norm": 1.2918055057525635, "learning_rate": 3.856445400865566e-05, "loss": 1.0691, "step": 179590 }, { "epoch": 1.14741320930708, "grad_norm": 0.8749229311943054, "learning_rate": 3.855956937807319e-05, "loss": 0.8179, "step": 179600 }, { "epoch": 1.1474770964568186, "grad_norm": 2.0050618648529053, "learning_rate": 3.8554684862705755e-05, "loss": 1.077, "step": 179610 }, { "epoch": 1.1475409836065573, "grad_norm": 0.6908048391342163, "learning_rate": 3.8549800462602546e-05, "loss": 0.985, "step": 179620 }, { "epoch": 1.147604870756296, "grad_norm": 1.2195981740951538, "learning_rate": 3.8544916177812756e-05, "loss": 0.8393, "step": 179630 }, { "epoch": 1.1476687579060347, "grad_norm": 0.7860914468765259, "learning_rate": 3.854003200838557e-05, "loss": 0.855, "step": 179640 }, { "epoch": 1.1477326450557734, "grad_norm": 0.6682232618331909, "learning_rate": 3.8535147954370174e-05, "loss": 0.9404, "step": 179650 }, { "epoch": 1.1477965322055121, "grad_norm": 1.3988233804702759, "learning_rate": 3.853026401581576e-05, "loss": 0.9323, "step": 179660 }, { "epoch": 1.1478604193552508, "grad_norm": 0.9596664309501648, "learning_rate": 3.852538019277151e-05, "loss": 0.9515, "step": 179670 }, { "epoch": 1.1479243065049896, "grad_norm": 1.8466547727584839, "learning_rate": 3.85204964852866e-05, "loss": 1.0884, "step": 179680 }, { "epoch": 1.1479881936547283, "grad_norm": 0.7646600604057312, "learning_rate": 3.851561289341023e-05, "loss": 1.0537, "step": 179690 }, { "epoch": 1.148052080804467, "grad_norm": 0.6700149178504944, "learning_rate": 3.851072941719157e-05, "loss": 0.8711, "step": 179700 }, { "epoch": 1.1481159679542057, "grad_norm": 1.687111258506775, "learning_rate": 3.8505846056679805e-05, "loss": 0.6716, "step": 179710 }, { "epoch": 1.1481798551039444, "grad_norm": 0.8387539982795715, "learning_rate": 3.850096281192412e-05, "loss": 1.1118, "step": 179720 }, { "epoch": 1.148243742253683, "grad_norm": 1.3596510887145996, "learning_rate": 3.8496079682973685e-05, "loss": 0.8337, "step": 179730 }, { "epoch": 1.1483076294034218, "grad_norm": 2.704559803009033, "learning_rate": 3.849119666987767e-05, "loss": 1.0053, "step": 179740 }, { "epoch": 1.1483715165531605, "grad_norm": 0.7268739938735962, "learning_rate": 3.8486313772685274e-05, "loss": 1.0139, "step": 179750 }, { "epoch": 1.1484354037028992, "grad_norm": 1.4763686656951904, "learning_rate": 3.848143099144566e-05, "loss": 0.8677, "step": 179760 }, { "epoch": 1.148499290852638, "grad_norm": 1.2906302213668823, "learning_rate": 3.847654832620798e-05, "loss": 0.9295, "step": 179770 }, { "epoch": 1.1485631780023766, "grad_norm": 0.7881545424461365, "learning_rate": 3.847166577702145e-05, "loss": 0.967, "step": 179780 }, { "epoch": 1.1486270651521153, "grad_norm": 0.7793990969657898, "learning_rate": 3.846678334393521e-05, "loss": 0.8536, "step": 179790 }, { "epoch": 1.148690952301854, "grad_norm": 0.7015102505683899, "learning_rate": 3.8461901026998424e-05, "loss": 0.766, "step": 179800 }, { "epoch": 1.1487548394515927, "grad_norm": 0.8594871163368225, "learning_rate": 3.84570188262603e-05, "loss": 0.9288, "step": 179810 }, { "epoch": 1.1488187266013314, "grad_norm": 0.8671073317527771, "learning_rate": 3.845213674176997e-05, "loss": 0.8234, "step": 179820 }, { "epoch": 1.1488826137510701, "grad_norm": 1.8361603021621704, "learning_rate": 3.8447254773576625e-05, "loss": 0.8157, "step": 179830 }, { "epoch": 1.1489465009008089, "grad_norm": 0.8427587151527405, "learning_rate": 3.844237292172941e-05, "loss": 0.8628, "step": 179840 }, { "epoch": 1.1490103880505476, "grad_norm": 0.4868806004524231, "learning_rate": 3.84374911862775e-05, "loss": 0.8923, "step": 179850 }, { "epoch": 1.1490742752002863, "grad_norm": 0.9738015532493591, "learning_rate": 3.843260956727006e-05, "loss": 0.8577, "step": 179860 }, { "epoch": 1.149138162350025, "grad_norm": 0.9108662009239197, "learning_rate": 3.8427728064756246e-05, "loss": 0.697, "step": 179870 }, { "epoch": 1.1492020494997637, "grad_norm": 0.8939322233200073, "learning_rate": 3.842284667878522e-05, "loss": 0.8281, "step": 179880 }, { "epoch": 1.1492659366495024, "grad_norm": 2.0734434127807617, "learning_rate": 3.8417965409406146e-05, "loss": 0.8891, "step": 179890 }, { "epoch": 1.149329823799241, "grad_norm": 1.4640614986419678, "learning_rate": 3.8413084256668184e-05, "loss": 0.9592, "step": 179900 }, { "epoch": 1.1493937109489798, "grad_norm": 1.422544002532959, "learning_rate": 3.840820322062048e-05, "loss": 0.7184, "step": 179910 }, { "epoch": 1.1494575980987185, "grad_norm": 0.8664306402206421, "learning_rate": 3.8403322301312204e-05, "loss": 0.9412, "step": 179920 }, { "epoch": 1.1495214852484572, "grad_norm": 0.8996186852455139, "learning_rate": 3.83984414987925e-05, "loss": 0.7447, "step": 179930 }, { "epoch": 1.149585372398196, "grad_norm": 0.9673845171928406, "learning_rate": 3.839356081311053e-05, "loss": 0.8512, "step": 179940 }, { "epoch": 1.1496492595479346, "grad_norm": 2.0374224185943604, "learning_rate": 3.8388680244315445e-05, "loss": 0.9246, "step": 179950 }, { "epoch": 1.1497131466976733, "grad_norm": 0.8047225475311279, "learning_rate": 3.83837997924564e-05, "loss": 0.8539, "step": 179960 }, { "epoch": 1.1497770338474118, "grad_norm": 0.9088835120201111, "learning_rate": 3.837891945758253e-05, "loss": 1.0117, "step": 179970 }, { "epoch": 1.1498409209971507, "grad_norm": 0.5110180974006653, "learning_rate": 3.8374039239743e-05, "loss": 1.0974, "step": 179980 }, { "epoch": 1.1499048081468892, "grad_norm": 0.6761854887008667, "learning_rate": 3.8369159138986946e-05, "loss": 0.9451, "step": 179990 }, { "epoch": 1.1499686952966282, "grad_norm": 0.8277345895767212, "learning_rate": 3.836427915536353e-05, "loss": 0.9262, "step": 180000 }, { "epoch": 1.1500325824463666, "grad_norm": 0.847381055355072, "learning_rate": 3.8359399288921876e-05, "loss": 0.8348, "step": 180010 }, { "epoch": 1.1500964695961056, "grad_norm": 0.8218114972114563, "learning_rate": 3.835451953971115e-05, "loss": 0.7931, "step": 180020 }, { "epoch": 1.150160356745844, "grad_norm": 1.1525636911392212, "learning_rate": 3.834963990778049e-05, "loss": 0.9558, "step": 180030 }, { "epoch": 1.1502242438955828, "grad_norm": 3.6466426849365234, "learning_rate": 3.8344760393179036e-05, "loss": 0.9922, "step": 180040 }, { "epoch": 1.1502881310453215, "grad_norm": 0.820673406124115, "learning_rate": 3.833988099595593e-05, "loss": 0.9471, "step": 180050 }, { "epoch": 1.1503520181950602, "grad_norm": 3.4960498809814453, "learning_rate": 3.83350017161603e-05, "loss": 0.7328, "step": 180060 }, { "epoch": 1.1504159053447989, "grad_norm": 1.1338176727294922, "learning_rate": 3.833012255384132e-05, "loss": 1.0169, "step": 180070 }, { "epoch": 1.1504797924945376, "grad_norm": 0.9621381163597107, "learning_rate": 3.8325243509048087e-05, "loss": 0.8626, "step": 180080 }, { "epoch": 1.1505436796442763, "grad_norm": 0.5299105644226074, "learning_rate": 3.832036458182975e-05, "loss": 0.9519, "step": 180090 }, { "epoch": 1.150607566794015, "grad_norm": 1.7963881492614746, "learning_rate": 3.831548577223544e-05, "loss": 1.0677, "step": 180100 }, { "epoch": 1.1506714539437537, "grad_norm": 1.3347585201263428, "learning_rate": 3.83106070803143e-05, "loss": 0.8912, "step": 180110 }, { "epoch": 1.1507353410934924, "grad_norm": 1.0304107666015625, "learning_rate": 3.8305728506115466e-05, "loss": 0.7527, "step": 180120 }, { "epoch": 1.150799228243231, "grad_norm": 0.5389941334724426, "learning_rate": 3.8300850049688045e-05, "loss": 0.7198, "step": 180130 }, { "epoch": 1.1508631153929698, "grad_norm": 0.7622451186180115, "learning_rate": 3.829597171108119e-05, "loss": 0.8252, "step": 180140 }, { "epoch": 1.1509270025427085, "grad_norm": 0.707021176815033, "learning_rate": 3.829109349034403e-05, "loss": 0.879, "step": 180150 }, { "epoch": 1.1509908896924472, "grad_norm": 1.0913118124008179, "learning_rate": 3.828621538752569e-05, "loss": 0.9587, "step": 180160 }, { "epoch": 1.151054776842186, "grad_norm": 0.8637599349021912, "learning_rate": 3.82813374026753e-05, "loss": 0.8042, "step": 180170 }, { "epoch": 1.1511186639919246, "grad_norm": 1.4673819541931152, "learning_rate": 3.827645953584197e-05, "loss": 1.0232, "step": 180180 }, { "epoch": 1.1511825511416633, "grad_norm": 0.8439252972602844, "learning_rate": 3.827158178707484e-05, "loss": 0.8032, "step": 180190 }, { "epoch": 1.151246438291402, "grad_norm": 0.743141233921051, "learning_rate": 3.826670415642303e-05, "loss": 0.8605, "step": 180200 }, { "epoch": 1.1513103254411408, "grad_norm": 1.2738887071609497, "learning_rate": 3.826182664393566e-05, "loss": 0.9834, "step": 180210 }, { "epoch": 1.1513742125908795, "grad_norm": 1.370021104812622, "learning_rate": 3.825694924966185e-05, "loss": 0.9888, "step": 180220 }, { "epoch": 1.1514380997406182, "grad_norm": 0.7736698389053345, "learning_rate": 3.825207197365072e-05, "loss": 1.0524, "step": 180230 }, { "epoch": 1.1515019868903569, "grad_norm": 0.7036052346229553, "learning_rate": 3.8247194815951384e-05, "loss": 0.8803, "step": 180240 }, { "epoch": 1.1515658740400956, "grad_norm": 0.7617993950843811, "learning_rate": 3.824231777661297e-05, "loss": 0.9111, "step": 180250 }, { "epoch": 1.1516297611898343, "grad_norm": 0.7389393448829651, "learning_rate": 3.8237440855684586e-05, "loss": 0.8772, "step": 180260 }, { "epoch": 1.151693648339573, "grad_norm": 0.9305931925773621, "learning_rate": 3.8232564053215345e-05, "loss": 0.8958, "step": 180270 }, { "epoch": 1.1517575354893117, "grad_norm": 0.979669988155365, "learning_rate": 3.8227687369254375e-05, "loss": 0.7242, "step": 180280 }, { "epoch": 1.1518214226390504, "grad_norm": 0.8320657014846802, "learning_rate": 3.8222810803850764e-05, "loss": 0.8644, "step": 180290 }, { "epoch": 1.1518853097887891, "grad_norm": 0.5598975419998169, "learning_rate": 3.821793435705364e-05, "loss": 0.6644, "step": 180300 }, { "epoch": 1.1519491969385278, "grad_norm": 1.0118290185928345, "learning_rate": 3.821305802891212e-05, "loss": 0.8266, "step": 180310 }, { "epoch": 1.1520130840882665, "grad_norm": 0.8727541565895081, "learning_rate": 3.820818181947529e-05, "loss": 0.9033, "step": 180320 }, { "epoch": 1.1520769712380052, "grad_norm": 1.116168737411499, "learning_rate": 3.8203305728792265e-05, "loss": 0.8494, "step": 180330 }, { "epoch": 1.152140858387744, "grad_norm": 1.2078253030776978, "learning_rate": 3.819842975691217e-05, "loss": 0.8909, "step": 180340 }, { "epoch": 1.1522047455374826, "grad_norm": 1.025717854499817, "learning_rate": 3.8193553903884096e-05, "loss": 0.8589, "step": 180350 }, { "epoch": 1.1522686326872214, "grad_norm": 0.9668052792549133, "learning_rate": 3.818867816975715e-05, "loss": 0.6721, "step": 180360 }, { "epoch": 1.15233251983696, "grad_norm": 0.5999698638916016, "learning_rate": 3.818380255458043e-05, "loss": 0.9891, "step": 180370 }, { "epoch": 1.1523964069866988, "grad_norm": 0.9243110418319702, "learning_rate": 3.817892705840304e-05, "loss": 0.9535, "step": 180380 }, { "epoch": 1.1524602941364375, "grad_norm": 0.7655467391014099, "learning_rate": 3.817405168127408e-05, "loss": 1.1042, "step": 180390 }, { "epoch": 1.1525241812861762, "grad_norm": 1.5217201709747314, "learning_rate": 3.816917642324265e-05, "loss": 0.8361, "step": 180400 }, { "epoch": 1.1525880684359149, "grad_norm": 0.9932133555412292, "learning_rate": 3.816430128435786e-05, "loss": 0.9629, "step": 180410 }, { "epoch": 1.1526519555856536, "grad_norm": 0.9760572910308838, "learning_rate": 3.8159426264668784e-05, "loss": 0.8349, "step": 180420 }, { "epoch": 1.1527158427353923, "grad_norm": 1.3286035060882568, "learning_rate": 3.815455136422453e-05, "loss": 0.7393, "step": 180430 }, { "epoch": 1.152779729885131, "grad_norm": 1.8230654001235962, "learning_rate": 3.814967658307419e-05, "loss": 0.721, "step": 180440 }, { "epoch": 1.1528436170348697, "grad_norm": 2.0647432804107666, "learning_rate": 3.8144801921266864e-05, "loss": 0.7575, "step": 180450 }, { "epoch": 1.1529075041846082, "grad_norm": 1.0009099245071411, "learning_rate": 3.813992737885164e-05, "loss": 0.998, "step": 180460 }, { "epoch": 1.1529713913343471, "grad_norm": 0.806215763092041, "learning_rate": 3.8135052955877605e-05, "loss": 0.9999, "step": 180470 }, { "epoch": 1.1530352784840856, "grad_norm": 0.6237285733222961, "learning_rate": 3.813017865239385e-05, "loss": 0.689, "step": 180480 }, { "epoch": 1.1530991656338245, "grad_norm": 0.7346072793006897, "learning_rate": 3.812530446844946e-05, "loss": 0.9834, "step": 180490 }, { "epoch": 1.153163052783563, "grad_norm": 1.3192369937896729, "learning_rate": 3.812043040409354e-05, "loss": 0.8756, "step": 180500 }, { "epoch": 1.1532269399333017, "grad_norm": 1.162412166595459, "learning_rate": 3.811555645937516e-05, "loss": 0.9242, "step": 180510 }, { "epoch": 1.1532908270830404, "grad_norm": 0.7788326740264893, "learning_rate": 3.81106826343434e-05, "loss": 0.9619, "step": 180520 }, { "epoch": 1.1533547142327791, "grad_norm": 1.02617347240448, "learning_rate": 3.810580892904735e-05, "loss": 0.7565, "step": 180530 }, { "epoch": 1.1534186013825178, "grad_norm": 0.9374564290046692, "learning_rate": 3.8100935343536094e-05, "loss": 0.8091, "step": 180540 }, { "epoch": 1.1534824885322565, "grad_norm": 1.0165518522262573, "learning_rate": 3.809606187785874e-05, "loss": 1.1619, "step": 180550 }, { "epoch": 1.1535463756819953, "grad_norm": 0.9394980072975159, "learning_rate": 3.809118853206432e-05, "loss": 0.7875, "step": 180560 }, { "epoch": 1.153610262831734, "grad_norm": 0.8749181628227234, "learning_rate": 3.808631530620194e-05, "loss": 0.8106, "step": 180570 }, { "epoch": 1.1536741499814727, "grad_norm": 0.6072586178779602, "learning_rate": 3.808144220032066e-05, "loss": 1.0034, "step": 180580 }, { "epoch": 1.1537380371312114, "grad_norm": 1.6008857488632202, "learning_rate": 3.807656921446957e-05, "loss": 0.8974, "step": 180590 }, { "epoch": 1.15380192428095, "grad_norm": 1.2123829126358032, "learning_rate": 3.807169634869775e-05, "loss": 0.7672, "step": 180600 }, { "epoch": 1.1538658114306888, "grad_norm": 0.917658269405365, "learning_rate": 3.806682360305427e-05, "loss": 0.8276, "step": 180610 }, { "epoch": 1.1539296985804275, "grad_norm": 0.9251102209091187, "learning_rate": 3.806195097758819e-05, "loss": 1.0129, "step": 180620 }, { "epoch": 1.1539935857301662, "grad_norm": 1.3319838047027588, "learning_rate": 3.805707847234859e-05, "loss": 0.7021, "step": 180630 }, { "epoch": 1.154057472879905, "grad_norm": 0.8594711422920227, "learning_rate": 3.805220608738456e-05, "loss": 0.9549, "step": 180640 }, { "epoch": 1.1541213600296436, "grad_norm": 1.0314610004425049, "learning_rate": 3.8047333822745126e-05, "loss": 0.8582, "step": 180650 }, { "epoch": 1.1541852471793823, "grad_norm": 1.0455981492996216, "learning_rate": 3.804246167847939e-05, "loss": 1.2074, "step": 180660 }, { "epoch": 1.154249134329121, "grad_norm": 1.1809886693954468, "learning_rate": 3.803758965463641e-05, "loss": 0.6409, "step": 180670 }, { "epoch": 1.1543130214788597, "grad_norm": 0.782289981842041, "learning_rate": 3.803271775126525e-05, "loss": 0.817, "step": 180680 }, { "epoch": 1.1543769086285984, "grad_norm": 0.8476322293281555, "learning_rate": 3.802784596841499e-05, "loss": 0.6469, "step": 180690 }, { "epoch": 1.1544407957783371, "grad_norm": 1.193834662437439, "learning_rate": 3.802297430613467e-05, "loss": 1.1158, "step": 180700 }, { "epoch": 1.1545046829280758, "grad_norm": 0.7722360491752625, "learning_rate": 3.801810276447336e-05, "loss": 0.84, "step": 180710 }, { "epoch": 1.1545685700778145, "grad_norm": 1.5033289194107056, "learning_rate": 3.8013231343480116e-05, "loss": 1.0485, "step": 180720 }, { "epoch": 1.1546324572275533, "grad_norm": 1.234089732170105, "learning_rate": 3.8008360043204004e-05, "loss": 0.9439, "step": 180730 }, { "epoch": 1.154696344377292, "grad_norm": 1.0235389471054077, "learning_rate": 3.800348886369409e-05, "loss": 0.7474, "step": 180740 }, { "epoch": 1.1547602315270307, "grad_norm": 1.125136137008667, "learning_rate": 3.799861780499941e-05, "loss": 1.1013, "step": 180750 }, { "epoch": 1.1548241186767694, "grad_norm": 0.9494902491569519, "learning_rate": 3.7993746867169036e-05, "loss": 0.694, "step": 180760 }, { "epoch": 1.154888005826508, "grad_norm": 1.7362793684005737, "learning_rate": 3.798887605025202e-05, "loss": 1.2714, "step": 180770 }, { "epoch": 1.1549518929762468, "grad_norm": 1.3733989000320435, "learning_rate": 3.798400535429741e-05, "loss": 0.659, "step": 180780 }, { "epoch": 1.1550157801259855, "grad_norm": 1.4269659519195557, "learning_rate": 3.797913477935426e-05, "loss": 0.9194, "step": 180790 }, { "epoch": 1.1550796672757242, "grad_norm": 0.7260159850120544, "learning_rate": 3.7974264325471625e-05, "loss": 0.9957, "step": 180800 }, { "epoch": 1.155143554425463, "grad_norm": 0.8534484505653381, "learning_rate": 3.7969393992698555e-05, "loss": 0.8095, "step": 180810 }, { "epoch": 1.1552074415752016, "grad_norm": 1.1256579160690308, "learning_rate": 3.796452378108409e-05, "loss": 1.053, "step": 180820 }, { "epoch": 1.1552713287249403, "grad_norm": 0.9373098015785217, "learning_rate": 3.7959653690677285e-05, "loss": 0.766, "step": 180830 }, { "epoch": 1.155335215874679, "grad_norm": 1.5208162069320679, "learning_rate": 3.795478372152718e-05, "loss": 1.1105, "step": 180840 }, { "epoch": 1.1553991030244177, "grad_norm": 1.0059376955032349, "learning_rate": 3.794991387368283e-05, "loss": 0.891, "step": 180850 }, { "epoch": 1.1554629901741564, "grad_norm": 0.7993272542953491, "learning_rate": 3.794504414719326e-05, "loss": 0.7323, "step": 180860 }, { "epoch": 1.1555268773238951, "grad_norm": 0.8731907606124878, "learning_rate": 3.794017454210753e-05, "loss": 0.8365, "step": 180870 }, { "epoch": 1.1555907644736338, "grad_norm": 0.9847076535224915, "learning_rate": 3.793530505847468e-05, "loss": 0.7823, "step": 180880 }, { "epoch": 1.1556546516233726, "grad_norm": 0.7352263331413269, "learning_rate": 3.793043569634375e-05, "loss": 0.9791, "step": 180890 }, { "epoch": 1.1557185387731113, "grad_norm": 0.743043065071106, "learning_rate": 3.792556645576376e-05, "loss": 0.7904, "step": 180900 }, { "epoch": 1.15578242592285, "grad_norm": 0.854838490486145, "learning_rate": 3.792069733678377e-05, "loss": 0.8579, "step": 180910 }, { "epoch": 1.1558463130725887, "grad_norm": 1.538976788520813, "learning_rate": 3.7915828339452814e-05, "loss": 0.9096, "step": 180920 }, { "epoch": 1.1559102002223274, "grad_norm": 1.347536563873291, "learning_rate": 3.791095946381992e-05, "loss": 0.8312, "step": 180930 }, { "epoch": 1.155974087372066, "grad_norm": 0.7410019040107727, "learning_rate": 3.790609070993412e-05, "loss": 0.7537, "step": 180940 }, { "epoch": 1.1560379745218046, "grad_norm": 1.0292385816574097, "learning_rate": 3.790122207784444e-05, "loss": 0.8605, "step": 180950 }, { "epoch": 1.1561018616715435, "grad_norm": 0.9388356804847717, "learning_rate": 3.789635356759993e-05, "loss": 0.9448, "step": 180960 }, { "epoch": 1.156165748821282, "grad_norm": 1.113138198852539, "learning_rate": 3.789148517924961e-05, "loss": 0.8119, "step": 180970 }, { "epoch": 1.156229635971021, "grad_norm": 1.052147388458252, "learning_rate": 3.78866169128425e-05, "loss": 0.739, "step": 180980 }, { "epoch": 1.1562935231207594, "grad_norm": 0.8988513946533203, "learning_rate": 3.788174876842765e-05, "loss": 0.9475, "step": 180990 }, { "epoch": 1.156357410270498, "grad_norm": 0.6247332096099854, "learning_rate": 3.787688074605407e-05, "loss": 0.9559, "step": 181000 }, { "epoch": 1.1564212974202368, "grad_norm": 0.7940369844436646, "learning_rate": 3.7872012845770786e-05, "loss": 0.865, "step": 181010 }, { "epoch": 1.1564851845699755, "grad_norm": 0.5319830179214478, "learning_rate": 3.786714506762683e-05, "loss": 0.8773, "step": 181020 }, { "epoch": 1.1565490717197142, "grad_norm": 1.5722520351409912, "learning_rate": 3.7862277411671215e-05, "loss": 0.772, "step": 181030 }, { "epoch": 1.156612958869453, "grad_norm": 0.8292412161827087, "learning_rate": 3.785740987795298e-05, "loss": 0.9318, "step": 181040 }, { "epoch": 1.1566768460191916, "grad_norm": 0.785133957862854, "learning_rate": 3.785254246652112e-05, "loss": 0.9598, "step": 181050 }, { "epoch": 1.1567407331689303, "grad_norm": 0.6454100012779236, "learning_rate": 3.784767517742467e-05, "loss": 0.9134, "step": 181060 }, { "epoch": 1.156804620318669, "grad_norm": 0.7190561890602112, "learning_rate": 3.784280801071264e-05, "loss": 1.0787, "step": 181070 }, { "epoch": 1.1568685074684077, "grad_norm": 1.0026381015777588, "learning_rate": 3.7837940966434054e-05, "loss": 0.8038, "step": 181080 }, { "epoch": 1.1569323946181465, "grad_norm": 0.8632673025131226, "learning_rate": 3.783307404463792e-05, "loss": 0.8546, "step": 181090 }, { "epoch": 1.1569962817678852, "grad_norm": 0.8693109154701233, "learning_rate": 3.782820724537326e-05, "loss": 0.7959, "step": 181100 }, { "epoch": 1.1570601689176239, "grad_norm": 1.686172366142273, "learning_rate": 3.782334056868908e-05, "loss": 0.8705, "step": 181110 }, { "epoch": 1.1571240560673626, "grad_norm": 1.0223690271377563, "learning_rate": 3.78184740146344e-05, "loss": 0.7805, "step": 181120 }, { "epoch": 1.1571879432171013, "grad_norm": 0.9894540309906006, "learning_rate": 3.781360758325822e-05, "loss": 0.7278, "step": 181130 }, { "epoch": 1.15725183036684, "grad_norm": 0.6399027109146118, "learning_rate": 3.780874127460956e-05, "loss": 1.0004, "step": 181140 }, { "epoch": 1.1573157175165787, "grad_norm": 0.7714298963546753, "learning_rate": 3.780387508873742e-05, "loss": 0.8707, "step": 181150 }, { "epoch": 1.1573796046663174, "grad_norm": 1.1210808753967285, "learning_rate": 3.779900902569081e-05, "loss": 0.9824, "step": 181160 }, { "epoch": 1.157443491816056, "grad_norm": 0.8586247563362122, "learning_rate": 3.779414308551873e-05, "loss": 0.8191, "step": 181170 }, { "epoch": 1.1575073789657948, "grad_norm": 1.4679404497146606, "learning_rate": 3.778927726827018e-05, "loss": 0.9684, "step": 181180 }, { "epoch": 1.1575712661155335, "grad_norm": 1.372011661529541, "learning_rate": 3.778441157399418e-05, "loss": 0.7683, "step": 181190 }, { "epoch": 1.1576351532652722, "grad_norm": 0.8723155856132507, "learning_rate": 3.7779546002739724e-05, "loss": 0.9515, "step": 181200 }, { "epoch": 1.157699040415011, "grad_norm": 1.1017118692398071, "learning_rate": 3.777468055455582e-05, "loss": 0.7784, "step": 181210 }, { "epoch": 1.1577629275647496, "grad_norm": 0.6969674825668335, "learning_rate": 3.7769815229491454e-05, "loss": 0.8209, "step": 181220 }, { "epoch": 1.1578268147144883, "grad_norm": 0.9235311150550842, "learning_rate": 3.776495002759563e-05, "loss": 0.9272, "step": 181230 }, { "epoch": 1.157890701864227, "grad_norm": 1.3109992742538452, "learning_rate": 3.776008494891734e-05, "loss": 0.8468, "step": 181240 }, { "epoch": 1.1579545890139658, "grad_norm": 0.8040546774864197, "learning_rate": 3.7755219993505584e-05, "loss": 1.0643, "step": 181250 }, { "epoch": 1.1580184761637045, "grad_norm": 0.8779431581497192, "learning_rate": 3.775035516140936e-05, "loss": 0.8391, "step": 181260 }, { "epoch": 1.1580823633134432, "grad_norm": 1.0938910245895386, "learning_rate": 3.774549045267765e-05, "loss": 0.8048, "step": 181270 }, { "epoch": 1.1581462504631819, "grad_norm": 0.9017459750175476, "learning_rate": 3.774062586735946e-05, "loss": 0.8209, "step": 181280 }, { "epoch": 1.1582101376129206, "grad_norm": 0.5983861684799194, "learning_rate": 3.773576140550377e-05, "loss": 0.6446, "step": 181290 }, { "epoch": 1.1582740247626593, "grad_norm": 0.9771537184715271, "learning_rate": 3.7730897067159575e-05, "loss": 1.0019, "step": 181300 }, { "epoch": 1.158337911912398, "grad_norm": 0.7589857578277588, "learning_rate": 3.772603285237586e-05, "loss": 0.6826, "step": 181310 }, { "epoch": 1.1584017990621367, "grad_norm": 0.9540377259254456, "learning_rate": 3.772116876120161e-05, "loss": 1.033, "step": 181320 }, { "epoch": 1.1584656862118754, "grad_norm": 0.9302192330360413, "learning_rate": 3.771630479368582e-05, "loss": 0.868, "step": 181330 }, { "epoch": 1.158529573361614, "grad_norm": 0.9737901091575623, "learning_rate": 3.771144094987746e-05, "loss": 0.7623, "step": 181340 }, { "epoch": 1.1585934605113528, "grad_norm": 0.8602831959724426, "learning_rate": 3.7706577229825524e-05, "loss": 0.9287, "step": 181350 }, { "epoch": 1.1586573476610915, "grad_norm": 0.8971428275108337, "learning_rate": 3.770171363357899e-05, "loss": 1.0126, "step": 181360 }, { "epoch": 1.1587212348108302, "grad_norm": 1.3346503973007202, "learning_rate": 3.769685016118684e-05, "loss": 0.7815, "step": 181370 }, { "epoch": 1.158785121960569, "grad_norm": 1.5257915258407593, "learning_rate": 3.769198681269805e-05, "loss": 1.0097, "step": 181380 }, { "epoch": 1.1588490091103076, "grad_norm": 1.0773704051971436, "learning_rate": 3.7687123588161596e-05, "loss": 0.8822, "step": 181390 }, { "epoch": 1.1589128962600463, "grad_norm": 0.8371546268463135, "learning_rate": 3.768226048762647e-05, "loss": 0.7492, "step": 181400 }, { "epoch": 1.158976783409785, "grad_norm": 1.3601465225219727, "learning_rate": 3.767739751114163e-05, "loss": 0.8812, "step": 181410 }, { "epoch": 1.1590406705595235, "grad_norm": 0.9463026523590088, "learning_rate": 3.7672534658756065e-05, "loss": 0.8712, "step": 181420 }, { "epoch": 1.1591045577092625, "grad_norm": 0.891038179397583, "learning_rate": 3.7667671930518736e-05, "loss": 0.8663, "step": 181430 }, { "epoch": 1.159168444859001, "grad_norm": 1.142344355583191, "learning_rate": 3.766280932647862e-05, "loss": 1.0367, "step": 181440 }, { "epoch": 1.1592323320087399, "grad_norm": 0.9246888756752014, "learning_rate": 3.765794684668469e-05, "loss": 0.9055, "step": 181450 }, { "epoch": 1.1592962191584784, "grad_norm": 1.2193877696990967, "learning_rate": 3.7653084491185905e-05, "loss": 0.8285, "step": 181460 }, { "epoch": 1.1593601063082173, "grad_norm": 0.9855943918228149, "learning_rate": 3.764822226003125e-05, "loss": 0.7588, "step": 181470 }, { "epoch": 1.1594239934579558, "grad_norm": 0.9939285516738892, "learning_rate": 3.764336015326968e-05, "loss": 0.6418, "step": 181480 }, { "epoch": 1.1594878806076945, "grad_norm": 1.2632123231887817, "learning_rate": 3.7638498170950165e-05, "loss": 0.9491, "step": 181490 }, { "epoch": 1.1595517677574332, "grad_norm": 0.8524492979049683, "learning_rate": 3.7633636313121663e-05, "loss": 0.955, "step": 181500 }, { "epoch": 1.1596156549071719, "grad_norm": 1.167758584022522, "learning_rate": 3.762877457983314e-05, "loss": 0.8589, "step": 181510 }, { "epoch": 1.1596795420569106, "grad_norm": 0.8116161823272705, "learning_rate": 3.762391297113358e-05, "loss": 0.891, "step": 181520 }, { "epoch": 1.1597434292066493, "grad_norm": 0.7200629115104675, "learning_rate": 3.76190514870719e-05, "loss": 0.7444, "step": 181530 }, { "epoch": 1.159807316356388, "grad_norm": 1.024326205253601, "learning_rate": 3.76141901276971e-05, "loss": 0.8589, "step": 181540 }, { "epoch": 1.1598712035061267, "grad_norm": 5.042026042938232, "learning_rate": 3.7609328893058104e-05, "loss": 1.1791, "step": 181550 }, { "epoch": 1.1599350906558654, "grad_norm": 0.9187368750572205, "learning_rate": 3.760446778320389e-05, "loss": 0.7202, "step": 181560 }, { "epoch": 1.1599989778056041, "grad_norm": 0.8078513741493225, "learning_rate": 3.7599606798183413e-05, "loss": 0.8145, "step": 181570 }, { "epoch": 1.1600628649553428, "grad_norm": 0.6355553865432739, "learning_rate": 3.7594745938045625e-05, "loss": 0.7847, "step": 181580 }, { "epoch": 1.1601267521050815, "grad_norm": 0.6867857575416565, "learning_rate": 3.758988520283947e-05, "loss": 0.9891, "step": 181590 }, { "epoch": 1.1601906392548202, "grad_norm": 1.211389422416687, "learning_rate": 3.758502459261391e-05, "loss": 0.9464, "step": 181600 }, { "epoch": 1.160254526404559, "grad_norm": 0.8838968276977539, "learning_rate": 3.758016410741789e-05, "loss": 0.718, "step": 181610 }, { "epoch": 1.1603184135542977, "grad_norm": 1.0951565504074097, "learning_rate": 3.7575303747300375e-05, "loss": 1.0658, "step": 181620 }, { "epoch": 1.1603823007040364, "grad_norm": 0.9639030694961548, "learning_rate": 3.7570443512310287e-05, "loss": 1.1464, "step": 181630 }, { "epoch": 1.160446187853775, "grad_norm": 0.8996525406837463, "learning_rate": 3.756558340249659e-05, "loss": 1.005, "step": 181640 }, { "epoch": 1.1605100750035138, "grad_norm": 0.6685171723365784, "learning_rate": 3.7560723417908225e-05, "loss": 0.8409, "step": 181650 }, { "epoch": 1.1605739621532525, "grad_norm": 0.6640573143959045, "learning_rate": 3.755586355859414e-05, "loss": 0.8569, "step": 181660 }, { "epoch": 1.1606378493029912, "grad_norm": 0.5627730488777161, "learning_rate": 3.755100382460327e-05, "loss": 0.9019, "step": 181670 }, { "epoch": 1.16070173645273, "grad_norm": 0.6873822808265686, "learning_rate": 3.754614421598456e-05, "loss": 0.8357, "step": 181680 }, { "epoch": 1.1607656236024686, "grad_norm": 1.4170078039169312, "learning_rate": 3.7541284732786955e-05, "loss": 0.8245, "step": 181690 }, { "epoch": 1.1608295107522073, "grad_norm": 0.745650053024292, "learning_rate": 3.753642537505939e-05, "loss": 0.9163, "step": 181700 }, { "epoch": 1.160893397901946, "grad_norm": 1.3696508407592773, "learning_rate": 3.75315661428508e-05, "loss": 0.932, "step": 181710 }, { "epoch": 1.1609572850516847, "grad_norm": 1.0974339246749878, "learning_rate": 3.752670703621013e-05, "loss": 0.881, "step": 181720 }, { "epoch": 1.1610211722014234, "grad_norm": 0.8808803558349609, "learning_rate": 3.752184805518631e-05, "loss": 0.7972, "step": 181730 }, { "epoch": 1.1610850593511621, "grad_norm": 1.2625608444213867, "learning_rate": 3.7516989199828276e-05, "loss": 0.7407, "step": 181740 }, { "epoch": 1.1611489465009008, "grad_norm": 0.8603794574737549, "learning_rate": 3.7512130470184965e-05, "loss": 0.7863, "step": 181750 }, { "epoch": 1.1612128336506395, "grad_norm": 1.2565032243728638, "learning_rate": 3.75072718663053e-05, "loss": 0.8844, "step": 181760 }, { "epoch": 1.1612767208003782, "grad_norm": 1.3598438501358032, "learning_rate": 3.750241338823821e-05, "loss": 0.8526, "step": 181770 }, { "epoch": 1.161340607950117, "grad_norm": 2.179485321044922, "learning_rate": 3.749755503603264e-05, "loss": 0.9197, "step": 181780 }, { "epoch": 1.1614044950998557, "grad_norm": 0.8402548432350159, "learning_rate": 3.74926968097375e-05, "loss": 0.9501, "step": 181790 }, { "epoch": 1.1614683822495944, "grad_norm": 0.6361011862754822, "learning_rate": 3.748783870940172e-05, "loss": 0.9137, "step": 181800 }, { "epoch": 1.161532269399333, "grad_norm": 1.048244833946228, "learning_rate": 3.748298073507424e-05, "loss": 0.9456, "step": 181810 }, { "epoch": 1.1615961565490718, "grad_norm": 1.0340771675109863, "learning_rate": 3.747812288680396e-05, "loss": 0.721, "step": 181820 }, { "epoch": 1.1616600436988105, "grad_norm": 0.6859948635101318, "learning_rate": 3.7473265164639824e-05, "loss": 0.9426, "step": 181830 }, { "epoch": 1.1617239308485492, "grad_norm": 0.9537613987922668, "learning_rate": 3.746840756863074e-05, "loss": 0.9536, "step": 181840 }, { "epoch": 1.161787817998288, "grad_norm": 0.8040690422058105, "learning_rate": 3.746355009882564e-05, "loss": 0.9398, "step": 181850 }, { "epoch": 1.1618517051480266, "grad_norm": 1.4307293891906738, "learning_rate": 3.745869275527343e-05, "loss": 0.9606, "step": 181860 }, { "epoch": 1.1619155922977653, "grad_norm": 0.8316680788993835, "learning_rate": 3.7453835538023035e-05, "loss": 0.9238, "step": 181870 }, { "epoch": 1.161979479447504, "grad_norm": 1.3545746803283691, "learning_rate": 3.744897844712337e-05, "loss": 0.764, "step": 181880 }, { "epoch": 1.1620433665972427, "grad_norm": 1.1133147478103638, "learning_rate": 3.744412148262335e-05, "loss": 0.9603, "step": 181890 }, { "epoch": 1.1621072537469814, "grad_norm": 0.8634859919548035, "learning_rate": 3.7439264644571894e-05, "loss": 0.7834, "step": 181900 }, { "epoch": 1.16217114089672, "grad_norm": 0.7529647350311279, "learning_rate": 3.743440793301789e-05, "loss": 0.7559, "step": 181910 }, { "epoch": 1.1622350280464588, "grad_norm": 1.1919103860855103, "learning_rate": 3.742955134801028e-05, "loss": 1.2041, "step": 181920 }, { "epoch": 1.1622989151961973, "grad_norm": 0.8481646180152893, "learning_rate": 3.742469488959798e-05, "loss": 0.8569, "step": 181930 }, { "epoch": 1.1623628023459363, "grad_norm": 0.7014936804771423, "learning_rate": 3.741983855782986e-05, "loss": 0.8347, "step": 181940 }, { "epoch": 1.1624266894956747, "grad_norm": 0.9098723530769348, "learning_rate": 3.741498235275486e-05, "loss": 1.1005, "step": 181950 }, { "epoch": 1.1624905766454137, "grad_norm": 0.6613329648971558, "learning_rate": 3.741012627442188e-05, "loss": 0.7433, "step": 181960 }, { "epoch": 1.1625544637951521, "grad_norm": 0.695220410823822, "learning_rate": 3.74052703228798e-05, "loss": 0.7844, "step": 181970 }, { "epoch": 1.1626183509448909, "grad_norm": 1.442959189414978, "learning_rate": 3.740041449817756e-05, "loss": 0.7808, "step": 181980 }, { "epoch": 1.1626822380946296, "grad_norm": 0.615817129611969, "learning_rate": 3.739555880036405e-05, "loss": 0.7967, "step": 181990 }, { "epoch": 1.1627461252443683, "grad_norm": 0.6628965139389038, "learning_rate": 3.739070322948816e-05, "loss": 0.818, "step": 182000 }, { "epoch": 1.162810012394107, "grad_norm": 1.3861855268478394, "learning_rate": 3.738584778559881e-05, "loss": 0.8205, "step": 182010 }, { "epoch": 1.1628738995438457, "grad_norm": 0.8403356075286865, "learning_rate": 3.738099246874488e-05, "loss": 0.7642, "step": 182020 }, { "epoch": 1.1629377866935844, "grad_norm": 2.395003080368042, "learning_rate": 3.737613727897525e-05, "loss": 0.9817, "step": 182030 }, { "epoch": 1.163001673843323, "grad_norm": 1.15186607837677, "learning_rate": 3.7371282216338866e-05, "loss": 1.169, "step": 182040 }, { "epoch": 1.1630655609930618, "grad_norm": 0.9080342650413513, "learning_rate": 3.736642728088458e-05, "loss": 0.8127, "step": 182050 }, { "epoch": 1.1631294481428005, "grad_norm": 0.6692883372306824, "learning_rate": 3.736157247266131e-05, "loss": 0.8561, "step": 182060 }, { "epoch": 1.1631933352925392, "grad_norm": 0.8358718156814575, "learning_rate": 3.735671779171793e-05, "loss": 1.0591, "step": 182070 }, { "epoch": 1.163257222442278, "grad_norm": 1.2735133171081543, "learning_rate": 3.7352348687733605e-05, "loss": 1.0301, "step": 182080 }, { "epoch": 1.1633211095920166, "grad_norm": 0.8889757990837097, "learning_rate": 3.734749424875673e-05, "loss": 0.7145, "step": 182090 }, { "epoch": 1.1633849967417553, "grad_norm": 0.9000428915023804, "learning_rate": 3.734263993720153e-05, "loss": 0.9598, "step": 182100 }, { "epoch": 1.163448883891494, "grad_norm": 0.8241376876831055, "learning_rate": 3.733778575311691e-05, "loss": 0.8692, "step": 182110 }, { "epoch": 1.1635127710412327, "grad_norm": 1.059684157371521, "learning_rate": 3.7332931696551724e-05, "loss": 0.7761, "step": 182120 }, { "epoch": 1.1635766581909714, "grad_norm": 0.8571027517318726, "learning_rate": 3.732807776755489e-05, "loss": 0.8801, "step": 182130 }, { "epoch": 1.1636405453407102, "grad_norm": 0.9349920749664307, "learning_rate": 3.7323223966175265e-05, "loss": 0.7988, "step": 182140 }, { "epoch": 1.1637044324904489, "grad_norm": 1.583733320236206, "learning_rate": 3.731837029246174e-05, "loss": 1.1716, "step": 182150 }, { "epoch": 1.1637683196401876, "grad_norm": 0.6162463426589966, "learning_rate": 3.731351674646322e-05, "loss": 0.9512, "step": 182160 }, { "epoch": 1.1638322067899263, "grad_norm": 1.2393447160720825, "learning_rate": 3.730866332822854e-05, "loss": 0.7938, "step": 182170 }, { "epoch": 1.163896093939665, "grad_norm": 0.8612910509109497, "learning_rate": 3.7303810037806605e-05, "loss": 0.7164, "step": 182180 }, { "epoch": 1.1639599810894037, "grad_norm": 0.8981246948242188, "learning_rate": 3.729895687524629e-05, "loss": 1.391, "step": 182190 }, { "epoch": 1.1640238682391424, "grad_norm": 0.7715309858322144, "learning_rate": 3.729410384059646e-05, "loss": 0.9247, "step": 182200 }, { "epoch": 1.164087755388881, "grad_norm": 1.1804099082946777, "learning_rate": 3.7289250933906e-05, "loss": 0.7421, "step": 182210 }, { "epoch": 1.1641516425386198, "grad_norm": 1.579128623008728, "learning_rate": 3.7284398155223774e-05, "loss": 0.7398, "step": 182220 }, { "epoch": 1.1642155296883585, "grad_norm": 2.8554675579071045, "learning_rate": 3.7279545504598666e-05, "loss": 0.9659, "step": 182230 }, { "epoch": 1.1642794168380972, "grad_norm": 0.661597490310669, "learning_rate": 3.727469298207954e-05, "loss": 0.9344, "step": 182240 }, { "epoch": 1.164343303987836, "grad_norm": 0.8794891238212585, "learning_rate": 3.7269840587715264e-05, "loss": 0.8297, "step": 182250 }, { "epoch": 1.1644071911375746, "grad_norm": 0.934339165687561, "learning_rate": 3.72649883215547e-05, "loss": 0.8278, "step": 182260 }, { "epoch": 1.1644710782873133, "grad_norm": 0.8373285531997681, "learning_rate": 3.726013618364673e-05, "loss": 0.8641, "step": 182270 }, { "epoch": 1.164534965437052, "grad_norm": 1.0624688863754272, "learning_rate": 3.7255284174040204e-05, "loss": 1.1135, "step": 182280 }, { "epoch": 1.1645988525867907, "grad_norm": 1.0716806650161743, "learning_rate": 3.7250432292784e-05, "loss": 0.803, "step": 182290 }, { "epoch": 1.1646627397365295, "grad_norm": 0.7583722472190857, "learning_rate": 3.724558053992696e-05, "loss": 0.8769, "step": 182300 }, { "epoch": 1.1647266268862682, "grad_norm": 1.5714266300201416, "learning_rate": 3.724072891551797e-05, "loss": 1.0371, "step": 182310 }, { "epoch": 1.1647905140360069, "grad_norm": 2.1298489570617676, "learning_rate": 3.723587741960587e-05, "loss": 0.9073, "step": 182320 }, { "epoch": 1.1648544011857456, "grad_norm": 1.1624418497085571, "learning_rate": 3.7231026052239525e-05, "loss": 0.9129, "step": 182330 }, { "epoch": 1.1649182883354843, "grad_norm": 0.9117680191993713, "learning_rate": 3.7226174813467805e-05, "loss": 0.836, "step": 182340 }, { "epoch": 1.164982175485223, "grad_norm": 1.1857670545578003, "learning_rate": 3.722132370333954e-05, "loss": 0.6342, "step": 182350 }, { "epoch": 1.1650460626349617, "grad_norm": 1.1982570886611938, "learning_rate": 3.7216472721903604e-05, "loss": 0.8073, "step": 182360 }, { "epoch": 1.1651099497847004, "grad_norm": 0.8804404139518738, "learning_rate": 3.721162186920886e-05, "loss": 0.7809, "step": 182370 }, { "epoch": 1.165173836934439, "grad_norm": 1.8570173978805542, "learning_rate": 3.7206771145304136e-05, "loss": 0.6439, "step": 182380 }, { "epoch": 1.1652377240841778, "grad_norm": 1.0087871551513672, "learning_rate": 3.72019205502383e-05, "loss": 1.0128, "step": 182390 }, { "epoch": 1.1653016112339163, "grad_norm": 0.9948041439056396, "learning_rate": 3.7197070084060195e-05, "loss": 0.8732, "step": 182400 }, { "epoch": 1.1653654983836552, "grad_norm": 1.0306204557418823, "learning_rate": 3.719221974681867e-05, "loss": 1.0214, "step": 182410 }, { "epoch": 1.1654293855333937, "grad_norm": 0.7993441820144653, "learning_rate": 3.718736953856258e-05, "loss": 0.9127, "step": 182420 }, { "epoch": 1.1654932726831326, "grad_norm": 0.7198377251625061, "learning_rate": 3.718251945934075e-05, "loss": 0.785, "step": 182430 }, { "epoch": 1.1655571598328711, "grad_norm": 0.7696216106414795, "learning_rate": 3.717766950920204e-05, "loss": 0.8648, "step": 182440 }, { "epoch": 1.1656210469826098, "grad_norm": 2.3993189334869385, "learning_rate": 3.71728196881953e-05, "loss": 0.9621, "step": 182450 }, { "epoch": 1.1656849341323485, "grad_norm": 1.0224963426589966, "learning_rate": 3.716796999636936e-05, "loss": 1.0035, "step": 182460 }, { "epoch": 1.1657488212820872, "grad_norm": 1.0298407077789307, "learning_rate": 3.716312043377306e-05, "loss": 1.0118, "step": 182470 }, { "epoch": 1.165812708431826, "grad_norm": 0.9027889966964722, "learning_rate": 3.7158271000455236e-05, "loss": 0.9612, "step": 182480 }, { "epoch": 1.1658765955815646, "grad_norm": 0.8727611303329468, "learning_rate": 3.715342169646474e-05, "loss": 0.9595, "step": 182490 }, { "epoch": 1.1659404827313034, "grad_norm": 1.0932813882827759, "learning_rate": 3.714857252185041e-05, "loss": 0.7695, "step": 182500 }, { "epoch": 1.166004369881042, "grad_norm": 1.0781934261322021, "learning_rate": 3.714372347666106e-05, "loss": 0.8286, "step": 182510 }, { "epoch": 1.1660682570307808, "grad_norm": 1.1852980852127075, "learning_rate": 3.713887456094554e-05, "loss": 1.361, "step": 182520 }, { "epoch": 1.1661321441805195, "grad_norm": 1.407976746559143, "learning_rate": 3.713402577475268e-05, "loss": 0.9317, "step": 182530 }, { "epoch": 1.1661960313302582, "grad_norm": 1.2148253917694092, "learning_rate": 3.7129177118131315e-05, "loss": 0.9379, "step": 182540 }, { "epoch": 1.1662599184799969, "grad_norm": 0.6507695317268372, "learning_rate": 3.712432859113026e-05, "loss": 0.8821, "step": 182550 }, { "epoch": 1.1663238056297356, "grad_norm": 1.3190046548843384, "learning_rate": 3.711948019379836e-05, "loss": 0.9343, "step": 182560 }, { "epoch": 1.1663876927794743, "grad_norm": 1.001220464706421, "learning_rate": 3.711463192618444e-05, "loss": 0.9505, "step": 182570 }, { "epoch": 1.166451579929213, "grad_norm": 0.7457411289215088, "learning_rate": 3.710978378833733e-05, "loss": 0.7789, "step": 182580 }, { "epoch": 1.1665154670789517, "grad_norm": 0.7126203179359436, "learning_rate": 3.710493578030584e-05, "loss": 0.7784, "step": 182590 }, { "epoch": 1.1665793542286904, "grad_norm": 1.4537057876586914, "learning_rate": 3.71000879021388e-05, "loss": 0.8228, "step": 182600 }, { "epoch": 1.1666432413784291, "grad_norm": 1.0841671228408813, "learning_rate": 3.709524015388505e-05, "loss": 0.822, "step": 182610 }, { "epoch": 1.1667071285281678, "grad_norm": 0.9426870346069336, "learning_rate": 3.709039253559339e-05, "loss": 1.1529, "step": 182620 }, { "epoch": 1.1667710156779065, "grad_norm": 1.0441092252731323, "learning_rate": 3.708554504731264e-05, "loss": 0.922, "step": 182630 }, { "epoch": 1.1668349028276452, "grad_norm": 1.3361924886703491, "learning_rate": 3.708069768909165e-05, "loss": 0.7883, "step": 182640 }, { "epoch": 1.166898789977384, "grad_norm": 0.7366297245025635, "learning_rate": 3.707585046097918e-05, "loss": 0.8257, "step": 182650 }, { "epoch": 1.1669626771271226, "grad_norm": 0.9412416219711304, "learning_rate": 3.707100336302409e-05, "loss": 1.0877, "step": 182660 }, { "epoch": 1.1670265642768614, "grad_norm": 1.1593157052993774, "learning_rate": 3.706615639527516e-05, "loss": 0.8254, "step": 182670 }, { "epoch": 1.1670904514266, "grad_norm": 0.8784003257751465, "learning_rate": 3.706130955778124e-05, "loss": 0.741, "step": 182680 }, { "epoch": 1.1671543385763388, "grad_norm": 0.787391722202301, "learning_rate": 3.705646285059113e-05, "loss": 0.9372, "step": 182690 }, { "epoch": 1.1672182257260775, "grad_norm": 0.7201464176177979, "learning_rate": 3.705161627375363e-05, "loss": 0.8021, "step": 182700 }, { "epoch": 1.1672821128758162, "grad_norm": 2.2414963245391846, "learning_rate": 3.7046769827317565e-05, "loss": 0.776, "step": 182710 }, { "epoch": 1.1673460000255549, "grad_norm": 0.7492812275886536, "learning_rate": 3.7041923511331725e-05, "loss": 0.7326, "step": 182720 }, { "epoch": 1.1674098871752936, "grad_norm": 0.9507798552513123, "learning_rate": 3.7037077325844923e-05, "loss": 0.8424, "step": 182730 }, { "epoch": 1.1674737743250323, "grad_norm": 1.3532758951187134, "learning_rate": 3.7032231270905984e-05, "loss": 0.8017, "step": 182740 }, { "epoch": 1.167537661474771, "grad_norm": 1.3244647979736328, "learning_rate": 3.702738534656368e-05, "loss": 0.9646, "step": 182750 }, { "epoch": 1.1676015486245097, "grad_norm": 0.6468150019645691, "learning_rate": 3.702253955286683e-05, "loss": 0.8012, "step": 182760 }, { "epoch": 1.1676654357742484, "grad_norm": 0.9107818603515625, "learning_rate": 3.7017693889864236e-05, "loss": 0.795, "step": 182770 }, { "epoch": 1.1677293229239871, "grad_norm": 0.9616878032684326, "learning_rate": 3.7012848357604703e-05, "loss": 0.8296, "step": 182780 }, { "epoch": 1.1677932100737258, "grad_norm": 1.004823088645935, "learning_rate": 3.700800295613701e-05, "loss": 0.7486, "step": 182790 }, { "epoch": 1.1678570972234645, "grad_norm": 0.8042672276496887, "learning_rate": 3.700315768550997e-05, "loss": 0.787, "step": 182800 }, { "epoch": 1.1679209843732032, "grad_norm": 1.9837151765823364, "learning_rate": 3.6998312545772385e-05, "loss": 0.9188, "step": 182810 }, { "epoch": 1.167984871522942, "grad_norm": 0.9530550837516785, "learning_rate": 3.6993467536973034e-05, "loss": 0.7108, "step": 182820 }, { "epoch": 1.1680487586726807, "grad_norm": 0.8177058100700378, "learning_rate": 3.698862265916071e-05, "loss": 0.8103, "step": 182830 }, { "epoch": 1.1681126458224194, "grad_norm": 1.3955106735229492, "learning_rate": 3.698377791238422e-05, "loss": 1.0225, "step": 182840 }, { "epoch": 1.168176532972158, "grad_norm": 0.8228069543838501, "learning_rate": 3.6978933296692354e-05, "loss": 0.8187, "step": 182850 }, { "epoch": 1.1682404201218968, "grad_norm": 0.820376992225647, "learning_rate": 3.6974088812133885e-05, "loss": 1.0945, "step": 182860 }, { "epoch": 1.1683043072716355, "grad_norm": 1.0790095329284668, "learning_rate": 3.696924445875761e-05, "loss": 0.8802, "step": 182870 }, { "epoch": 1.1683681944213742, "grad_norm": 0.557696521282196, "learning_rate": 3.6964400236612306e-05, "loss": 0.9731, "step": 182880 }, { "epoch": 1.1684320815711127, "grad_norm": 1.1943516731262207, "learning_rate": 3.695955614574679e-05, "loss": 1.0052, "step": 182890 }, { "epoch": 1.1684959687208516, "grad_norm": 0.8318352699279785, "learning_rate": 3.695471218620981e-05, "loss": 0.7788, "step": 182900 }, { "epoch": 1.16855985587059, "grad_norm": 0.8753002882003784, "learning_rate": 3.6949868358050174e-05, "loss": 0.7583, "step": 182910 }, { "epoch": 1.168623743020329, "grad_norm": 1.1179732084274292, "learning_rate": 3.694502466131665e-05, "loss": 1.0665, "step": 182920 }, { "epoch": 1.1686876301700675, "grad_norm": 1.8066426515579224, "learning_rate": 3.6940181096058026e-05, "loss": 1.0061, "step": 182930 }, { "epoch": 1.1687515173198062, "grad_norm": 1.7594103813171387, "learning_rate": 3.6935337662323074e-05, "loss": 0.5617, "step": 182940 }, { "epoch": 1.168815404469545, "grad_norm": 0.6220073103904724, "learning_rate": 3.693049436016057e-05, "loss": 0.7875, "step": 182950 }, { "epoch": 1.1688792916192836, "grad_norm": 0.6708908081054688, "learning_rate": 3.692565118961931e-05, "loss": 1.0373, "step": 182960 }, { "epoch": 1.1689431787690223, "grad_norm": 0.5550050139427185, "learning_rate": 3.6920808150748035e-05, "loss": 0.881, "step": 182970 }, { "epoch": 1.169007065918761, "grad_norm": 2.151418685913086, "learning_rate": 3.6915965243595543e-05, "loss": 0.9676, "step": 182980 }, { "epoch": 1.1690709530684997, "grad_norm": 0.657231867313385, "learning_rate": 3.69111224682106e-05, "loss": 0.9571, "step": 182990 }, { "epoch": 1.1691348402182384, "grad_norm": 0.6494762897491455, "learning_rate": 3.6906279824641975e-05, "loss": 0.9356, "step": 183000 }, { "epoch": 1.1691987273679771, "grad_norm": 0.6837002038955688, "learning_rate": 3.690143731293845e-05, "loss": 0.8295, "step": 183010 }, { "epoch": 1.1692626145177158, "grad_norm": 0.991506040096283, "learning_rate": 3.689659493314877e-05, "loss": 0.933, "step": 183020 }, { "epoch": 1.1693265016674546, "grad_norm": 1.013289213180542, "learning_rate": 3.689175268532172e-05, "loss": 0.7162, "step": 183030 }, { "epoch": 1.1693903888171933, "grad_norm": 0.630157470703125, "learning_rate": 3.688691056950606e-05, "loss": 0.6827, "step": 183040 }, { "epoch": 1.169454275966932, "grad_norm": 1.3331143856048584, "learning_rate": 3.688206858575056e-05, "loss": 0.8385, "step": 183050 }, { "epoch": 1.1695181631166707, "grad_norm": 1.0507668256759644, "learning_rate": 3.687722673410398e-05, "loss": 0.9496, "step": 183060 }, { "epoch": 1.1695820502664094, "grad_norm": 0.6389208436012268, "learning_rate": 3.6872385014615074e-05, "loss": 0.9461, "step": 183070 }, { "epoch": 1.169645937416148, "grad_norm": 1.128082036972046, "learning_rate": 3.68675434273326e-05, "loss": 0.85, "step": 183080 }, { "epoch": 1.1697098245658868, "grad_norm": 1.2686810493469238, "learning_rate": 3.686270197230533e-05, "loss": 0.7047, "step": 183090 }, { "epoch": 1.1697737117156255, "grad_norm": 0.8824436664581299, "learning_rate": 3.685786064958202e-05, "loss": 0.9775, "step": 183100 }, { "epoch": 1.1698375988653642, "grad_norm": 1.6692858934402466, "learning_rate": 3.6853019459211424e-05, "loss": 0.8535, "step": 183110 }, { "epoch": 1.169901486015103, "grad_norm": 1.2711756229400635, "learning_rate": 3.6848178401242296e-05, "loss": 0.9764, "step": 183120 }, { "epoch": 1.1699653731648416, "grad_norm": 0.6435794234275818, "learning_rate": 3.6843337475723405e-05, "loss": 0.9709, "step": 183130 }, { "epoch": 1.1700292603145803, "grad_norm": 1.0502516031265259, "learning_rate": 3.683849668270347e-05, "loss": 0.7854, "step": 183140 }, { "epoch": 1.170093147464319, "grad_norm": 0.8473712205886841, "learning_rate": 3.6833656022231266e-05, "loss": 0.6378, "step": 183150 }, { "epoch": 1.1701570346140577, "grad_norm": 1.302493691444397, "learning_rate": 3.682881549435553e-05, "loss": 0.8931, "step": 183160 }, { "epoch": 1.1702209217637964, "grad_norm": 0.6629976630210876, "learning_rate": 3.682397509912502e-05, "loss": 0.9276, "step": 183170 }, { "epoch": 1.1702848089135351, "grad_norm": 1.529744029045105, "learning_rate": 3.6819134836588476e-05, "loss": 0.9018, "step": 183180 }, { "epoch": 1.1703486960632739, "grad_norm": 0.7747004628181458, "learning_rate": 3.681429470679465e-05, "loss": 0.9624, "step": 183190 }, { "epoch": 1.1704125832130126, "grad_norm": 1.035202980041504, "learning_rate": 3.6809454709792266e-05, "loss": 0.9256, "step": 183200 }, { "epoch": 1.1704764703627513, "grad_norm": 1.0493172407150269, "learning_rate": 3.6804614845630106e-05, "loss": 0.8489, "step": 183210 }, { "epoch": 1.17054035751249, "grad_norm": 0.969811737537384, "learning_rate": 3.679977511435688e-05, "loss": 1.0871, "step": 183220 }, { "epoch": 1.1706042446622287, "grad_norm": 1.3164643049240112, "learning_rate": 3.6794935516021346e-05, "loss": 0.9199, "step": 183230 }, { "epoch": 1.1706681318119674, "grad_norm": 2.7494232654571533, "learning_rate": 3.679009605067223e-05, "loss": 0.9381, "step": 183240 }, { "epoch": 1.170732018961706, "grad_norm": 1.1078988313674927, "learning_rate": 3.6785256718358276e-05, "loss": 0.8591, "step": 183250 }, { "epoch": 1.1707959061114448, "grad_norm": 0.8365240693092346, "learning_rate": 3.678041751912822e-05, "loss": 0.8841, "step": 183260 }, { "epoch": 1.1708597932611835, "grad_norm": 1.2854210138320923, "learning_rate": 3.67755784530308e-05, "loss": 1.0024, "step": 183270 }, { "epoch": 1.1709236804109222, "grad_norm": 0.6243909001350403, "learning_rate": 3.677073952011474e-05, "loss": 0.9515, "step": 183280 }, { "epoch": 1.170987567560661, "grad_norm": 0.8205077648162842, "learning_rate": 3.676590072042878e-05, "loss": 0.7729, "step": 183290 }, { "epoch": 1.1710514547103996, "grad_norm": 0.6808330416679382, "learning_rate": 3.676106205402165e-05, "loss": 0.7247, "step": 183300 }, { "epoch": 1.1711153418601383, "grad_norm": 1.3510143756866455, "learning_rate": 3.6756223520942076e-05, "loss": 0.8529, "step": 183310 }, { "epoch": 1.171179229009877, "grad_norm": 1.1353574991226196, "learning_rate": 3.6751385121238795e-05, "loss": 0.7977, "step": 183320 }, { "epoch": 1.1712431161596157, "grad_norm": 1.2666032314300537, "learning_rate": 3.674654685496052e-05, "loss": 1.217, "step": 183330 }, { "epoch": 1.1713070033093544, "grad_norm": 0.8943315148353577, "learning_rate": 3.674170872215599e-05, "loss": 1.0473, "step": 183340 }, { "epoch": 1.1713708904590932, "grad_norm": 2.6050353050231934, "learning_rate": 3.673687072287392e-05, "loss": 1.0005, "step": 183350 }, { "epoch": 1.1714347776088319, "grad_norm": 0.4434867799282074, "learning_rate": 3.6732032857163035e-05, "loss": 0.7291, "step": 183360 }, { "epoch": 1.1714986647585706, "grad_norm": 1.2048684358596802, "learning_rate": 3.672719512507206e-05, "loss": 0.6699, "step": 183370 }, { "epoch": 1.171562551908309, "grad_norm": 0.9108380079269409, "learning_rate": 3.672235752664971e-05, "loss": 0.8089, "step": 183380 }, { "epoch": 1.171626439058048, "grad_norm": 0.8623518943786621, "learning_rate": 3.671752006194471e-05, "loss": 0.7186, "step": 183390 }, { "epoch": 1.1716903262077865, "grad_norm": 0.6611031293869019, "learning_rate": 3.6712682731005774e-05, "loss": 0.9761, "step": 183400 }, { "epoch": 1.1717542133575254, "grad_norm": 0.9480814337730408, "learning_rate": 3.6707845533881605e-05, "loss": 0.7531, "step": 183410 }, { "epoch": 1.1718181005072639, "grad_norm": 0.6748718023300171, "learning_rate": 3.670300847062094e-05, "loss": 0.9506, "step": 183420 }, { "epoch": 1.1718819876570026, "grad_norm": 1.0344165563583374, "learning_rate": 3.6698171541272486e-05, "loss": 0.7988, "step": 183430 }, { "epoch": 1.1719458748067413, "grad_norm": 3.343217134475708, "learning_rate": 3.669333474588496e-05, "loss": 1.0061, "step": 183440 }, { "epoch": 1.17200976195648, "grad_norm": 0.6782224178314209, "learning_rate": 3.668849808450705e-05, "loss": 0.9939, "step": 183450 }, { "epoch": 1.1720736491062187, "grad_norm": 2.6960456371307373, "learning_rate": 3.668366155718749e-05, "loss": 0.8139, "step": 183460 }, { "epoch": 1.1721375362559574, "grad_norm": 0.6687380075454712, "learning_rate": 3.6678825163974974e-05, "loss": 0.8677, "step": 183470 }, { "epoch": 1.172201423405696, "grad_norm": 0.8012658953666687, "learning_rate": 3.667398890491821e-05, "loss": 0.862, "step": 183480 }, { "epoch": 1.1722653105554348, "grad_norm": 0.8338684439659119, "learning_rate": 3.6669152780065906e-05, "loss": 0.641, "step": 183490 }, { "epoch": 1.1723291977051735, "grad_norm": 1.8749988079071045, "learning_rate": 3.6664316789466777e-05, "loss": 1.1108, "step": 183500 }, { "epoch": 1.1723930848549122, "grad_norm": 1.0634840726852417, "learning_rate": 3.665948093316951e-05, "loss": 1.0941, "step": 183510 }, { "epoch": 1.172456972004651, "grad_norm": 0.8182593584060669, "learning_rate": 3.6654645211222806e-05, "loss": 0.8858, "step": 183520 }, { "epoch": 1.1725208591543896, "grad_norm": 0.7956867814064026, "learning_rate": 3.664980962367538e-05, "loss": 0.8432, "step": 183530 }, { "epoch": 1.1725847463041283, "grad_norm": 1.3294975757598877, "learning_rate": 3.664497417057591e-05, "loss": 1.1343, "step": 183540 }, { "epoch": 1.172648633453867, "grad_norm": 1.4603157043457031, "learning_rate": 3.6640138851973113e-05, "loss": 1.0972, "step": 183550 }, { "epoch": 1.1727125206036058, "grad_norm": 0.9470663070678711, "learning_rate": 3.663530366791567e-05, "loss": 0.9428, "step": 183560 }, { "epoch": 1.1727764077533445, "grad_norm": 0.9879772067070007, "learning_rate": 3.6630468618452284e-05, "loss": 0.7917, "step": 183570 }, { "epoch": 1.1728402949030832, "grad_norm": 0.8244022727012634, "learning_rate": 3.662563370363164e-05, "loss": 0.9972, "step": 183580 }, { "epoch": 1.1729041820528219, "grad_norm": 0.7531734108924866, "learning_rate": 3.662079892350244e-05, "loss": 0.8495, "step": 183590 }, { "epoch": 1.1729680692025606, "grad_norm": 0.9825856685638428, "learning_rate": 3.6615964278113366e-05, "loss": 1.0305, "step": 183600 }, { "epoch": 1.1730319563522993, "grad_norm": 0.920281171798706, "learning_rate": 3.6611129767513134e-05, "loss": 0.8422, "step": 183610 }, { "epoch": 1.173095843502038, "grad_norm": 0.9334577322006226, "learning_rate": 3.6606295391750375e-05, "loss": 0.8659, "step": 183620 }, { "epoch": 1.1731597306517767, "grad_norm": 0.7163191437721252, "learning_rate": 3.6601461150873825e-05, "loss": 0.9231, "step": 183630 }, { "epoch": 1.1732236178015154, "grad_norm": 0.898003876209259, "learning_rate": 3.659662704493215e-05, "loss": 0.8257, "step": 183640 }, { "epoch": 1.1732875049512541, "grad_norm": 1.5005582571029663, "learning_rate": 3.6591793073974035e-05, "loss": 0.8273, "step": 183650 }, { "epoch": 1.1733513921009928, "grad_norm": 1.2959163188934326, "learning_rate": 3.658695923804816e-05, "loss": 1.1756, "step": 183660 }, { "epoch": 1.1734152792507315, "grad_norm": 1.2547670602798462, "learning_rate": 3.6582125537203215e-05, "loss": 0.7682, "step": 183670 }, { "epoch": 1.1734791664004702, "grad_norm": 2.360750913619995, "learning_rate": 3.657729197148787e-05, "loss": 0.7794, "step": 183680 }, { "epoch": 1.173543053550209, "grad_norm": 0.991039514541626, "learning_rate": 3.657245854095081e-05, "loss": 0.8745, "step": 183690 }, { "epoch": 1.1736069406999476, "grad_norm": 1.098197340965271, "learning_rate": 3.656762524564071e-05, "loss": 0.926, "step": 183700 }, { "epoch": 1.1736708278496863, "grad_norm": 0.8927314281463623, "learning_rate": 3.656279208560624e-05, "loss": 0.7737, "step": 183710 }, { "epoch": 1.173734714999425, "grad_norm": 1.753037691116333, "learning_rate": 3.655795906089608e-05, "loss": 0.8657, "step": 183720 }, { "epoch": 1.1737986021491638, "grad_norm": 1.0958718061447144, "learning_rate": 3.655312617155889e-05, "loss": 0.8419, "step": 183730 }, { "epoch": 1.1738624892989025, "grad_norm": 0.8453962206840515, "learning_rate": 3.654829341764336e-05, "loss": 0.6998, "step": 183740 }, { "epoch": 1.1739263764486412, "grad_norm": 0.9455485939979553, "learning_rate": 3.654346079919816e-05, "loss": 0.7942, "step": 183750 }, { "epoch": 1.1739902635983799, "grad_norm": 1.053681492805481, "learning_rate": 3.653862831627195e-05, "loss": 1.0206, "step": 183760 }, { "epoch": 1.1740541507481186, "grad_norm": 1.9257044792175293, "learning_rate": 3.65337959689134e-05, "loss": 0.8839, "step": 183770 }, { "epoch": 1.1741180378978573, "grad_norm": 0.8294272422790527, "learning_rate": 3.6528963757171175e-05, "loss": 0.8785, "step": 183780 }, { "epoch": 1.174181925047596, "grad_norm": 0.706585168838501, "learning_rate": 3.652413168109393e-05, "loss": 0.6012, "step": 183790 }, { "epoch": 1.1742458121973347, "grad_norm": 0.9635806083679199, "learning_rate": 3.6519299740730345e-05, "loss": 1.0587, "step": 183800 }, { "epoch": 1.1743096993470734, "grad_norm": 1.0544793605804443, "learning_rate": 3.651446793612907e-05, "loss": 0.8475, "step": 183810 }, { "epoch": 1.1743735864968121, "grad_norm": 0.8509196639060974, "learning_rate": 3.6509636267338776e-05, "loss": 0.9506, "step": 183820 }, { "epoch": 1.1744374736465508, "grad_norm": 0.7948233485221863, "learning_rate": 3.650480473440811e-05, "loss": 0.6293, "step": 183830 }, { "epoch": 1.1745013607962895, "grad_norm": 0.9166902899742126, "learning_rate": 3.649997333738574e-05, "loss": 0.8128, "step": 183840 }, { "epoch": 1.174565247946028, "grad_norm": 1.2377371788024902, "learning_rate": 3.649514207632031e-05, "loss": 0.887, "step": 183850 }, { "epoch": 1.174629135095767, "grad_norm": 1.338071346282959, "learning_rate": 3.6490310951260486e-05, "loss": 1.0392, "step": 183860 }, { "epoch": 1.1746930222455054, "grad_norm": 1.4467830657958984, "learning_rate": 3.648547996225492e-05, "loss": 1.032, "step": 183870 }, { "epoch": 1.1747569093952444, "grad_norm": 0.8257285952568054, "learning_rate": 3.6480649109352264e-05, "loss": 0.833, "step": 183880 }, { "epoch": 1.1748207965449828, "grad_norm": 1.4109437465667725, "learning_rate": 3.6475818392601163e-05, "loss": 0.847, "step": 183890 }, { "epoch": 1.1748846836947218, "grad_norm": 0.6960985660552979, "learning_rate": 3.647098781205027e-05, "loss": 1.0269, "step": 183900 }, { "epoch": 1.1749485708444602, "grad_norm": 0.9446884989738464, "learning_rate": 3.646615736774824e-05, "loss": 0.8225, "step": 183910 }, { "epoch": 1.175012457994199, "grad_norm": 1.0841772556304932, "learning_rate": 3.646132705974371e-05, "loss": 0.9256, "step": 183920 }, { "epoch": 1.1750763451439377, "grad_norm": 0.8433025479316711, "learning_rate": 3.645649688808532e-05, "loss": 0.7972, "step": 183930 }, { "epoch": 1.1751402322936764, "grad_norm": 0.9971235990524292, "learning_rate": 3.645166685282173e-05, "loss": 0.8408, "step": 183940 }, { "epoch": 1.175204119443415, "grad_norm": 0.9026851058006287, "learning_rate": 3.6446836954001584e-05, "loss": 1.3246, "step": 183950 }, { "epoch": 1.1752680065931538, "grad_norm": 1.0484285354614258, "learning_rate": 3.6442007191673514e-05, "loss": 0.8291, "step": 183960 }, { "epoch": 1.1753318937428925, "grad_norm": 0.8531259894371033, "learning_rate": 3.643717756588615e-05, "loss": 0.7325, "step": 183970 }, { "epoch": 1.1753957808926312, "grad_norm": 0.6556183099746704, "learning_rate": 3.643234807668815e-05, "loss": 0.8424, "step": 183980 }, { "epoch": 1.17545966804237, "grad_norm": 1.1358847618103027, "learning_rate": 3.642751872412814e-05, "loss": 0.7372, "step": 183990 }, { "epoch": 1.1755235551921086, "grad_norm": 0.8008434176445007, "learning_rate": 3.642268950825476e-05, "loss": 0.7901, "step": 184000 }, { "epoch": 1.1755874423418473, "grad_norm": 1.2002679109573364, "learning_rate": 3.6417860429116635e-05, "loss": 1.1483, "step": 184010 }, { "epoch": 1.175651329491586, "grad_norm": 0.8582051992416382, "learning_rate": 3.6413031486762415e-05, "loss": 0.7933, "step": 184020 }, { "epoch": 1.1757152166413247, "grad_norm": 0.6789759993553162, "learning_rate": 3.640820268124072e-05, "loss": 0.8829, "step": 184030 }, { "epoch": 1.1757791037910634, "grad_norm": 0.9038664698600769, "learning_rate": 3.6403374012600184e-05, "loss": 0.7159, "step": 184040 }, { "epoch": 1.1758429909408021, "grad_norm": 0.4721551239490509, "learning_rate": 3.6398545480889434e-05, "loss": 0.8584, "step": 184050 }, { "epoch": 1.1759068780905408, "grad_norm": 1.1332751512527466, "learning_rate": 3.63937170861571e-05, "loss": 0.709, "step": 184060 }, { "epoch": 1.1759707652402795, "grad_norm": 1.0257160663604736, "learning_rate": 3.6388888828451796e-05, "loss": 1.1302, "step": 184070 }, { "epoch": 1.1760346523900183, "grad_norm": 0.7956812381744385, "learning_rate": 3.638406070782217e-05, "loss": 1.0162, "step": 184080 }, { "epoch": 1.176098539539757, "grad_norm": 1.498618245124817, "learning_rate": 3.637923272431682e-05, "loss": 0.7363, "step": 184090 }, { "epoch": 1.1761624266894957, "grad_norm": 0.9680002331733704, "learning_rate": 3.63744048779844e-05, "loss": 1.1146, "step": 184100 }, { "epoch": 1.1762263138392344, "grad_norm": 0.9070209860801697, "learning_rate": 3.636957716887349e-05, "loss": 0.5749, "step": 184110 }, { "epoch": 1.176290200988973, "grad_norm": 0.7833470702171326, "learning_rate": 3.636474959703274e-05, "loss": 1.0808, "step": 184120 }, { "epoch": 1.1763540881387118, "grad_norm": 0.7628843188285828, "learning_rate": 3.635992216251075e-05, "loss": 0.7796, "step": 184130 }, { "epoch": 1.1764179752884505, "grad_norm": 1.3704489469528198, "learning_rate": 3.635509486535615e-05, "loss": 0.9249, "step": 184140 }, { "epoch": 1.1764818624381892, "grad_norm": 1.166788101196289, "learning_rate": 3.6350267705617544e-05, "loss": 0.7454, "step": 184150 }, { "epoch": 1.176545749587928, "grad_norm": 1.0495282411575317, "learning_rate": 3.6345440683343555e-05, "loss": 1.0876, "step": 184160 }, { "epoch": 1.1766096367376666, "grad_norm": 1.9336224794387817, "learning_rate": 3.6340613798582796e-05, "loss": 0.7674, "step": 184170 }, { "epoch": 1.1766735238874053, "grad_norm": 1.1545357704162598, "learning_rate": 3.633578705138386e-05, "loss": 1.0439, "step": 184180 }, { "epoch": 1.176737411037144, "grad_norm": 0.904642641544342, "learning_rate": 3.633096044179538e-05, "loss": 0.959, "step": 184190 }, { "epoch": 1.1768012981868827, "grad_norm": 1.4052455425262451, "learning_rate": 3.632613396986595e-05, "loss": 1.0384, "step": 184200 }, { "epoch": 1.1768651853366214, "grad_norm": 0.9831133484840393, "learning_rate": 3.6321307635644186e-05, "loss": 0.9644, "step": 184210 }, { "epoch": 1.1769290724863601, "grad_norm": 0.7129910588264465, "learning_rate": 3.631648143917869e-05, "loss": 0.7711, "step": 184220 }, { "epoch": 1.1769929596360988, "grad_norm": 1.238707423210144, "learning_rate": 3.631165538051805e-05, "loss": 0.8986, "step": 184230 }, { "epoch": 1.1770568467858376, "grad_norm": 3.398481607437134, "learning_rate": 3.630682945971089e-05, "loss": 0.7843, "step": 184240 }, { "epoch": 1.1771207339355763, "grad_norm": 0.990300714969635, "learning_rate": 3.630200367680581e-05, "loss": 0.9905, "step": 184250 }, { "epoch": 1.177184621085315, "grad_norm": 0.805274248123169, "learning_rate": 3.6297178031851375e-05, "loss": 0.852, "step": 184260 }, { "epoch": 1.1772485082350537, "grad_norm": 1.4010913372039795, "learning_rate": 3.629235252489624e-05, "loss": 0.8368, "step": 184270 }, { "epoch": 1.1773123953847924, "grad_norm": 0.9194250106811523, "learning_rate": 3.6287527155988966e-05, "loss": 0.792, "step": 184280 }, { "epoch": 1.177376282534531, "grad_norm": 1.0088683366775513, "learning_rate": 3.628270192517816e-05, "loss": 1.0042, "step": 184290 }, { "epoch": 1.1774401696842698, "grad_norm": 0.8841618895530701, "learning_rate": 3.6277876832512405e-05, "loss": 0.9673, "step": 184300 }, { "epoch": 1.1775040568340085, "grad_norm": 0.8211191892623901, "learning_rate": 3.627305187804031e-05, "loss": 1.0045, "step": 184310 }, { "epoch": 1.1775679439837472, "grad_norm": 1.2937853336334229, "learning_rate": 3.6268227061810454e-05, "loss": 0.8511, "step": 184320 }, { "epoch": 1.177631831133486, "grad_norm": 1.1388708353042603, "learning_rate": 3.626340238387144e-05, "loss": 0.9739, "step": 184330 }, { "epoch": 1.1776957182832244, "grad_norm": 0.8496128916740417, "learning_rate": 3.625857784427183e-05, "loss": 1.0829, "step": 184340 }, { "epoch": 1.1777596054329633, "grad_norm": 0.6666727066040039, "learning_rate": 3.625375344306025e-05, "loss": 1.1618, "step": 184350 }, { "epoch": 1.1778234925827018, "grad_norm": 0.7936133146286011, "learning_rate": 3.6248929180285254e-05, "loss": 0.7061, "step": 184360 }, { "epoch": 1.1778873797324407, "grad_norm": 0.8399503827095032, "learning_rate": 3.624410505599544e-05, "loss": 0.7688, "step": 184370 }, { "epoch": 1.1779512668821792, "grad_norm": 0.7703272104263306, "learning_rate": 3.623928107023938e-05, "loss": 1.1013, "step": 184380 }, { "epoch": 1.1780151540319181, "grad_norm": 1.9948519468307495, "learning_rate": 3.623445722306567e-05, "loss": 0.8829, "step": 184390 }, { "epoch": 1.1780790411816566, "grad_norm": 0.795413076877594, "learning_rate": 3.6229633514522886e-05, "loss": 0.9189, "step": 184400 }, { "epoch": 1.1781429283313953, "grad_norm": 1.251691460609436, "learning_rate": 3.6224809944659604e-05, "loss": 0.9461, "step": 184410 }, { "epoch": 1.178206815481134, "grad_norm": 0.6770833134651184, "learning_rate": 3.621998651352441e-05, "loss": 0.9295, "step": 184420 }, { "epoch": 1.1782707026308727, "grad_norm": 1.9652451276779175, "learning_rate": 3.621516322116586e-05, "loss": 0.8064, "step": 184430 }, { "epoch": 1.1783345897806115, "grad_norm": 0.9045741558074951, "learning_rate": 3.6210340067632556e-05, "loss": 0.9916, "step": 184440 }, { "epoch": 1.1783984769303502, "grad_norm": 0.6965897083282471, "learning_rate": 3.6205517052973045e-05, "loss": 0.901, "step": 184450 }, { "epoch": 1.1784623640800889, "grad_norm": 0.9617449641227722, "learning_rate": 3.62006941772359e-05, "loss": 0.8419, "step": 184460 }, { "epoch": 1.1785262512298276, "grad_norm": 0.9782170057296753, "learning_rate": 3.6195871440469734e-05, "loss": 1.013, "step": 184470 }, { "epoch": 1.1785901383795663, "grad_norm": 1.2114006280899048, "learning_rate": 3.6191048842723065e-05, "loss": 1.045, "step": 184480 }, { "epoch": 1.178654025529305, "grad_norm": 0.7640635371208191, "learning_rate": 3.618622638404449e-05, "loss": 0.7731, "step": 184490 }, { "epoch": 1.1787179126790437, "grad_norm": 0.8108700513839722, "learning_rate": 3.618140406448256e-05, "loss": 1.1081, "step": 184500 }, { "epoch": 1.1787817998287824, "grad_norm": 1.1722303628921509, "learning_rate": 3.6176581884085844e-05, "loss": 0.6224, "step": 184510 }, { "epoch": 1.178845686978521, "grad_norm": 0.7348328828811646, "learning_rate": 3.6171759842902916e-05, "loss": 1.0012, "step": 184520 }, { "epoch": 1.1789095741282598, "grad_norm": 1.2766432762145996, "learning_rate": 3.616693794098233e-05, "loss": 1.2396, "step": 184530 }, { "epoch": 1.1789734612779985, "grad_norm": 2.398635149002075, "learning_rate": 3.616211617837264e-05, "loss": 0.5746, "step": 184540 }, { "epoch": 1.1790373484277372, "grad_norm": 0.6826220154762268, "learning_rate": 3.6157294555122406e-05, "loss": 0.8408, "step": 184550 }, { "epoch": 1.179101235577476, "grad_norm": 1.0390657186508179, "learning_rate": 3.6152473071280204e-05, "loss": 0.6888, "step": 184560 }, { "epoch": 1.1791651227272146, "grad_norm": 2.681307792663574, "learning_rate": 3.614765172689456e-05, "loss": 0.7737, "step": 184570 }, { "epoch": 1.1792290098769533, "grad_norm": 0.9121163487434387, "learning_rate": 3.614283052201408e-05, "loss": 0.6505, "step": 184580 }, { "epoch": 1.179292897026692, "grad_norm": 0.9245350956916809, "learning_rate": 3.613800945668726e-05, "loss": 0.8225, "step": 184590 }, { "epoch": 1.1793567841764308, "grad_norm": 0.9262069463729858, "learning_rate": 3.613318853096268e-05, "loss": 0.9348, "step": 184600 }, { "epoch": 1.1794206713261695, "grad_norm": 0.5917856693267822, "learning_rate": 3.612836774488889e-05, "loss": 1.1775, "step": 184610 }, { "epoch": 1.1794845584759082, "grad_norm": 0.7416051030158997, "learning_rate": 3.612354709851444e-05, "loss": 0.8081, "step": 184620 }, { "epoch": 1.1795484456256469, "grad_norm": 1.2204169034957886, "learning_rate": 3.611872659188787e-05, "loss": 0.9868, "step": 184630 }, { "epoch": 1.1796123327753856, "grad_norm": 1.0252538919448853, "learning_rate": 3.6113906225057735e-05, "loss": 1.1028, "step": 184640 }, { "epoch": 1.1796762199251243, "grad_norm": 0.9787299633026123, "learning_rate": 3.610908599807258e-05, "loss": 0.9507, "step": 184650 }, { "epoch": 1.179740107074863, "grad_norm": 1.029250979423523, "learning_rate": 3.6104265910980936e-05, "loss": 0.8878, "step": 184660 }, { "epoch": 1.1798039942246017, "grad_norm": 2.0720760822296143, "learning_rate": 3.609944596383137e-05, "loss": 0.8151, "step": 184670 }, { "epoch": 1.1798678813743404, "grad_norm": 1.1255959272384644, "learning_rate": 3.6094626156672394e-05, "loss": 0.7925, "step": 184680 }, { "epoch": 1.179931768524079, "grad_norm": 0.9386995434761047, "learning_rate": 3.6089806489552576e-05, "loss": 1.0739, "step": 184690 }, { "epoch": 1.1799956556738178, "grad_norm": 1.8800048828125, "learning_rate": 3.6084986962520434e-05, "loss": 0.9886, "step": 184700 }, { "epoch": 1.1800595428235565, "grad_norm": 0.7651383280754089, "learning_rate": 3.608016757562451e-05, "loss": 0.859, "step": 184710 }, { "epoch": 1.1801234299732952, "grad_norm": 0.9939292073249817, "learning_rate": 3.6075348328913344e-05, "loss": 0.9842, "step": 184720 }, { "epoch": 1.180187317123034, "grad_norm": 0.6939488649368286, "learning_rate": 3.6070529222435466e-05, "loss": 1.0664, "step": 184730 }, { "epoch": 1.1802512042727726, "grad_norm": 0.9182581305503845, "learning_rate": 3.606571025623941e-05, "loss": 0.9928, "step": 184740 }, { "epoch": 1.1803150914225113, "grad_norm": 1.1776940822601318, "learning_rate": 3.606089143037371e-05, "loss": 0.8528, "step": 184750 }, { "epoch": 1.18037897857225, "grad_norm": 0.787538468837738, "learning_rate": 3.6056072744886885e-05, "loss": 0.8125, "step": 184760 }, { "epoch": 1.1804428657219888, "grad_norm": 1.208063006401062, "learning_rate": 3.605125419982748e-05, "loss": 0.7899, "step": 184770 }, { "epoch": 1.1805067528717275, "grad_norm": 0.7524814605712891, "learning_rate": 3.6046435795243994e-05, "loss": 0.7772, "step": 184780 }, { "epoch": 1.1805706400214662, "grad_norm": 0.9039172530174255, "learning_rate": 3.604161753118498e-05, "loss": 0.8546, "step": 184790 }, { "epoch": 1.1806345271712049, "grad_norm": 0.9902855753898621, "learning_rate": 3.6036799407698964e-05, "loss": 0.7876, "step": 184800 }, { "epoch": 1.1806984143209436, "grad_norm": 0.6805626749992371, "learning_rate": 3.603198142483445e-05, "loss": 0.6933, "step": 184810 }, { "epoch": 1.1807623014706823, "grad_norm": 1.1463366746902466, "learning_rate": 3.6027163582639966e-05, "loss": 0.7168, "step": 184820 }, { "epoch": 1.1808261886204208, "grad_norm": 1.0004279613494873, "learning_rate": 3.602234588116403e-05, "loss": 0.7084, "step": 184830 }, { "epoch": 1.1808900757701597, "grad_norm": 0.9166737198829651, "learning_rate": 3.601752832045517e-05, "loss": 0.6373, "step": 184840 }, { "epoch": 1.1809539629198982, "grad_norm": 1.7508405447006226, "learning_rate": 3.6012710900561895e-05, "loss": 0.673, "step": 184850 }, { "epoch": 1.181017850069637, "grad_norm": 2.30367374420166, "learning_rate": 3.6007893621532725e-05, "loss": 0.8872, "step": 184860 }, { "epoch": 1.1810817372193756, "grad_norm": 0.9799571633338928, "learning_rate": 3.6003076483416166e-05, "loss": 1.0567, "step": 184870 }, { "epoch": 1.1811456243691143, "grad_norm": 1.3801004886627197, "learning_rate": 3.5998259486260736e-05, "loss": 0.9645, "step": 184880 }, { "epoch": 1.181209511518853, "grad_norm": 3.1836953163146973, "learning_rate": 3.5993442630114946e-05, "loss": 0.7476, "step": 184890 }, { "epoch": 1.1812733986685917, "grad_norm": 1.1182852983474731, "learning_rate": 3.5988625915027316e-05, "loss": 0.7176, "step": 184900 }, { "epoch": 1.1813372858183304, "grad_norm": 1.3729959726333618, "learning_rate": 3.5983809341046334e-05, "loss": 0.769, "step": 184910 }, { "epoch": 1.1814011729680691, "grad_norm": 1.1916794776916504, "learning_rate": 3.597899290822052e-05, "loss": 0.6671, "step": 184920 }, { "epoch": 1.1814650601178078, "grad_norm": 0.9152897000312805, "learning_rate": 3.597417661659838e-05, "loss": 0.7897, "step": 184930 }, { "epoch": 1.1815289472675465, "grad_norm": 0.7210372686386108, "learning_rate": 3.596936046622841e-05, "loss": 0.7957, "step": 184940 }, { "epoch": 1.1815928344172852, "grad_norm": 1.240045189857483, "learning_rate": 3.596454445715912e-05, "loss": 0.872, "step": 184950 }, { "epoch": 1.181656721567024, "grad_norm": 0.7053720355033875, "learning_rate": 3.595972858943901e-05, "loss": 0.8205, "step": 184960 }, { "epoch": 1.1817206087167627, "grad_norm": 0.6720781326293945, "learning_rate": 3.595491286311659e-05, "loss": 1.0007, "step": 184970 }, { "epoch": 1.1817844958665014, "grad_norm": 0.8549431562423706, "learning_rate": 3.595009727824033e-05, "loss": 0.848, "step": 184980 }, { "epoch": 1.18184838301624, "grad_norm": 1.6900193691253662, "learning_rate": 3.5945281834858744e-05, "loss": 1.2726, "step": 184990 }, { "epoch": 1.1819122701659788, "grad_norm": 0.8604527115821838, "learning_rate": 3.5940466533020344e-05, "loss": 0.8544, "step": 185000 }, { "epoch": 1.1819761573157175, "grad_norm": 1.0963983535766602, "learning_rate": 3.5935651372773604e-05, "loss": 0.7911, "step": 185010 }, { "epoch": 1.1820400444654562, "grad_norm": 1.3671964406967163, "learning_rate": 3.5930836354167017e-05, "loss": 0.7819, "step": 185020 }, { "epoch": 1.182103931615195, "grad_norm": 0.5053865909576416, "learning_rate": 3.592602147724909e-05, "loss": 0.839, "step": 185030 }, { "epoch": 1.1821678187649336, "grad_norm": 0.7368679046630859, "learning_rate": 3.59212067420683e-05, "loss": 0.8822, "step": 185040 }, { "epoch": 1.1822317059146723, "grad_norm": 1.082725167274475, "learning_rate": 3.591639214867313e-05, "loss": 0.813, "step": 185050 }, { "epoch": 1.182295593064411, "grad_norm": 1.574652075767517, "learning_rate": 3.591157769711209e-05, "loss": 0.8479, "step": 185060 }, { "epoch": 1.1823594802141497, "grad_norm": 0.8046184778213501, "learning_rate": 3.5906763387433655e-05, "loss": 0.8053, "step": 185070 }, { "epoch": 1.1824233673638884, "grad_norm": 1.8920392990112305, "learning_rate": 3.590194921968629e-05, "loss": 0.847, "step": 185080 }, { "epoch": 1.1824872545136271, "grad_norm": 2.120652675628662, "learning_rate": 3.58971351939185e-05, "loss": 0.938, "step": 185090 }, { "epoch": 1.1825511416633658, "grad_norm": 0.6371691226959229, "learning_rate": 3.589232131017875e-05, "loss": 0.849, "step": 185100 }, { "epoch": 1.1826150288131045, "grad_norm": 0.5732691287994385, "learning_rate": 3.588750756851552e-05, "loss": 0.9148, "step": 185110 }, { "epoch": 1.1826789159628432, "grad_norm": 0.6342383623123169, "learning_rate": 3.5882693968977315e-05, "loss": 0.7332, "step": 185120 }, { "epoch": 1.182742803112582, "grad_norm": 0.8432683348655701, "learning_rate": 3.587788051161259e-05, "loss": 0.7871, "step": 185130 }, { "epoch": 1.1828066902623207, "grad_norm": 0.6797798871994019, "learning_rate": 3.5873067196469824e-05, "loss": 0.7023, "step": 185140 }, { "epoch": 1.1828705774120594, "grad_norm": 0.8828827142715454, "learning_rate": 3.5868254023597495e-05, "loss": 0.9116, "step": 185150 }, { "epoch": 1.182934464561798, "grad_norm": 0.8826804161071777, "learning_rate": 3.5863440993044076e-05, "loss": 1.0683, "step": 185160 }, { "epoch": 1.1829983517115368, "grad_norm": 0.9741883277893066, "learning_rate": 3.5858628104858036e-05, "loss": 0.9385, "step": 185170 }, { "epoch": 1.1830622388612755, "grad_norm": 0.9632142782211304, "learning_rate": 3.585381535908784e-05, "loss": 0.721, "step": 185180 }, { "epoch": 1.1831261260110142, "grad_norm": 1.2238630056381226, "learning_rate": 3.5849002755781967e-05, "loss": 0.8154, "step": 185190 }, { "epoch": 1.183190013160753, "grad_norm": 0.9553030133247375, "learning_rate": 3.584419029498888e-05, "loss": 0.819, "step": 185200 }, { "epoch": 1.1832539003104916, "grad_norm": 1.1240880489349365, "learning_rate": 3.583937797675704e-05, "loss": 0.8524, "step": 185210 }, { "epoch": 1.1833177874602303, "grad_norm": 1.025412678718567, "learning_rate": 3.583456580113491e-05, "loss": 1.0, "step": 185220 }, { "epoch": 1.183381674609969, "grad_norm": 2.941777229309082, "learning_rate": 3.582975376817096e-05, "loss": 0.9068, "step": 185230 }, { "epoch": 1.1834455617597077, "grad_norm": 0.9563724398612976, "learning_rate": 3.5824941877913656e-05, "loss": 0.9853, "step": 185240 }, { "epoch": 1.1835094489094464, "grad_norm": 0.7037461996078491, "learning_rate": 3.582013013041144e-05, "loss": 0.8759, "step": 185250 }, { "epoch": 1.1835733360591851, "grad_norm": 0.9281525015830994, "learning_rate": 3.581531852571278e-05, "loss": 0.629, "step": 185260 }, { "epoch": 1.1836372232089238, "grad_norm": 1.0594696998596191, "learning_rate": 3.5810507063866147e-05, "loss": 0.7937, "step": 185270 }, { "epoch": 1.1837011103586625, "grad_norm": 1.3226358890533447, "learning_rate": 3.5805695744919976e-05, "loss": 0.8282, "step": 185280 }, { "epoch": 1.1837649975084013, "grad_norm": 0.8504054546356201, "learning_rate": 3.5800884568922724e-05, "loss": 0.8351, "step": 185290 }, { "epoch": 1.18382888465814, "grad_norm": 1.078147292137146, "learning_rate": 3.5796073535922856e-05, "loss": 1.1989, "step": 185300 }, { "epoch": 1.1838927718078787, "grad_norm": 1.0209671258926392, "learning_rate": 3.57912626459688e-05, "loss": 0.7663, "step": 185310 }, { "epoch": 1.1839566589576171, "grad_norm": 1.1877695322036743, "learning_rate": 3.578645189910903e-05, "loss": 0.8764, "step": 185320 }, { "epoch": 1.184020546107356, "grad_norm": 1.095737099647522, "learning_rate": 3.5781641295391995e-05, "loss": 0.9265, "step": 185330 }, { "epoch": 1.1840844332570946, "grad_norm": 0.9853129982948303, "learning_rate": 3.577683083486613e-05, "loss": 1.2863, "step": 185340 }, { "epoch": 1.1841483204068335, "grad_norm": 0.9213379621505737, "learning_rate": 3.577202051757987e-05, "loss": 0.9187, "step": 185350 }, { "epoch": 1.184212207556572, "grad_norm": 0.8555194735527039, "learning_rate": 3.576721034358169e-05, "loss": 0.7985, "step": 185360 }, { "epoch": 1.1842760947063107, "grad_norm": 1.0572147369384766, "learning_rate": 3.576240031292001e-05, "loss": 0.9271, "step": 185370 }, { "epoch": 1.1843399818560494, "grad_norm": 0.991568922996521, "learning_rate": 3.5757590425643276e-05, "loss": 0.894, "step": 185380 }, { "epoch": 1.184403869005788, "grad_norm": 0.6871430277824402, "learning_rate": 3.575278068179992e-05, "loss": 0.9197, "step": 185390 }, { "epoch": 1.1844677561555268, "grad_norm": 0.7673249244689941, "learning_rate": 3.5747971081438394e-05, "loss": 0.7462, "step": 185400 }, { "epoch": 1.1845316433052655, "grad_norm": 1.055765986442566, "learning_rate": 3.574316162460713e-05, "loss": 0.785, "step": 185410 }, { "epoch": 1.1845955304550042, "grad_norm": 1.6008297204971313, "learning_rate": 3.5738352311354565e-05, "loss": 0.8627, "step": 185420 }, { "epoch": 1.184659417604743, "grad_norm": 1.0407480001449585, "learning_rate": 3.573354314172912e-05, "loss": 0.8712, "step": 185430 }, { "epoch": 1.1847233047544816, "grad_norm": 0.9765558242797852, "learning_rate": 3.572873411577925e-05, "loss": 0.9753, "step": 185440 }, { "epoch": 1.1847871919042203, "grad_norm": 0.7692517042160034, "learning_rate": 3.572392523355337e-05, "loss": 0.8015, "step": 185450 }, { "epoch": 1.184851079053959, "grad_norm": 1.113476276397705, "learning_rate": 3.571911649509991e-05, "loss": 0.8312, "step": 185460 }, { "epoch": 1.1849149662036977, "grad_norm": 0.9064470529556274, "learning_rate": 3.5714307900467306e-05, "loss": 0.8722, "step": 185470 }, { "epoch": 1.1849788533534364, "grad_norm": 0.764999508857727, "learning_rate": 3.570949944970397e-05, "loss": 0.7575, "step": 185480 }, { "epoch": 1.1850427405031752, "grad_norm": 0.7299469709396362, "learning_rate": 3.570469114285835e-05, "loss": 1.0981, "step": 185490 }, { "epoch": 1.1851066276529139, "grad_norm": 0.8814761638641357, "learning_rate": 3.569988297997885e-05, "loss": 1.0155, "step": 185500 }, { "epoch": 1.1851705148026526, "grad_norm": 0.8254292607307434, "learning_rate": 3.5695074961113905e-05, "loss": 0.9247, "step": 185510 }, { "epoch": 1.1852344019523913, "grad_norm": 1.3654801845550537, "learning_rate": 3.5690267086311915e-05, "loss": 0.7811, "step": 185520 }, { "epoch": 1.18529828910213, "grad_norm": 0.8215779066085815, "learning_rate": 3.568545935562133e-05, "loss": 0.9981, "step": 185530 }, { "epoch": 1.1853621762518687, "grad_norm": 0.9064726233482361, "learning_rate": 3.568065176909055e-05, "loss": 0.9709, "step": 185540 }, { "epoch": 1.1854260634016074, "grad_norm": 0.8759022355079651, "learning_rate": 3.567584432676799e-05, "loss": 0.9627, "step": 185550 }, { "epoch": 1.185489950551346, "grad_norm": 0.8267679810523987, "learning_rate": 3.5671037028702103e-05, "loss": 0.6302, "step": 185560 }, { "epoch": 1.1855538377010848, "grad_norm": 1.9346404075622559, "learning_rate": 3.566622987494124e-05, "loss": 1.0868, "step": 185570 }, { "epoch": 1.1856177248508235, "grad_norm": 0.7702456712722778, "learning_rate": 3.5661422865533845e-05, "loss": 0.9252, "step": 185580 }, { "epoch": 1.1856816120005622, "grad_norm": 1.2653367519378662, "learning_rate": 3.565661600052833e-05, "loss": 0.7676, "step": 185590 }, { "epoch": 1.185745499150301, "grad_norm": 1.1973912715911865, "learning_rate": 3.5651809279973094e-05, "loss": 1.0397, "step": 185600 }, { "epoch": 1.1858093863000396, "grad_norm": 0.9906696081161499, "learning_rate": 3.564700270391655e-05, "loss": 0.6995, "step": 185610 }, { "epoch": 1.1858732734497783, "grad_norm": 0.7793400287628174, "learning_rate": 3.5642196272407115e-05, "loss": 0.9983, "step": 185620 }, { "epoch": 1.185937160599517, "grad_norm": 0.97843337059021, "learning_rate": 3.5637389985493176e-05, "loss": 0.9133, "step": 185630 }, { "epoch": 1.1860010477492557, "grad_norm": 0.5301520824432373, "learning_rate": 3.5632583843223144e-05, "loss": 0.993, "step": 185640 }, { "epoch": 1.1860649348989944, "grad_norm": 0.8656491637229919, "learning_rate": 3.562777784564543e-05, "loss": 0.8884, "step": 185650 }, { "epoch": 1.1861288220487332, "grad_norm": 0.8169345855712891, "learning_rate": 3.5622971992808424e-05, "loss": 0.8378, "step": 185660 }, { "epoch": 1.1861927091984719, "grad_norm": 0.7268931865692139, "learning_rate": 3.561816628476053e-05, "loss": 0.9106, "step": 185670 }, { "epoch": 1.1862565963482106, "grad_norm": 0.6114735007286072, "learning_rate": 3.5613360721550136e-05, "loss": 0.6921, "step": 185680 }, { "epoch": 1.1863204834979493, "grad_norm": 1.0138870477676392, "learning_rate": 3.560855530322565e-05, "loss": 1.2098, "step": 185690 }, { "epoch": 1.186384370647688, "grad_norm": 2.26399564743042, "learning_rate": 3.560375002983547e-05, "loss": 0.9302, "step": 185700 }, { "epoch": 1.1864482577974267, "grad_norm": 1.1107441186904907, "learning_rate": 3.5598944901427976e-05, "loss": 1.0999, "step": 185710 }, { "epoch": 1.1865121449471654, "grad_norm": 0.8785502910614014, "learning_rate": 3.559413991805157e-05, "loss": 0.927, "step": 185720 }, { "epoch": 1.186576032096904, "grad_norm": 1.5931460857391357, "learning_rate": 3.558933507975463e-05, "loss": 0.8109, "step": 185730 }, { "epoch": 1.1866399192466428, "grad_norm": 0.8328737020492554, "learning_rate": 3.558453038658556e-05, "loss": 0.9513, "step": 185740 }, { "epoch": 1.1867038063963815, "grad_norm": 0.948833167552948, "learning_rate": 3.5579725838592734e-05, "loss": 0.804, "step": 185750 }, { "epoch": 1.1867676935461202, "grad_norm": 1.2792750597000122, "learning_rate": 3.557492143582454e-05, "loss": 0.9218, "step": 185760 }, { "epoch": 1.186831580695859, "grad_norm": 0.9221693873405457, "learning_rate": 3.557011717832938e-05, "loss": 0.8005, "step": 185770 }, { "epoch": 1.1868954678455976, "grad_norm": 1.035339117050171, "learning_rate": 3.5565313066155616e-05, "loss": 0.9736, "step": 185780 }, { "epoch": 1.1869593549953361, "grad_norm": 1.0393927097320557, "learning_rate": 3.5560509099351636e-05, "loss": 0.7689, "step": 185790 }, { "epoch": 1.187023242145075, "grad_norm": 0.6707600355148315, "learning_rate": 3.5555705277965825e-05, "loss": 0.6814, "step": 185800 }, { "epoch": 1.1870871292948135, "grad_norm": 0.6006231904029846, "learning_rate": 3.555090160204655e-05, "loss": 1.014, "step": 185810 }, { "epoch": 1.1871510164445525, "grad_norm": 1.551177978515625, "learning_rate": 3.5546098071642205e-05, "loss": 0.8504, "step": 185820 }, { "epoch": 1.187214903594291, "grad_norm": 1.0770034790039062, "learning_rate": 3.554129468680115e-05, "loss": 0.9141, "step": 185830 }, { "epoch": 1.1872787907440299, "grad_norm": 0.6229815483093262, "learning_rate": 3.5536491447571765e-05, "loss": 0.7852, "step": 185840 }, { "epoch": 1.1873426778937684, "grad_norm": 0.5835037231445312, "learning_rate": 3.553168835400243e-05, "loss": 0.7914, "step": 185850 }, { "epoch": 1.187406565043507, "grad_norm": 1.2109571695327759, "learning_rate": 3.552688540614151e-05, "loss": 0.8617, "step": 185860 }, { "epoch": 1.1874704521932458, "grad_norm": 0.7813235521316528, "learning_rate": 3.5522082604037376e-05, "loss": 0.9206, "step": 185870 }, { "epoch": 1.1875343393429845, "grad_norm": 1.2372418642044067, "learning_rate": 3.551727994773839e-05, "loss": 0.9416, "step": 185880 }, { "epoch": 1.1875982264927232, "grad_norm": 1.2593668699264526, "learning_rate": 3.551247743729293e-05, "loss": 0.9913, "step": 185890 }, { "epoch": 1.1876621136424619, "grad_norm": 1.3940966129302979, "learning_rate": 3.550767507274935e-05, "loss": 0.9156, "step": 185900 }, { "epoch": 1.1877260007922006, "grad_norm": 1.2378836870193481, "learning_rate": 3.550287285415602e-05, "loss": 0.8502, "step": 185910 }, { "epoch": 1.1877898879419393, "grad_norm": 1.2201237678527832, "learning_rate": 3.549807078156131e-05, "loss": 0.9283, "step": 185920 }, { "epoch": 1.187853775091678, "grad_norm": 1.0473310947418213, "learning_rate": 3.549326885501357e-05, "loss": 0.9494, "step": 185930 }, { "epoch": 1.1879176622414167, "grad_norm": 1.258222222328186, "learning_rate": 3.548846707456116e-05, "loss": 0.7309, "step": 185940 }, { "epoch": 1.1879815493911554, "grad_norm": 0.8088693022727966, "learning_rate": 3.548366544025245e-05, "loss": 0.8427, "step": 185950 }, { "epoch": 1.1880454365408941, "grad_norm": 0.7002352476119995, "learning_rate": 3.547886395213577e-05, "loss": 0.8715, "step": 185960 }, { "epoch": 1.1881093236906328, "grad_norm": 1.0667836666107178, "learning_rate": 3.5474062610259506e-05, "loss": 0.7263, "step": 185970 }, { "epoch": 1.1881732108403715, "grad_norm": 1.051770567893982, "learning_rate": 3.5469261414671996e-05, "loss": 0.9918, "step": 185980 }, { "epoch": 1.1882370979901102, "grad_norm": 1.470700979232788, "learning_rate": 3.5464460365421584e-05, "loss": 0.8464, "step": 185990 }, { "epoch": 1.188300985139849, "grad_norm": 0.7622199058532715, "learning_rate": 3.5459659462556646e-05, "loss": 0.7845, "step": 186000 }, { "epoch": 1.1883648722895876, "grad_norm": 1.0996206998825073, "learning_rate": 3.54548587061255e-05, "loss": 0.9458, "step": 186010 }, { "epoch": 1.1884287594393264, "grad_norm": 0.9898949265480042, "learning_rate": 3.545005809617653e-05, "loss": 0.8094, "step": 186020 }, { "epoch": 1.188492646589065, "grad_norm": 0.7668837308883667, "learning_rate": 3.5445257632758054e-05, "loss": 0.8887, "step": 186030 }, { "epoch": 1.1885565337388038, "grad_norm": 1.3512365818023682, "learning_rate": 3.544045731591843e-05, "loss": 0.7852, "step": 186040 }, { "epoch": 1.1886204208885425, "grad_norm": 0.7477654814720154, "learning_rate": 3.543565714570599e-05, "loss": 0.9058, "step": 186050 }, { "epoch": 1.1886843080382812, "grad_norm": 1.4681755304336548, "learning_rate": 3.543085712216909e-05, "loss": 0.9091, "step": 186060 }, { "epoch": 1.1887481951880199, "grad_norm": 1.0334805250167847, "learning_rate": 3.5426057245356055e-05, "loss": 0.8175, "step": 186070 }, { "epoch": 1.1888120823377586, "grad_norm": 1.928889274597168, "learning_rate": 3.542125751531523e-05, "loss": 0.7668, "step": 186080 }, { "epoch": 1.1888759694874973, "grad_norm": 1.266452431678772, "learning_rate": 3.541645793209496e-05, "loss": 0.9653, "step": 186090 }, { "epoch": 1.188939856637236, "grad_norm": 0.9438464641571045, "learning_rate": 3.541165849574357e-05, "loss": 0.9455, "step": 186100 }, { "epoch": 1.1890037437869747, "grad_norm": 0.8136021494865417, "learning_rate": 3.5406859206309405e-05, "loss": 0.7879, "step": 186110 }, { "epoch": 1.1890676309367134, "grad_norm": 1.0168657302856445, "learning_rate": 3.540206006384079e-05, "loss": 0.9168, "step": 186120 }, { "epoch": 1.1891315180864521, "grad_norm": 1.042479395866394, "learning_rate": 3.539726106838606e-05, "loss": 0.7625, "step": 186130 }, { "epoch": 1.1891954052361908, "grad_norm": 0.821614682674408, "learning_rate": 3.539246221999354e-05, "loss": 0.829, "step": 186140 }, { "epoch": 1.1892592923859295, "grad_norm": 0.6949642896652222, "learning_rate": 3.5387663518711564e-05, "loss": 0.8162, "step": 186150 }, { "epoch": 1.1893231795356682, "grad_norm": 1.2734757661819458, "learning_rate": 3.5382864964588447e-05, "loss": 0.8209, "step": 186160 }, { "epoch": 1.189387066685407, "grad_norm": 2.130889654159546, "learning_rate": 3.537806655767254e-05, "loss": 0.9647, "step": 186170 }, { "epoch": 1.1894509538351457, "grad_norm": 1.1269334554672241, "learning_rate": 3.537326829801215e-05, "loss": 1.0093, "step": 186180 }, { "epoch": 1.1895148409848844, "grad_norm": 0.8635377883911133, "learning_rate": 3.5368470185655605e-05, "loss": 0.9047, "step": 186190 }, { "epoch": 1.189578728134623, "grad_norm": 1.0611902475357056, "learning_rate": 3.536367222065122e-05, "loss": 0.9056, "step": 186200 }, { "epoch": 1.1896426152843618, "grad_norm": 2.2567262649536133, "learning_rate": 3.535887440304732e-05, "loss": 0.9235, "step": 186210 }, { "epoch": 1.1897065024341005, "grad_norm": 0.820324718952179, "learning_rate": 3.535407673289222e-05, "loss": 0.8314, "step": 186220 }, { "epoch": 1.1897703895838392, "grad_norm": 1.0150549411773682, "learning_rate": 3.53497589558613e-05, "loss": 0.9562, "step": 186230 }, { "epoch": 1.189834276733578, "grad_norm": 1.3573201894760132, "learning_rate": 3.534496156599203e-05, "loss": 1.0057, "step": 186240 }, { "epoch": 1.1898981638833166, "grad_norm": 1.0177377462387085, "learning_rate": 3.534016432371168e-05, "loss": 0.9452, "step": 186250 }, { "epoch": 1.1899620510330553, "grad_norm": 1.215646505355835, "learning_rate": 3.533536722906856e-05, "loss": 0.8697, "step": 186260 }, { "epoch": 1.190025938182794, "grad_norm": 0.8700612187385559, "learning_rate": 3.533057028211097e-05, "loss": 1.0512, "step": 186270 }, { "epoch": 1.1900898253325325, "grad_norm": 1.3516863584518433, "learning_rate": 3.532577348288723e-05, "loss": 0.7342, "step": 186280 }, { "epoch": 1.1901537124822714, "grad_norm": 0.7948817610740662, "learning_rate": 3.532097683144565e-05, "loss": 0.9811, "step": 186290 }, { "epoch": 1.19021759963201, "grad_norm": 1.0154379606246948, "learning_rate": 3.5316180327834525e-05, "loss": 0.7686, "step": 186300 }, { "epoch": 1.1902814867817488, "grad_norm": 0.7849642634391785, "learning_rate": 3.5311383972102175e-05, "loss": 0.7886, "step": 186310 }, { "epoch": 1.1903453739314873, "grad_norm": 0.8592081665992737, "learning_rate": 3.530658776429689e-05, "loss": 0.7697, "step": 186320 }, { "epoch": 1.1904092610812262, "grad_norm": 0.3784794807434082, "learning_rate": 3.530179170446699e-05, "loss": 0.7808, "step": 186330 }, { "epoch": 1.1904731482309647, "grad_norm": 1.2555115222930908, "learning_rate": 3.529699579266076e-05, "loss": 0.6747, "step": 186340 }, { "epoch": 1.1905370353807034, "grad_norm": 0.9044711589813232, "learning_rate": 3.5292200028926494e-05, "loss": 0.8883, "step": 186350 }, { "epoch": 1.1906009225304421, "grad_norm": 0.9068841934204102, "learning_rate": 3.5287404413312506e-05, "loss": 0.9085, "step": 186360 }, { "epoch": 1.1906648096801808, "grad_norm": 1.7138638496398926, "learning_rate": 3.528260894586708e-05, "loss": 1.3497, "step": 186370 }, { "epoch": 1.1907286968299196, "grad_norm": 1.0467746257781982, "learning_rate": 3.527781362663851e-05, "loss": 1.687, "step": 186380 }, { "epoch": 1.1907925839796583, "grad_norm": 0.8251565098762512, "learning_rate": 3.52730184556751e-05, "loss": 0.7175, "step": 186390 }, { "epoch": 1.190856471129397, "grad_norm": 0.8762297630310059, "learning_rate": 3.5268223433025135e-05, "loss": 0.9619, "step": 186400 }, { "epoch": 1.1909203582791357, "grad_norm": 0.6899451613426208, "learning_rate": 3.5263428558736896e-05, "loss": 0.7858, "step": 186410 }, { "epoch": 1.1909842454288744, "grad_norm": 1.063529133796692, "learning_rate": 3.5258633832858696e-05, "loss": 0.9464, "step": 186420 }, { "epoch": 1.191048132578613, "grad_norm": 0.955309271812439, "learning_rate": 3.525383925543879e-05, "loss": 0.7051, "step": 186430 }, { "epoch": 1.1911120197283518, "grad_norm": 0.8504273891448975, "learning_rate": 3.524904482652549e-05, "loss": 0.7116, "step": 186440 }, { "epoch": 1.1911759068780905, "grad_norm": 1.1198617219924927, "learning_rate": 3.524425054616707e-05, "loss": 0.8511, "step": 186450 }, { "epoch": 1.1912397940278292, "grad_norm": 0.7477229833602905, "learning_rate": 3.523945641441181e-05, "loss": 0.5929, "step": 186460 }, { "epoch": 1.191303681177568, "grad_norm": 0.9625139832496643, "learning_rate": 3.5234662431308e-05, "loss": 0.8808, "step": 186470 }, { "epoch": 1.1913675683273066, "grad_norm": 0.9500938653945923, "learning_rate": 3.522986859690389e-05, "loss": 0.7244, "step": 186480 }, { "epoch": 1.1914314554770453, "grad_norm": 1.6534185409545898, "learning_rate": 3.52250749112478e-05, "loss": 0.9015, "step": 186490 }, { "epoch": 1.191495342626784, "grad_norm": 1.031301736831665, "learning_rate": 3.522028137438799e-05, "loss": 0.5996, "step": 186500 }, { "epoch": 1.1915592297765227, "grad_norm": 0.9952258467674255, "learning_rate": 3.521548798637272e-05, "loss": 1.109, "step": 186510 }, { "epoch": 1.1916231169262614, "grad_norm": 1.1881864070892334, "learning_rate": 3.5210694747250286e-05, "loss": 0.968, "step": 186520 }, { "epoch": 1.1916870040760001, "grad_norm": 1.4568711519241333, "learning_rate": 3.5205901657068953e-05, "loss": 0.8803, "step": 186530 }, { "epoch": 1.1917508912257389, "grad_norm": 0.7728120684623718, "learning_rate": 3.520110871587698e-05, "loss": 0.9651, "step": 186540 }, { "epoch": 1.1918147783754776, "grad_norm": 0.7609994411468506, "learning_rate": 3.5196315923722655e-05, "loss": 1.0502, "step": 186550 }, { "epoch": 1.1918786655252163, "grad_norm": 1.627550721168518, "learning_rate": 3.519152328065423e-05, "loss": 0.9135, "step": 186560 }, { "epoch": 1.191942552674955, "grad_norm": 0.8452563881874084, "learning_rate": 3.518673078671998e-05, "loss": 0.944, "step": 186570 }, { "epoch": 1.1920064398246937, "grad_norm": 0.8744309544563293, "learning_rate": 3.518193844196816e-05, "loss": 0.7588, "step": 186580 }, { "epoch": 1.1920703269744324, "grad_norm": 0.9594758152961731, "learning_rate": 3.5177146246447046e-05, "loss": 0.8855, "step": 186590 }, { "epoch": 1.192134214124171, "grad_norm": 0.9259825348854065, "learning_rate": 3.517235420020489e-05, "loss": 0.911, "step": 186600 }, { "epoch": 1.1921981012739098, "grad_norm": 2.5260446071624756, "learning_rate": 3.516756230328995e-05, "loss": 0.7676, "step": 186610 }, { "epoch": 1.1922619884236485, "grad_norm": 0.7416990399360657, "learning_rate": 3.51627705557505e-05, "loss": 0.8542, "step": 186620 }, { "epoch": 1.1923258755733872, "grad_norm": 0.6459031105041504, "learning_rate": 3.515797895763478e-05, "loss": 0.8635, "step": 186630 }, { "epoch": 1.192389762723126, "grad_norm": 1.0951290130615234, "learning_rate": 3.5153187508991055e-05, "loss": 0.9024, "step": 186640 }, { "epoch": 1.1924536498728646, "grad_norm": 0.676724374294281, "learning_rate": 3.514839620986757e-05, "loss": 0.8166, "step": 186650 }, { "epoch": 1.1925175370226033, "grad_norm": 1.0528342723846436, "learning_rate": 3.514360506031259e-05, "loss": 0.6472, "step": 186660 }, { "epoch": 1.192581424172342, "grad_norm": 0.7471928596496582, "learning_rate": 3.513881406037437e-05, "loss": 0.6945, "step": 186670 }, { "epoch": 1.1926453113220807, "grad_norm": 1.1996952295303345, "learning_rate": 3.513402321010114e-05, "loss": 1.0772, "step": 186680 }, { "epoch": 1.1927091984718194, "grad_norm": 0.7529562711715698, "learning_rate": 3.512923250954115e-05, "loss": 1.1773, "step": 186690 }, { "epoch": 1.1927730856215581, "grad_norm": 0.6808781623840332, "learning_rate": 3.512444195874266e-05, "loss": 0.895, "step": 186700 }, { "epoch": 1.1928369727712969, "grad_norm": 0.4089086651802063, "learning_rate": 3.511965155775391e-05, "loss": 0.8306, "step": 186710 }, { "epoch": 1.1929008599210356, "grad_norm": 0.682090163230896, "learning_rate": 3.511486130662313e-05, "loss": 0.5964, "step": 186720 }, { "epoch": 1.1929647470707743, "grad_norm": 1.0363860130310059, "learning_rate": 3.5110071205398587e-05, "loss": 0.976, "step": 186730 }, { "epoch": 1.193028634220513, "grad_norm": 0.8405871391296387, "learning_rate": 3.5105281254128504e-05, "loss": 0.9637, "step": 186740 }, { "epoch": 1.1930925213702517, "grad_norm": 0.6451852917671204, "learning_rate": 3.510049145286112e-05, "loss": 0.7558, "step": 186750 }, { "epoch": 1.1931564085199904, "grad_norm": 0.9564416408538818, "learning_rate": 3.5095701801644686e-05, "loss": 0.6832, "step": 186760 }, { "epoch": 1.1932202956697289, "grad_norm": 0.837658703327179, "learning_rate": 3.5090912300527424e-05, "loss": 0.8651, "step": 186770 }, { "epoch": 1.1932841828194678, "grad_norm": 1.0022040605545044, "learning_rate": 3.5086122949557574e-05, "loss": 0.7418, "step": 186780 }, { "epoch": 1.1933480699692063, "grad_norm": 0.6673353910446167, "learning_rate": 3.508133374878337e-05, "loss": 0.9588, "step": 186790 }, { "epoch": 1.1934119571189452, "grad_norm": 1.4611053466796875, "learning_rate": 3.507654469825303e-05, "loss": 1.0155, "step": 186800 }, { "epoch": 1.1934758442686837, "grad_norm": 1.2848858833312988, "learning_rate": 3.507175579801481e-05, "loss": 0.7561, "step": 186810 }, { "epoch": 1.1935397314184226, "grad_norm": 0.8244970440864563, "learning_rate": 3.506696704811691e-05, "loss": 0.9435, "step": 186820 }, { "epoch": 1.193603618568161, "grad_norm": 1.652975082397461, "learning_rate": 3.5062178448607586e-05, "loss": 0.8877, "step": 186830 }, { "epoch": 1.1936675057178998, "grad_norm": 0.8719720840454102, "learning_rate": 3.505738999953504e-05, "loss": 0.7601, "step": 186840 }, { "epoch": 1.1937313928676385, "grad_norm": 0.8868855237960815, "learning_rate": 3.5052601700947506e-05, "loss": 0.6681, "step": 186850 }, { "epoch": 1.1937952800173772, "grad_norm": 0.731994092464447, "learning_rate": 3.50478135528932e-05, "loss": 0.6869, "step": 186860 }, { "epoch": 1.193859167167116, "grad_norm": 1.1442980766296387, "learning_rate": 3.504302555542035e-05, "loss": 0.9518, "step": 186870 }, { "epoch": 1.1939230543168546, "grad_norm": 0.6606703400611877, "learning_rate": 3.503823770857718e-05, "loss": 0.9076, "step": 186880 }, { "epoch": 1.1939869414665933, "grad_norm": 0.9944251775741577, "learning_rate": 3.503345001241189e-05, "loss": 0.6579, "step": 186890 }, { "epoch": 1.194050828616332, "grad_norm": 0.8377325534820557, "learning_rate": 3.5028662466972715e-05, "loss": 0.832, "step": 186900 }, { "epoch": 1.1941147157660708, "grad_norm": 1.2907224893569946, "learning_rate": 3.5023875072307855e-05, "loss": 0.6895, "step": 186910 }, { "epoch": 1.1941786029158095, "grad_norm": 2.1751160621643066, "learning_rate": 3.501908782846553e-05, "loss": 0.8754, "step": 186920 }, { "epoch": 1.1942424900655482, "grad_norm": 1.0982850790023804, "learning_rate": 3.501430073549394e-05, "loss": 0.849, "step": 186930 }, { "epoch": 1.1943063772152869, "grad_norm": 2.043400526046753, "learning_rate": 3.500951379344132e-05, "loss": 0.8272, "step": 186940 }, { "epoch": 1.1943702643650256, "grad_norm": 1.3341394662857056, "learning_rate": 3.5004727002355864e-05, "loss": 0.901, "step": 186950 }, { "epoch": 1.1944341515147643, "grad_norm": 1.1250461339950562, "learning_rate": 3.4999940362285776e-05, "loss": 1.1599, "step": 186960 }, { "epoch": 1.194498038664503, "grad_norm": 1.1271651983261108, "learning_rate": 3.499515387327927e-05, "loss": 0.8843, "step": 186970 }, { "epoch": 1.1945619258142417, "grad_norm": 0.9995719194412231, "learning_rate": 3.499036753538454e-05, "loss": 0.8182, "step": 186980 }, { "epoch": 1.1946258129639804, "grad_norm": 0.8369683027267456, "learning_rate": 3.49855813486498e-05, "loss": 0.8878, "step": 186990 }, { "epoch": 1.194689700113719, "grad_norm": 1.3011962175369263, "learning_rate": 3.498079531312324e-05, "loss": 0.8289, "step": 187000 }, { "epoch": 1.1947535872634578, "grad_norm": 2.0897181034088135, "learning_rate": 3.4976009428853055e-05, "loss": 0.7438, "step": 187010 }, { "epoch": 1.1948174744131965, "grad_norm": 0.9380792379379272, "learning_rate": 3.4971223695887464e-05, "loss": 0.9273, "step": 187020 }, { "epoch": 1.1948813615629352, "grad_norm": 0.6861240863800049, "learning_rate": 3.496643811427466e-05, "loss": 0.6861, "step": 187030 }, { "epoch": 1.194945248712674, "grad_norm": 1.5726001262664795, "learning_rate": 3.4961652684062815e-05, "loss": 0.8486, "step": 187040 }, { "epoch": 1.1950091358624126, "grad_norm": 0.8573664426803589, "learning_rate": 3.495686740530015e-05, "loss": 0.8446, "step": 187050 }, { "epoch": 1.1950730230121513, "grad_norm": 0.8347463607788086, "learning_rate": 3.4952082278034836e-05, "loss": 0.9351, "step": 187060 }, { "epoch": 1.19513691016189, "grad_norm": 0.9354316592216492, "learning_rate": 3.494729730231507e-05, "loss": 0.8548, "step": 187070 }, { "epoch": 1.1952007973116288, "grad_norm": 0.9122055172920227, "learning_rate": 3.494251247818905e-05, "loss": 0.8454, "step": 187080 }, { "epoch": 1.1952646844613675, "grad_norm": 1.073345422744751, "learning_rate": 3.493772780570496e-05, "loss": 0.6588, "step": 187090 }, { "epoch": 1.1953285716111062, "grad_norm": 0.8936179876327515, "learning_rate": 3.4932943284910967e-05, "loss": 0.878, "step": 187100 }, { "epoch": 1.1953924587608449, "grad_norm": 1.066080927848816, "learning_rate": 3.492815891585528e-05, "loss": 0.8864, "step": 187110 }, { "epoch": 1.1954563459105836, "grad_norm": 1.137028455734253, "learning_rate": 3.4923374698586074e-05, "loss": 0.8987, "step": 187120 }, { "epoch": 1.1955202330603223, "grad_norm": 1.0188874006271362, "learning_rate": 3.491859063315152e-05, "loss": 0.7188, "step": 187130 }, { "epoch": 1.195584120210061, "grad_norm": 0.8354491591453552, "learning_rate": 3.491380671959981e-05, "loss": 1.045, "step": 187140 }, { "epoch": 1.1956480073597997, "grad_norm": 1.3807976245880127, "learning_rate": 3.490902295797912e-05, "loss": 0.8854, "step": 187150 }, { "epoch": 1.1957118945095384, "grad_norm": 1.2778666019439697, "learning_rate": 3.490423934833763e-05, "loss": 0.8915, "step": 187160 }, { "epoch": 1.1957757816592771, "grad_norm": 0.9576749205589294, "learning_rate": 3.48994558907235e-05, "loss": 0.8141, "step": 187170 }, { "epoch": 1.1958396688090158, "grad_norm": 1.2838776111602783, "learning_rate": 3.4894672585184916e-05, "loss": 0.8692, "step": 187180 }, { "epoch": 1.1959035559587545, "grad_norm": 1.1865975856781006, "learning_rate": 3.488988943177005e-05, "loss": 0.8883, "step": 187190 }, { "epoch": 1.1959674431084932, "grad_norm": 0.44902050495147705, "learning_rate": 3.488510643052706e-05, "loss": 0.5436, "step": 187200 }, { "epoch": 1.196031330258232, "grad_norm": 1.1116329431533813, "learning_rate": 3.4880323581504126e-05, "loss": 0.8478, "step": 187210 }, { "epoch": 1.1960952174079706, "grad_norm": 1.1291851997375488, "learning_rate": 3.487554088474942e-05, "loss": 0.8885, "step": 187220 }, { "epoch": 1.1961591045577094, "grad_norm": 0.8840822577476501, "learning_rate": 3.48707583403111e-05, "loss": 1.0356, "step": 187230 }, { "epoch": 1.196222991707448, "grad_norm": 0.7359145283699036, "learning_rate": 3.486597594823733e-05, "loss": 0.7619, "step": 187240 }, { "epoch": 1.1962868788571868, "grad_norm": 0.3909396529197693, "learning_rate": 3.4861193708576276e-05, "loss": 0.6675, "step": 187250 }, { "epoch": 1.1963507660069252, "grad_norm": 0.7241166234016418, "learning_rate": 3.4856411621376096e-05, "loss": 1.0381, "step": 187260 }, { "epoch": 1.1964146531566642, "grad_norm": 0.7049499154090881, "learning_rate": 3.485162968668496e-05, "loss": 0.8549, "step": 187270 }, { "epoch": 1.1964785403064027, "grad_norm": 0.6992123126983643, "learning_rate": 3.4846847904551006e-05, "loss": 0.8191, "step": 187280 }, { "epoch": 1.1965424274561416, "grad_norm": 0.6694308519363403, "learning_rate": 3.484206627502241e-05, "loss": 0.9966, "step": 187290 }, { "epoch": 1.19660631460588, "grad_norm": 2.5797371864318848, "learning_rate": 3.483728479814732e-05, "loss": 0.8075, "step": 187300 }, { "epoch": 1.1966702017556188, "grad_norm": 1.4046313762664795, "learning_rate": 3.4832503473973885e-05, "loss": 0.8559, "step": 187310 }, { "epoch": 1.1967340889053575, "grad_norm": 0.6860223412513733, "learning_rate": 3.482772230255027e-05, "loss": 1.0585, "step": 187320 }, { "epoch": 1.1967979760550962, "grad_norm": 1.6226650476455688, "learning_rate": 3.4822941283924604e-05, "loss": 0.9188, "step": 187330 }, { "epoch": 1.196861863204835, "grad_norm": 0.9351048469543457, "learning_rate": 3.481816041814506e-05, "loss": 1.1464, "step": 187340 }, { "epoch": 1.1969257503545736, "grad_norm": 0.7233309745788574, "learning_rate": 3.4813379705259775e-05, "loss": 0.7825, "step": 187350 }, { "epoch": 1.1969896375043123, "grad_norm": 0.7308865189552307, "learning_rate": 3.480859914531689e-05, "loss": 1.0493, "step": 187360 }, { "epoch": 1.197053524654051, "grad_norm": 0.6127648949623108, "learning_rate": 3.480381873836456e-05, "loss": 0.6914, "step": 187370 }, { "epoch": 1.1971174118037897, "grad_norm": 0.5740570425987244, "learning_rate": 3.479903848445092e-05, "loss": 0.828, "step": 187380 }, { "epoch": 1.1971812989535284, "grad_norm": 0.9117375016212463, "learning_rate": 3.4794258383624114e-05, "loss": 0.9847, "step": 187390 }, { "epoch": 1.1972451861032671, "grad_norm": 1.000638484954834, "learning_rate": 3.478947843593228e-05, "loss": 0.7421, "step": 187400 }, { "epoch": 1.1973090732530058, "grad_norm": 1.0521003007888794, "learning_rate": 3.4784698641423553e-05, "loss": 0.8257, "step": 187410 }, { "epoch": 1.1973729604027445, "grad_norm": 0.79627525806427, "learning_rate": 3.477991900014608e-05, "loss": 0.8698, "step": 187420 }, { "epoch": 1.1974368475524833, "grad_norm": 1.0833988189697266, "learning_rate": 3.477513951214798e-05, "loss": 0.8366, "step": 187430 }, { "epoch": 1.197500734702222, "grad_norm": 0.6187765002250671, "learning_rate": 3.47703601774774e-05, "loss": 0.7401, "step": 187440 }, { "epoch": 1.1975646218519607, "grad_norm": 2.6944620609283447, "learning_rate": 3.4765580996182476e-05, "loss": 0.826, "step": 187450 }, { "epoch": 1.1976285090016994, "grad_norm": 1.1974648237228394, "learning_rate": 3.476080196831133e-05, "loss": 1.0353, "step": 187460 }, { "epoch": 1.197692396151438, "grad_norm": 1.277348518371582, "learning_rate": 3.475602309391208e-05, "loss": 0.9897, "step": 187470 }, { "epoch": 1.1977562833011768, "grad_norm": 0.7508608102798462, "learning_rate": 3.475124437303288e-05, "loss": 0.6981, "step": 187480 }, { "epoch": 1.1978201704509155, "grad_norm": 0.5950019359588623, "learning_rate": 3.474646580572184e-05, "loss": 0.8133, "step": 187490 }, { "epoch": 1.1978840576006542, "grad_norm": 0.7577312588691711, "learning_rate": 3.474168739202708e-05, "loss": 1.0592, "step": 187500 }, { "epoch": 1.197947944750393, "grad_norm": 0.863229513168335, "learning_rate": 3.473690913199672e-05, "loss": 0.5926, "step": 187510 }, { "epoch": 1.1980118319001316, "grad_norm": 1.0712931156158447, "learning_rate": 3.4732131025678905e-05, "loss": 0.9704, "step": 187520 }, { "epoch": 1.1980757190498703, "grad_norm": 0.918846845626831, "learning_rate": 3.472735307312173e-05, "loss": 0.8673, "step": 187530 }, { "epoch": 1.198139606199609, "grad_norm": 1.0764986276626587, "learning_rate": 3.4722575274373315e-05, "loss": 1.0774, "step": 187540 }, { "epoch": 1.1982034933493477, "grad_norm": 1.566809058189392, "learning_rate": 3.47177976294818e-05, "loss": 0.7657, "step": 187550 }, { "epoch": 1.1982673804990864, "grad_norm": 1.0461945533752441, "learning_rate": 3.471302013849527e-05, "loss": 0.8546, "step": 187560 }, { "epoch": 1.1983312676488251, "grad_norm": 1.0666048526763916, "learning_rate": 3.4708242801461866e-05, "loss": 0.7968, "step": 187570 }, { "epoch": 1.1983951547985638, "grad_norm": 1.0975289344787598, "learning_rate": 3.470346561842967e-05, "loss": 0.7919, "step": 187580 }, { "epoch": 1.1984590419483026, "grad_norm": 1.0460991859436035, "learning_rate": 3.469868858944683e-05, "loss": 0.7248, "step": 187590 }, { "epoch": 1.1985229290980413, "grad_norm": 0.9325404167175293, "learning_rate": 3.469391171456142e-05, "loss": 0.8127, "step": 187600 }, { "epoch": 1.19858681624778, "grad_norm": 1.0985088348388672, "learning_rate": 3.468913499382156e-05, "loss": 0.7552, "step": 187610 }, { "epoch": 1.1986507033975187, "grad_norm": 1.1224279403686523, "learning_rate": 3.468435842727536e-05, "loss": 1.0798, "step": 187620 }, { "epoch": 1.1987145905472574, "grad_norm": 0.7109350562095642, "learning_rate": 3.4679582014970924e-05, "loss": 0.8925, "step": 187630 }, { "epoch": 1.198778477696996, "grad_norm": 1.715866208076477, "learning_rate": 3.467480575695635e-05, "loss": 0.8057, "step": 187640 }, { "epoch": 1.1988423648467348, "grad_norm": 0.8582981824874878, "learning_rate": 3.467002965327974e-05, "loss": 0.8066, "step": 187650 }, { "epoch": 1.1989062519964735, "grad_norm": 1.1021723747253418, "learning_rate": 3.46652537039892e-05, "loss": 0.971, "step": 187660 }, { "epoch": 1.1989701391462122, "grad_norm": 1.0566543340682983, "learning_rate": 3.4660477909132804e-05, "loss": 0.87, "step": 187670 }, { "epoch": 1.199034026295951, "grad_norm": 1.064985752105713, "learning_rate": 3.4655702268758675e-05, "loss": 1.1492, "step": 187680 }, { "epoch": 1.1990979134456896, "grad_norm": 0.6998661756515503, "learning_rate": 3.4650926782914906e-05, "loss": 0.6774, "step": 187690 }, { "epoch": 1.1991618005954283, "grad_norm": 0.8879082798957825, "learning_rate": 3.4646151451649575e-05, "loss": 1.0302, "step": 187700 }, { "epoch": 1.199225687745167, "grad_norm": 0.7385240197181702, "learning_rate": 3.4641376275010786e-05, "loss": 0.6694, "step": 187710 }, { "epoch": 1.1992895748949057, "grad_norm": 0.822397768497467, "learning_rate": 3.4636601253046616e-05, "loss": 0.7778, "step": 187720 }, { "epoch": 1.1993534620446444, "grad_norm": 1.2824455499649048, "learning_rate": 3.4631826385805165e-05, "loss": 1.0233, "step": 187730 }, { "epoch": 1.1994173491943831, "grad_norm": 1.1572147607803345, "learning_rate": 3.462705167333452e-05, "loss": 0.8526, "step": 187740 }, { "epoch": 1.1994812363441216, "grad_norm": 0.6877502799034119, "learning_rate": 3.4622277115682765e-05, "loss": 0.9332, "step": 187750 }, { "epoch": 1.1995451234938606, "grad_norm": 1.0212002992630005, "learning_rate": 3.4617502712897986e-05, "loss": 1.216, "step": 187760 }, { "epoch": 1.199609010643599, "grad_norm": 0.8066684603691101, "learning_rate": 3.461272846502826e-05, "loss": 0.897, "step": 187770 }, { "epoch": 1.199672897793338, "grad_norm": 1.236677885055542, "learning_rate": 3.460795437212167e-05, "loss": 0.9163, "step": 187780 }, { "epoch": 1.1997367849430765, "grad_norm": 1.0097334384918213, "learning_rate": 3.46031804342263e-05, "loss": 0.7069, "step": 187790 }, { "epoch": 1.1998006720928152, "grad_norm": 0.9955973625183105, "learning_rate": 3.459840665139021e-05, "loss": 0.8812, "step": 187800 }, { "epoch": 1.1998645592425539, "grad_norm": 1.2567247152328491, "learning_rate": 3.4593633023661503e-05, "loss": 0.9321, "step": 187810 }, { "epoch": 1.1999284463922926, "grad_norm": 1.278200387954712, "learning_rate": 3.458885955108824e-05, "loss": 1.1415, "step": 187820 }, { "epoch": 1.1999923335420313, "grad_norm": 0.7412505745887756, "learning_rate": 3.458408623371848e-05, "loss": 0.805, "step": 187830 }, { "epoch": 1.20005622069177, "grad_norm": 0.7039709687232971, "learning_rate": 3.457931307160032e-05, "loss": 1.0012, "step": 187840 }, { "epoch": 1.2001201078415087, "grad_norm": 0.9330798387527466, "learning_rate": 3.4574540064781814e-05, "loss": 0.8153, "step": 187850 }, { "epoch": 1.2001839949912474, "grad_norm": 0.8370354771614075, "learning_rate": 3.456976721331102e-05, "loss": 0.633, "step": 187860 }, { "epoch": 1.200247882140986, "grad_norm": 1.2622960805892944, "learning_rate": 3.4564994517236036e-05, "loss": 0.8454, "step": 187870 }, { "epoch": 1.2003117692907248, "grad_norm": 0.9235817790031433, "learning_rate": 3.456022197660491e-05, "loss": 1.0776, "step": 187880 }, { "epoch": 1.2003756564404635, "grad_norm": 0.7652870416641235, "learning_rate": 3.4555449591465704e-05, "loss": 0.9432, "step": 187890 }, { "epoch": 1.2004395435902022, "grad_norm": 0.9901490211486816, "learning_rate": 3.455067736186649e-05, "loss": 0.8574, "step": 187900 }, { "epoch": 1.200503430739941, "grad_norm": 1.7025309801101685, "learning_rate": 3.454590528785531e-05, "loss": 0.7714, "step": 187910 }, { "epoch": 1.2005673178896796, "grad_norm": 0.802376389503479, "learning_rate": 3.454113336948024e-05, "loss": 1.3025, "step": 187920 }, { "epoch": 1.2006312050394183, "grad_norm": 1.1113643646240234, "learning_rate": 3.453636160678933e-05, "loss": 0.9288, "step": 187930 }, { "epoch": 1.200695092189157, "grad_norm": 0.5715786814689636, "learning_rate": 3.4531589999830626e-05, "loss": 0.8644, "step": 187940 }, { "epoch": 1.2007589793388957, "grad_norm": 1.303452968597412, "learning_rate": 3.452681854865221e-05, "loss": 0.7462, "step": 187950 }, { "epoch": 1.2008228664886345, "grad_norm": 0.7303462028503418, "learning_rate": 3.45220472533021e-05, "loss": 0.6541, "step": 187960 }, { "epoch": 1.2008867536383732, "grad_norm": 1.4614161252975464, "learning_rate": 3.451727611382838e-05, "loss": 0.8383, "step": 187970 }, { "epoch": 1.2009506407881119, "grad_norm": 0.9907222390174866, "learning_rate": 3.4512505130279074e-05, "loss": 0.6691, "step": 187980 }, { "epoch": 1.2010145279378506, "grad_norm": 0.7968649864196777, "learning_rate": 3.450773430270224e-05, "loss": 0.7508, "step": 187990 }, { "epoch": 1.2010784150875893, "grad_norm": 1.011256217956543, "learning_rate": 3.450296363114593e-05, "loss": 0.8145, "step": 188000 }, { "epoch": 1.201142302237328, "grad_norm": 2.9437148571014404, "learning_rate": 3.4498193115658184e-05, "loss": 0.838, "step": 188010 }, { "epoch": 1.2012061893870667, "grad_norm": 1.3569848537445068, "learning_rate": 3.449342275628704e-05, "loss": 1.2062, "step": 188020 }, { "epoch": 1.2012700765368054, "grad_norm": 0.9692744612693787, "learning_rate": 3.448865255308054e-05, "loss": 0.9286, "step": 188030 }, { "epoch": 1.201333963686544, "grad_norm": 1.1860573291778564, "learning_rate": 3.448388250608674e-05, "loss": 0.8968, "step": 188040 }, { "epoch": 1.2013978508362828, "grad_norm": 1.0160366296768188, "learning_rate": 3.4479112615353654e-05, "loss": 0.8594, "step": 188050 }, { "epoch": 1.2014617379860215, "grad_norm": 0.9599398970603943, "learning_rate": 3.447434288092932e-05, "loss": 0.8734, "step": 188060 }, { "epoch": 1.2015256251357602, "grad_norm": 0.8406432271003723, "learning_rate": 3.4469573302861806e-05, "loss": 0.8191, "step": 188070 }, { "epoch": 1.201589512285499, "grad_norm": 1.5487374067306519, "learning_rate": 3.446480388119912e-05, "loss": 0.9018, "step": 188080 }, { "epoch": 1.2016533994352376, "grad_norm": 1.019119381904602, "learning_rate": 3.44600346159893e-05, "loss": 0.9307, "step": 188090 }, { "epoch": 1.2017172865849763, "grad_norm": 0.5902031064033508, "learning_rate": 3.4455265507280374e-05, "loss": 0.8977, "step": 188100 }, { "epoch": 1.201781173734715, "grad_norm": 0.7112128734588623, "learning_rate": 3.445049655512037e-05, "loss": 0.8325, "step": 188110 }, { "epoch": 1.2018450608844538, "grad_norm": 0.95228111743927, "learning_rate": 3.444572775955732e-05, "loss": 0.8221, "step": 188120 }, { "epoch": 1.2019089480341925, "grad_norm": 0.8991641402244568, "learning_rate": 3.444095912063927e-05, "loss": 1.1107, "step": 188130 }, { "epoch": 1.2019728351839312, "grad_norm": 0.8079290390014648, "learning_rate": 3.44361906384142e-05, "loss": 1.0447, "step": 188140 }, { "epoch": 1.2020367223336699, "grad_norm": 2.5006215572357178, "learning_rate": 3.443142231293016e-05, "loss": 1.1094, "step": 188150 }, { "epoch": 1.2021006094834086, "grad_norm": 1.0621665716171265, "learning_rate": 3.4426654144235157e-05, "loss": 0.7572, "step": 188160 }, { "epoch": 1.2021644966331473, "grad_norm": 1.2878823280334473, "learning_rate": 3.442188613237723e-05, "loss": 0.6545, "step": 188170 }, { "epoch": 1.202228383782886, "grad_norm": 1.7365878820419312, "learning_rate": 3.441711827740437e-05, "loss": 1.0443, "step": 188180 }, { "epoch": 1.2022922709326247, "grad_norm": 1.051690697669983, "learning_rate": 3.441235057936462e-05, "loss": 0.9973, "step": 188190 }, { "epoch": 1.2023561580823634, "grad_norm": 0.9624873995780945, "learning_rate": 3.440758303830599e-05, "loss": 0.8223, "step": 188200 }, { "epoch": 1.202420045232102, "grad_norm": 1.7145956754684448, "learning_rate": 3.4402815654276475e-05, "loss": 0.8796, "step": 188210 }, { "epoch": 1.2024839323818406, "grad_norm": 1.1179742813110352, "learning_rate": 3.439804842732411e-05, "loss": 0.7697, "step": 188220 }, { "epoch": 1.2025478195315795, "grad_norm": 0.8818740844726562, "learning_rate": 3.43932813574969e-05, "loss": 0.8465, "step": 188230 }, { "epoch": 1.202611706681318, "grad_norm": 0.8461005687713623, "learning_rate": 3.4388514444842835e-05, "loss": 0.8352, "step": 188240 }, { "epoch": 1.202675593831057, "grad_norm": 1.1533595323562622, "learning_rate": 3.4383747689409944e-05, "loss": 0.7536, "step": 188250 }, { "epoch": 1.2027394809807954, "grad_norm": 2.3494560718536377, "learning_rate": 3.437898109124622e-05, "loss": 0.843, "step": 188260 }, { "epoch": 1.2028033681305343, "grad_norm": 1.2678680419921875, "learning_rate": 3.4374214650399675e-05, "loss": 0.7982, "step": 188270 }, { "epoch": 1.2028672552802728, "grad_norm": 0.7134614586830139, "learning_rate": 3.43694483669183e-05, "loss": 1.0344, "step": 188280 }, { "epoch": 1.2029311424300115, "grad_norm": 2.609257698059082, "learning_rate": 3.436468224085011e-05, "loss": 0.8643, "step": 188290 }, { "epoch": 1.2029950295797502, "grad_norm": 0.8375333547592163, "learning_rate": 3.43599162722431e-05, "loss": 0.7731, "step": 188300 }, { "epoch": 1.203058916729489, "grad_norm": 0.7802673578262329, "learning_rate": 3.435562703516575e-05, "loss": 0.7635, "step": 188310 }, { "epoch": 1.2031228038792277, "grad_norm": 0.7882365584373474, "learning_rate": 3.4350861365867205e-05, "loss": 0.7978, "step": 188320 }, { "epoch": 1.2031866910289664, "grad_norm": 0.7824183106422424, "learning_rate": 3.4346095854169016e-05, "loss": 0.6797, "step": 188330 }, { "epoch": 1.203250578178705, "grad_norm": 0.7336857318878174, "learning_rate": 3.434133050011919e-05, "loss": 0.7331, "step": 188340 }, { "epoch": 1.2033144653284438, "grad_norm": 1.238508701324463, "learning_rate": 3.433656530376571e-05, "loss": 0.6601, "step": 188350 }, { "epoch": 1.2033783524781825, "grad_norm": 1.044569730758667, "learning_rate": 3.4331800265156565e-05, "loss": 0.9067, "step": 188360 }, { "epoch": 1.2034422396279212, "grad_norm": 0.8683834075927734, "learning_rate": 3.432703538433976e-05, "loss": 0.7587, "step": 188370 }, { "epoch": 1.20350612677766, "grad_norm": 1.4416515827178955, "learning_rate": 3.432227066136326e-05, "loss": 1.0052, "step": 188380 }, { "epoch": 1.2035700139273986, "grad_norm": 0.8313100337982178, "learning_rate": 3.431750609627504e-05, "loss": 0.9759, "step": 188390 }, { "epoch": 1.2036339010771373, "grad_norm": 1.365005373954773, "learning_rate": 3.4312741689123115e-05, "loss": 0.7126, "step": 188400 }, { "epoch": 1.203697788226876, "grad_norm": 0.9269607663154602, "learning_rate": 3.430797743995546e-05, "loss": 0.8952, "step": 188410 }, { "epoch": 1.2037616753766147, "grad_norm": 0.9355165362358093, "learning_rate": 3.430321334882004e-05, "loss": 1.0636, "step": 188420 }, { "epoch": 1.2038255625263534, "grad_norm": 0.822027862071991, "learning_rate": 3.4298449415764846e-05, "loss": 0.7767, "step": 188430 }, { "epoch": 1.2038894496760921, "grad_norm": 0.9582952857017517, "learning_rate": 3.429368564083784e-05, "loss": 0.783, "step": 188440 }, { "epoch": 1.2039533368258308, "grad_norm": 0.9899607300758362, "learning_rate": 3.428892202408702e-05, "loss": 0.6833, "step": 188450 }, { "epoch": 1.2040172239755695, "grad_norm": 0.8969405889511108, "learning_rate": 3.428415856556034e-05, "loss": 0.9458, "step": 188460 }, { "epoch": 1.2040811111253082, "grad_norm": 0.8101646304130554, "learning_rate": 3.427939526530578e-05, "loss": 0.8489, "step": 188470 }, { "epoch": 1.204144998275047, "grad_norm": 0.9676410555839539, "learning_rate": 3.427463212337131e-05, "loss": 1.2028, "step": 188480 }, { "epoch": 1.2042088854247857, "grad_norm": 1.022590160369873, "learning_rate": 3.42698691398049e-05, "loss": 0.6856, "step": 188490 }, { "epoch": 1.2042727725745244, "grad_norm": 1.153812050819397, "learning_rate": 3.4265106314654506e-05, "loss": 0.7006, "step": 188500 }, { "epoch": 1.204336659724263, "grad_norm": 1.513657808303833, "learning_rate": 3.426034364796811e-05, "loss": 0.9024, "step": 188510 }, { "epoch": 1.2044005468740018, "grad_norm": 2.0316057205200195, "learning_rate": 3.425558113979367e-05, "loss": 0.9124, "step": 188520 }, { "epoch": 1.2044644340237405, "grad_norm": 0.7945497035980225, "learning_rate": 3.425081879017916e-05, "loss": 0.7946, "step": 188530 }, { "epoch": 1.2045283211734792, "grad_norm": 0.8211964964866638, "learning_rate": 3.4246056599172516e-05, "loss": 0.895, "step": 188540 }, { "epoch": 1.204592208323218, "grad_norm": 0.8268355131149292, "learning_rate": 3.424129456682172e-05, "loss": 0.9794, "step": 188550 }, { "epoch": 1.2046560954729566, "grad_norm": 1.3753883838653564, "learning_rate": 3.4236532693174716e-05, "loss": 0.8483, "step": 188560 }, { "epoch": 1.2047199826226953, "grad_norm": 1.092286467552185, "learning_rate": 3.4231770978279474e-05, "loss": 0.7269, "step": 188570 }, { "epoch": 1.204783869772434, "grad_norm": 0.8295246362686157, "learning_rate": 3.422700942218393e-05, "loss": 1.006, "step": 188580 }, { "epoch": 1.2048477569221727, "grad_norm": 1.040263056755066, "learning_rate": 3.422224802493605e-05, "loss": 0.6638, "step": 188590 }, { "epoch": 1.2049116440719114, "grad_norm": 0.6874042749404907, "learning_rate": 3.421748678658378e-05, "loss": 0.8416, "step": 188600 }, { "epoch": 1.2049755312216501, "grad_norm": 0.7841446995735168, "learning_rate": 3.4212725707175075e-05, "loss": 0.9625, "step": 188610 }, { "epoch": 1.2050394183713888, "grad_norm": 1.3204227685928345, "learning_rate": 3.4207964786757876e-05, "loss": 0.9017, "step": 188620 }, { "epoch": 1.2051033055211275, "grad_norm": 1.4805320501327515, "learning_rate": 3.4203204025380145e-05, "loss": 0.8957, "step": 188630 }, { "epoch": 1.2051671926708662, "grad_norm": 1.7014871835708618, "learning_rate": 3.41984434230898e-05, "loss": 1.0828, "step": 188640 }, { "epoch": 1.205231079820605, "grad_norm": 1.158589243888855, "learning_rate": 3.419368297993481e-05, "loss": 0.8051, "step": 188650 }, { "epoch": 1.2052949669703437, "grad_norm": 1.0002838373184204, "learning_rate": 3.41889226959631e-05, "loss": 0.6792, "step": 188660 }, { "epoch": 1.2053588541200824, "grad_norm": 0.8275355696678162, "learning_rate": 3.418416257122262e-05, "loss": 0.9482, "step": 188670 }, { "epoch": 1.205422741269821, "grad_norm": 0.9177698493003845, "learning_rate": 3.417940260576131e-05, "loss": 1.1724, "step": 188680 }, { "epoch": 1.2054866284195598, "grad_norm": 0.7963119745254517, "learning_rate": 3.41746427996271e-05, "loss": 0.779, "step": 188690 }, { "epoch": 1.2055505155692985, "grad_norm": 1.003836750984192, "learning_rate": 3.4169883152867925e-05, "loss": 0.8695, "step": 188700 }, { "epoch": 1.205614402719037, "grad_norm": 1.1356700658798218, "learning_rate": 3.4165123665531715e-05, "loss": 0.7465, "step": 188710 }, { "epoch": 1.205678289868776, "grad_norm": 0.8815405368804932, "learning_rate": 3.416036433766642e-05, "loss": 1.0118, "step": 188720 }, { "epoch": 1.2057421770185144, "grad_norm": 0.5799207091331482, "learning_rate": 3.415560516931996e-05, "loss": 0.7291, "step": 188730 }, { "epoch": 1.2058060641682533, "grad_norm": 1.3417963981628418, "learning_rate": 3.415084616054025e-05, "loss": 0.6866, "step": 188740 }, { "epoch": 1.2058699513179918, "grad_norm": 2.6382036209106445, "learning_rate": 3.414608731137525e-05, "loss": 0.8929, "step": 188750 }, { "epoch": 1.2059338384677307, "grad_norm": 0.8235493302345276, "learning_rate": 3.414132862187287e-05, "loss": 0.7851, "step": 188760 }, { "epoch": 1.2059977256174692, "grad_norm": 0.9888988137245178, "learning_rate": 3.413657009208102e-05, "loss": 0.8698, "step": 188770 }, { "epoch": 1.206061612767208, "grad_norm": 2.198369026184082, "learning_rate": 3.413181172204763e-05, "loss": 0.8233, "step": 188780 }, { "epoch": 1.2061254999169466, "grad_norm": 0.8852475881576538, "learning_rate": 3.4127053511820626e-05, "loss": 1.0084, "step": 188790 }, { "epoch": 1.2061893870666853, "grad_norm": 1.3241829872131348, "learning_rate": 3.412229546144792e-05, "loss": 1.0275, "step": 188800 }, { "epoch": 1.206253274216424, "grad_norm": 1.0022987127304077, "learning_rate": 3.4117537570977443e-05, "loss": 1.2873, "step": 188810 }, { "epoch": 1.2063171613661627, "grad_norm": 0.8337856531143188, "learning_rate": 3.41127798404571e-05, "loss": 0.8505, "step": 188820 }, { "epoch": 1.2063810485159014, "grad_norm": 1.0508147478103638, "learning_rate": 3.410802226993479e-05, "loss": 0.7073, "step": 188830 }, { "epoch": 1.2064449356656402, "grad_norm": 0.8216559290885925, "learning_rate": 3.4103264859458464e-05, "loss": 0.8745, "step": 188840 }, { "epoch": 1.2065088228153789, "grad_norm": 0.7753427624702454, "learning_rate": 3.409850760907601e-05, "loss": 0.7886, "step": 188850 }, { "epoch": 1.2065727099651176, "grad_norm": 0.857014000415802, "learning_rate": 3.4093750518835346e-05, "loss": 0.8163, "step": 188860 }, { "epoch": 1.2066365971148563, "grad_norm": 1.3117613792419434, "learning_rate": 3.408899358878437e-05, "loss": 0.6619, "step": 188870 }, { "epoch": 1.206700484264595, "grad_norm": 0.9748005270957947, "learning_rate": 3.4084236818970996e-05, "loss": 0.9795, "step": 188880 }, { "epoch": 1.2067643714143337, "grad_norm": 0.5203637480735779, "learning_rate": 3.407948020944312e-05, "loss": 0.8075, "step": 188890 }, { "epoch": 1.2068282585640724, "grad_norm": 0.7732601761817932, "learning_rate": 3.407472376024866e-05, "loss": 0.8769, "step": 188900 }, { "epoch": 1.206892145713811, "grad_norm": 0.9090991020202637, "learning_rate": 3.4069967471435506e-05, "loss": 0.6339, "step": 188910 }, { "epoch": 1.2069560328635498, "grad_norm": 0.8993181586265564, "learning_rate": 3.406521134305156e-05, "loss": 1.1261, "step": 188920 }, { "epoch": 1.2070199200132885, "grad_norm": 0.8620532751083374, "learning_rate": 3.406045537514472e-05, "loss": 0.7915, "step": 188930 }, { "epoch": 1.2070838071630272, "grad_norm": 1.1491893529891968, "learning_rate": 3.405569956776289e-05, "loss": 0.8156, "step": 188940 }, { "epoch": 1.207147694312766, "grad_norm": 0.9209848046302795, "learning_rate": 3.405094392095395e-05, "loss": 0.7993, "step": 188950 }, { "epoch": 1.2072115814625046, "grad_norm": 0.7707030177116394, "learning_rate": 3.404618843476581e-05, "loss": 1.0284, "step": 188960 }, { "epoch": 1.2072754686122433, "grad_norm": 0.7875609397888184, "learning_rate": 3.404143310924635e-05, "loss": 0.8219, "step": 188970 }, { "epoch": 1.207339355761982, "grad_norm": 0.9179214835166931, "learning_rate": 3.403667794444347e-05, "loss": 0.7936, "step": 188980 }, { "epoch": 1.2074032429117207, "grad_norm": 0.7444946765899658, "learning_rate": 3.403192294040505e-05, "loss": 0.7553, "step": 188990 }, { "epoch": 1.2074671300614594, "grad_norm": 1.558156132698059, "learning_rate": 3.4027168097178976e-05, "loss": 0.8449, "step": 189000 }, { "epoch": 1.2075310172111982, "grad_norm": 2.1795334815979004, "learning_rate": 3.402241341481314e-05, "loss": 1.1889, "step": 189010 }, { "epoch": 1.2075949043609369, "grad_norm": 0.6319822072982788, "learning_rate": 3.4017658893355434e-05, "loss": 1.2551, "step": 189020 }, { "epoch": 1.2076587915106756, "grad_norm": 0.8189280033111572, "learning_rate": 3.401290453285371e-05, "loss": 0.8926, "step": 189030 }, { "epoch": 1.2077226786604143, "grad_norm": 0.9746350049972534, "learning_rate": 3.4008150333355875e-05, "loss": 0.7228, "step": 189040 }, { "epoch": 1.207786565810153, "grad_norm": 0.8227341175079346, "learning_rate": 3.4003396294909804e-05, "loss": 0.7056, "step": 189050 }, { "epoch": 1.2078504529598917, "grad_norm": 1.0430861711502075, "learning_rate": 3.3998642417563375e-05, "loss": 0.6838, "step": 189060 }, { "epoch": 1.2079143401096304, "grad_norm": 0.838440477848053, "learning_rate": 3.3993888701364457e-05, "loss": 0.9032, "step": 189070 }, { "epoch": 1.207978227259369, "grad_norm": 0.6549668312072754, "learning_rate": 3.398913514636093e-05, "loss": 0.8481, "step": 189080 }, { "epoch": 1.2080421144091078, "grad_norm": 0.7392321825027466, "learning_rate": 3.398438175260066e-05, "loss": 0.8972, "step": 189090 }, { "epoch": 1.2081060015588465, "grad_norm": 0.9022709131240845, "learning_rate": 3.3979628520131524e-05, "loss": 0.9335, "step": 189100 }, { "epoch": 1.2081698887085852, "grad_norm": 1.0506974458694458, "learning_rate": 3.397487544900139e-05, "loss": 1.1411, "step": 189110 }, { "epoch": 1.208233775858324, "grad_norm": 0.7597492933273315, "learning_rate": 3.3970122539258114e-05, "loss": 0.8804, "step": 189120 }, { "epoch": 1.2082976630080626, "grad_norm": 0.8924365043640137, "learning_rate": 3.396536979094958e-05, "loss": 0.7818, "step": 189130 }, { "epoch": 1.2083615501578013, "grad_norm": 0.6869126558303833, "learning_rate": 3.3960617204123646e-05, "loss": 1.0775, "step": 189140 }, { "epoch": 1.20842543730754, "grad_norm": 0.6726001501083374, "learning_rate": 3.3955864778828167e-05, "loss": 0.9098, "step": 189150 }, { "epoch": 1.2084893244572787, "grad_norm": 0.5952368378639221, "learning_rate": 3.395111251511101e-05, "loss": 0.7089, "step": 189160 }, { "epoch": 1.2085532116070175, "grad_norm": 1.8950144052505493, "learning_rate": 3.394636041302004e-05, "loss": 0.974, "step": 189170 }, { "epoch": 1.2086170987567562, "grad_norm": 1.1229348182678223, "learning_rate": 3.3941608472603106e-05, "loss": 1.1186, "step": 189180 }, { "epoch": 1.2086809859064949, "grad_norm": 0.742145836353302, "learning_rate": 3.393685669390806e-05, "loss": 0.7087, "step": 189190 }, { "epoch": 1.2087448730562333, "grad_norm": 0.7637613415718079, "learning_rate": 3.393210507698278e-05, "loss": 0.9503, "step": 189200 }, { "epoch": 1.2088087602059723, "grad_norm": 0.7326201796531677, "learning_rate": 3.392735362187509e-05, "loss": 1.0978, "step": 189210 }, { "epoch": 1.2088726473557108, "grad_norm": 0.764445960521698, "learning_rate": 3.392260232863286e-05, "loss": 0.8275, "step": 189220 }, { "epoch": 1.2089365345054497, "grad_norm": 1.7644976377487183, "learning_rate": 3.3917851197303926e-05, "loss": 0.8628, "step": 189230 }, { "epoch": 1.2090004216551882, "grad_norm": 0.5463082790374756, "learning_rate": 3.391310022793613e-05, "loss": 0.9615, "step": 189240 }, { "epoch": 1.209064308804927, "grad_norm": 1.1664972305297852, "learning_rate": 3.390834942057738e-05, "loss": 0.7439, "step": 189250 }, { "epoch": 1.2091281959546656, "grad_norm": 0.6254557371139526, "learning_rate": 3.390359877527544e-05, "loss": 0.6675, "step": 189260 }, { "epoch": 1.2091920831044043, "grad_norm": 0.7890540361404419, "learning_rate": 3.389884829207819e-05, "loss": 0.7559, "step": 189270 }, { "epoch": 1.209255970254143, "grad_norm": 1.1657845973968506, "learning_rate": 3.3894097971033465e-05, "loss": 0.7273, "step": 189280 }, { "epoch": 1.2093198574038817, "grad_norm": 0.8113539218902588, "learning_rate": 3.388934781218911e-05, "loss": 0.9024, "step": 189290 }, { "epoch": 1.2093837445536204, "grad_norm": 0.9730884432792664, "learning_rate": 3.388459781559296e-05, "loss": 0.6144, "step": 189300 }, { "epoch": 1.2094476317033591, "grad_norm": 1.0396467447280884, "learning_rate": 3.387984798129284e-05, "loss": 0.6144, "step": 189310 }, { "epoch": 1.2095115188530978, "grad_norm": 1.3935688734054565, "learning_rate": 3.387509830933661e-05, "loss": 0.8328, "step": 189320 }, { "epoch": 1.2095754060028365, "grad_norm": 2.3891825675964355, "learning_rate": 3.387034879977209e-05, "loss": 0.8898, "step": 189330 }, { "epoch": 1.2096392931525752, "grad_norm": 1.1916571855545044, "learning_rate": 3.38655994526471e-05, "loss": 0.911, "step": 189340 }, { "epoch": 1.209703180302314, "grad_norm": 1.0677696466445923, "learning_rate": 3.386085026800948e-05, "loss": 0.7844, "step": 189350 }, { "epoch": 1.2097670674520526, "grad_norm": 0.8219583034515381, "learning_rate": 3.385610124590707e-05, "loss": 0.7822, "step": 189360 }, { "epoch": 1.2098309546017914, "grad_norm": 0.892532229423523, "learning_rate": 3.3851352386387694e-05, "loss": 0.9249, "step": 189370 }, { "epoch": 1.20989484175153, "grad_norm": 1.4263323545455933, "learning_rate": 3.3846603689499155e-05, "loss": 0.9323, "step": 189380 }, { "epoch": 1.2099587289012688, "grad_norm": 0.8026622533798218, "learning_rate": 3.3841855155289304e-05, "loss": 0.7288, "step": 189390 }, { "epoch": 1.2100226160510075, "grad_norm": 0.7562341690063477, "learning_rate": 3.383710678380595e-05, "loss": 0.8231, "step": 189400 }, { "epoch": 1.2100865032007462, "grad_norm": 1.2895803451538086, "learning_rate": 3.38323585750969e-05, "loss": 0.9344, "step": 189410 }, { "epoch": 1.2101503903504849, "grad_norm": 1.0178850889205933, "learning_rate": 3.382761052921e-05, "loss": 0.9551, "step": 189420 }, { "epoch": 1.2102142775002236, "grad_norm": 1.0130857229232788, "learning_rate": 3.382286264619304e-05, "loss": 1.0137, "step": 189430 }, { "epoch": 1.2102781646499623, "grad_norm": 1.47183096408844, "learning_rate": 3.381811492609386e-05, "loss": 0.9249, "step": 189440 }, { "epoch": 1.210342051799701, "grad_norm": 1.0401761531829834, "learning_rate": 3.381336736896026e-05, "loss": 0.8981, "step": 189450 }, { "epoch": 1.2104059389494397, "grad_norm": 1.007563829421997, "learning_rate": 3.3808619974840053e-05, "loss": 0.5923, "step": 189460 }, { "epoch": 1.2104698260991784, "grad_norm": 0.8079440593719482, "learning_rate": 3.3803872743781054e-05, "loss": 1.1246, "step": 189470 }, { "epoch": 1.2105337132489171, "grad_norm": 1.758446455001831, "learning_rate": 3.379912567583106e-05, "loss": 0.8868, "step": 189480 }, { "epoch": 1.2105976003986558, "grad_norm": 0.5423232316970825, "learning_rate": 3.379437877103789e-05, "loss": 0.7521, "step": 189490 }, { "epoch": 1.2106614875483945, "grad_norm": 0.869480311870575, "learning_rate": 3.378963202944935e-05, "loss": 0.9616, "step": 189500 }, { "epoch": 1.2107253746981332, "grad_norm": 0.9608466029167175, "learning_rate": 3.3784885451113235e-05, "loss": 0.766, "step": 189510 }, { "epoch": 1.210789261847872, "grad_norm": 1.1704931259155273, "learning_rate": 3.3780139036077355e-05, "loss": 0.8673, "step": 189520 }, { "epoch": 1.2108531489976107, "grad_norm": 0.8335651159286499, "learning_rate": 3.37753927843895e-05, "loss": 1.0876, "step": 189530 }, { "epoch": 1.2109170361473494, "grad_norm": 0.5794630646705627, "learning_rate": 3.377064669609748e-05, "loss": 0.7555, "step": 189540 }, { "epoch": 1.210980923297088, "grad_norm": 0.7695865035057068, "learning_rate": 3.3765900771249094e-05, "loss": 0.7292, "step": 189550 }, { "epoch": 1.2110448104468268, "grad_norm": 1.706060767173767, "learning_rate": 3.3761155009892106e-05, "loss": 0.9732, "step": 189560 }, { "epoch": 1.2111086975965655, "grad_norm": 1.0700571537017822, "learning_rate": 3.3756409412074365e-05, "loss": 0.7203, "step": 189570 }, { "epoch": 1.2111725847463042, "grad_norm": 1.068686842918396, "learning_rate": 3.3751663977843616e-05, "loss": 0.8256, "step": 189580 }, { "epoch": 1.2112364718960429, "grad_norm": 0.7472430467605591, "learning_rate": 3.374691870724768e-05, "loss": 1.0348, "step": 189590 }, { "epoch": 1.2113003590457816, "grad_norm": 1.1850546598434448, "learning_rate": 3.374217360033433e-05, "loss": 0.794, "step": 189600 }, { "epoch": 1.2113642461955203, "grad_norm": 0.6383751034736633, "learning_rate": 3.373742865715136e-05, "loss": 0.7884, "step": 189610 }, { "epoch": 1.211428133345259, "grad_norm": 0.866807222366333, "learning_rate": 3.373268387774655e-05, "loss": 1.3421, "step": 189620 }, { "epoch": 1.2114920204949977, "grad_norm": 1.0041619539260864, "learning_rate": 3.372793926216769e-05, "loss": 0.7278, "step": 189630 }, { "epoch": 1.2115559076447364, "grad_norm": 2.272212266921997, "learning_rate": 3.372319481046254e-05, "loss": 1.0229, "step": 189640 }, { "epoch": 1.2116197947944751, "grad_norm": 1.066819667816162, "learning_rate": 3.371845052267892e-05, "loss": 0.9702, "step": 189650 }, { "epoch": 1.2116836819442138, "grad_norm": 0.9112004637718201, "learning_rate": 3.371370639886459e-05, "loss": 0.8027, "step": 189660 }, { "epoch": 1.2117475690939525, "grad_norm": 0.8083162307739258, "learning_rate": 3.3708962439067316e-05, "loss": 0.6973, "step": 189670 }, { "epoch": 1.2118114562436912, "grad_norm": 1.1185535192489624, "learning_rate": 3.3704218643334884e-05, "loss": 0.7762, "step": 189680 }, { "epoch": 1.2118753433934297, "grad_norm": 0.5453072786331177, "learning_rate": 3.369947501171507e-05, "loss": 0.7053, "step": 189690 }, { "epoch": 1.2119392305431687, "grad_norm": 0.6224724054336548, "learning_rate": 3.3694731544255646e-05, "loss": 0.8089, "step": 189700 }, { "epoch": 1.2120031176929071, "grad_norm": 0.9544569253921509, "learning_rate": 3.3689988241004385e-05, "loss": 0.8551, "step": 189710 }, { "epoch": 1.212067004842646, "grad_norm": 0.9484859704971313, "learning_rate": 3.368524510200904e-05, "loss": 0.7186, "step": 189720 }, { "epoch": 1.2121308919923846, "grad_norm": 1.351324200630188, "learning_rate": 3.36805021273174e-05, "loss": 0.8923, "step": 189730 }, { "epoch": 1.2121947791421233, "grad_norm": 1.0908766984939575, "learning_rate": 3.367575931697724e-05, "loss": 0.9379, "step": 189740 }, { "epoch": 1.212258666291862, "grad_norm": 1.441122055053711, "learning_rate": 3.3671016671036286e-05, "loss": 0.917, "step": 189750 }, { "epoch": 1.2123225534416007, "grad_norm": 0.6979114413261414, "learning_rate": 3.3666274189542327e-05, "loss": 0.6953, "step": 189760 }, { "epoch": 1.2123864405913394, "grad_norm": 0.9482870697975159, "learning_rate": 3.3661531872543114e-05, "loss": 0.9533, "step": 189770 }, { "epoch": 1.212450327741078, "grad_norm": 0.9386551976203918, "learning_rate": 3.365678972008641e-05, "loss": 1.1059, "step": 189780 }, { "epoch": 1.2125142148908168, "grad_norm": 0.9457994103431702, "learning_rate": 3.365204773221997e-05, "loss": 0.919, "step": 189790 }, { "epoch": 1.2125781020405555, "grad_norm": 1.090871810913086, "learning_rate": 3.364730590899156e-05, "loss": 0.9031, "step": 189800 }, { "epoch": 1.2126419891902942, "grad_norm": 1.0352469682693481, "learning_rate": 3.3642564250448917e-05, "loss": 0.7198, "step": 189810 }, { "epoch": 1.212705876340033, "grad_norm": 0.983015775680542, "learning_rate": 3.3637822756639806e-05, "loss": 0.9113, "step": 189820 }, { "epoch": 1.2127697634897716, "grad_norm": 3.4296910762786865, "learning_rate": 3.3633081427611976e-05, "loss": 0.8825, "step": 189830 }, { "epoch": 1.2128336506395103, "grad_norm": 0.7872678637504578, "learning_rate": 3.362834026341317e-05, "loss": 1.1606, "step": 189840 }, { "epoch": 1.212897537789249, "grad_norm": 0.6566711664199829, "learning_rate": 3.362359926409115e-05, "loss": 0.7937, "step": 189850 }, { "epoch": 1.2129614249389877, "grad_norm": 0.7389879822731018, "learning_rate": 3.361885842969365e-05, "loss": 0.7068, "step": 189860 }, { "epoch": 1.2130253120887264, "grad_norm": 1.2746154069900513, "learning_rate": 3.3614117760268415e-05, "loss": 0.9289, "step": 189870 }, { "epoch": 1.2130891992384651, "grad_norm": 0.8612440228462219, "learning_rate": 3.360937725586318e-05, "loss": 1.0348, "step": 189880 }, { "epoch": 1.2131530863882038, "grad_norm": 0.6161855459213257, "learning_rate": 3.360463691652571e-05, "loss": 0.7618, "step": 189890 }, { "epoch": 1.2132169735379426, "grad_norm": 1.0843278169631958, "learning_rate": 3.3599896742303726e-05, "loss": 0.7903, "step": 189900 }, { "epoch": 1.2132808606876813, "grad_norm": 0.8129070401191711, "learning_rate": 3.359515673324497e-05, "loss": 1.0778, "step": 189910 }, { "epoch": 1.21334474783742, "grad_norm": 1.6477254629135132, "learning_rate": 3.359041688939718e-05, "loss": 0.6379, "step": 189920 }, { "epoch": 1.2134086349871587, "grad_norm": 1.2734267711639404, "learning_rate": 3.358567721080809e-05, "loss": 1.2492, "step": 189930 }, { "epoch": 1.2134725221368974, "grad_norm": 0.704581081867218, "learning_rate": 3.358093769752543e-05, "loss": 1.0449, "step": 189940 }, { "epoch": 1.213536409286636, "grad_norm": 0.846922755241394, "learning_rate": 3.357619834959693e-05, "loss": 1.0302, "step": 189950 }, { "epoch": 1.2136002964363748, "grad_norm": 0.7383447289466858, "learning_rate": 3.357145916707033e-05, "loss": 1.0175, "step": 189960 }, { "epoch": 1.2136641835861135, "grad_norm": 0.7878567576408386, "learning_rate": 3.356672014999333e-05, "loss": 0.7262, "step": 189970 }, { "epoch": 1.2137280707358522, "grad_norm": 1.0822579860687256, "learning_rate": 3.3561981298413695e-05, "loss": 0.9257, "step": 189980 }, { "epoch": 1.213791957885591, "grad_norm": 0.9476717114448547, "learning_rate": 3.355724261237911e-05, "loss": 1.0248, "step": 189990 }, { "epoch": 1.2138558450353296, "grad_norm": 1.3661209344863892, "learning_rate": 3.3552504091937334e-05, "loss": 0.7913, "step": 190000 }, { "epoch": 1.2139197321850683, "grad_norm": 2.8625452518463135, "learning_rate": 3.354776573713606e-05, "loss": 0.7928, "step": 190010 }, { "epoch": 1.213983619334807, "grad_norm": 0.8434425592422485, "learning_rate": 3.354302754802303e-05, "loss": 0.674, "step": 190020 }, { "epoch": 1.2140475064845457, "grad_norm": 0.6829739809036255, "learning_rate": 3.353828952464594e-05, "loss": 0.7713, "step": 190030 }, { "epoch": 1.2141113936342844, "grad_norm": 1.4027974605560303, "learning_rate": 3.353355166705251e-05, "loss": 0.7881, "step": 190040 }, { "epoch": 1.2141752807840231, "grad_norm": 1.39128839969635, "learning_rate": 3.352881397529047e-05, "loss": 1.0224, "step": 190050 }, { "epoch": 1.2142391679337619, "grad_norm": 0.9742370843887329, "learning_rate": 3.352407644940753e-05, "loss": 0.8518, "step": 190060 }, { "epoch": 1.2143030550835006, "grad_norm": 1.9331669807434082, "learning_rate": 3.351933908945138e-05, "loss": 0.766, "step": 190070 }, { "epoch": 1.2143669422332393, "grad_norm": 1.6505056619644165, "learning_rate": 3.3514601895469736e-05, "loss": 0.9179, "step": 190080 }, { "epoch": 1.214430829382978, "grad_norm": 1.2252271175384521, "learning_rate": 3.3509864867510325e-05, "loss": 1.046, "step": 190090 }, { "epoch": 1.2144947165327167, "grad_norm": 0.7833622694015503, "learning_rate": 3.3505128005620845e-05, "loss": 0.7841, "step": 190100 }, { "epoch": 1.2145586036824554, "grad_norm": 0.8546239137649536, "learning_rate": 3.3500391309848986e-05, "loss": 0.8422, "step": 190110 }, { "epoch": 1.214622490832194, "grad_norm": 1.0654160976409912, "learning_rate": 3.349565478024247e-05, "loss": 0.874, "step": 190120 }, { "epoch": 1.2146863779819328, "grad_norm": 1.2579134702682495, "learning_rate": 3.349091841684898e-05, "loss": 0.8686, "step": 190130 }, { "epoch": 1.2147502651316715, "grad_norm": 0.8867964744567871, "learning_rate": 3.3486182219716235e-05, "loss": 1.1406, "step": 190140 }, { "epoch": 1.2148141522814102, "grad_norm": 0.8828266859054565, "learning_rate": 3.348144618889191e-05, "loss": 0.7873, "step": 190150 }, { "epoch": 1.214878039431149, "grad_norm": 0.7557622194290161, "learning_rate": 3.347671032442372e-05, "loss": 0.9467, "step": 190160 }, { "epoch": 1.2149419265808876, "grad_norm": 1.5542864799499512, "learning_rate": 3.3471974626359346e-05, "loss": 0.9072, "step": 190170 }, { "epoch": 1.215005813730626, "grad_norm": 1.122002363204956, "learning_rate": 3.3467239094746494e-05, "loss": 1.0869, "step": 190180 }, { "epoch": 1.215069700880365, "grad_norm": 1.4732335805892944, "learning_rate": 3.346250372963284e-05, "loss": 0.6884, "step": 190190 }, { "epoch": 1.2151335880301035, "grad_norm": 0.712639570236206, "learning_rate": 3.345776853106609e-05, "loss": 0.691, "step": 190200 }, { "epoch": 1.2151974751798424, "grad_norm": 0.7023781538009644, "learning_rate": 3.345303349909391e-05, "loss": 0.7389, "step": 190210 }, { "epoch": 1.215261362329581, "grad_norm": 1.098771333694458, "learning_rate": 3.344829863376402e-05, "loss": 0.7947, "step": 190220 }, { "epoch": 1.2153252494793196, "grad_norm": 0.7597054243087769, "learning_rate": 3.344356393512407e-05, "loss": 0.6477, "step": 190230 }, { "epoch": 1.2153891366290583, "grad_norm": 1.0728919506072998, "learning_rate": 3.343882940322174e-05, "loss": 0.7738, "step": 190240 }, { "epoch": 1.215453023778797, "grad_norm": 1.2330894470214844, "learning_rate": 3.343409503810474e-05, "loss": 1.2956, "step": 190250 }, { "epoch": 1.2155169109285358, "grad_norm": 1.1694852113723755, "learning_rate": 3.3429360839820725e-05, "loss": 0.7961, "step": 190260 }, { "epoch": 1.2155807980782745, "grad_norm": 0.6705962419509888, "learning_rate": 3.342462680841739e-05, "loss": 1.0564, "step": 190270 }, { "epoch": 1.2156446852280132, "grad_norm": 1.190106749534607, "learning_rate": 3.34198929439424e-05, "loss": 0.7572, "step": 190280 }, { "epoch": 1.2157085723777519, "grad_norm": 0.7782067656517029, "learning_rate": 3.341515924644343e-05, "loss": 1.093, "step": 190290 }, { "epoch": 1.2157724595274906, "grad_norm": 0.8707125186920166, "learning_rate": 3.341042571596816e-05, "loss": 0.6307, "step": 190300 }, { "epoch": 1.2158363466772293, "grad_norm": 1.6781306266784668, "learning_rate": 3.3405692352564246e-05, "loss": 0.8267, "step": 190310 }, { "epoch": 1.215900233826968, "grad_norm": 1.4789141416549683, "learning_rate": 3.3400959156279377e-05, "loss": 0.6929, "step": 190320 }, { "epoch": 1.2159641209767067, "grad_norm": 1.2151391506195068, "learning_rate": 3.33962261271612e-05, "loss": 0.8728, "step": 190330 }, { "epoch": 1.2160280081264454, "grad_norm": 1.0864027738571167, "learning_rate": 3.339149326525739e-05, "loss": 1.0483, "step": 190340 }, { "epoch": 1.216091895276184, "grad_norm": 0.9961766600608826, "learning_rate": 3.338676057061562e-05, "loss": 0.872, "step": 190350 }, { "epoch": 1.2161557824259228, "grad_norm": 1.3357287645339966, "learning_rate": 3.3382028043283536e-05, "loss": 1.0093, "step": 190360 }, { "epoch": 1.2162196695756615, "grad_norm": 0.7204846143722534, "learning_rate": 3.3377295683308806e-05, "loss": 0.7794, "step": 190370 }, { "epoch": 1.2162835567254002, "grad_norm": 0.8506677150726318, "learning_rate": 3.3372563490739094e-05, "loss": 0.9173, "step": 190380 }, { "epoch": 1.216347443875139, "grad_norm": 1.3914761543273926, "learning_rate": 3.336783146562205e-05, "loss": 0.9274, "step": 190390 }, { "epoch": 1.2164113310248776, "grad_norm": 0.8650677800178528, "learning_rate": 3.3363099608005335e-05, "loss": 0.7552, "step": 190400 }, { "epoch": 1.2164752181746163, "grad_norm": 1.133594036102295, "learning_rate": 3.335836791793658e-05, "loss": 0.6892, "step": 190410 }, { "epoch": 1.216539105324355, "grad_norm": 0.9037141799926758, "learning_rate": 3.3353636395463485e-05, "loss": 1.0421, "step": 190420 }, { "epoch": 1.2166029924740938, "grad_norm": 1.0905022621154785, "learning_rate": 3.334890504063366e-05, "loss": 0.8529, "step": 190430 }, { "epoch": 1.2166668796238325, "grad_norm": 1.3914424180984497, "learning_rate": 3.334417385349476e-05, "loss": 0.9783, "step": 190440 }, { "epoch": 1.2167307667735712, "grad_norm": 0.8010818958282471, "learning_rate": 3.3339442834094454e-05, "loss": 1.014, "step": 190450 }, { "epoch": 1.2167946539233099, "grad_norm": 0.6773678064346313, "learning_rate": 3.333471198248036e-05, "loss": 0.8047, "step": 190460 }, { "epoch": 1.2168585410730486, "grad_norm": 0.8083564043045044, "learning_rate": 3.332998129870014e-05, "loss": 0.7306, "step": 190470 }, { "epoch": 1.2169224282227873, "grad_norm": 0.7467992901802063, "learning_rate": 3.332525078280143e-05, "loss": 0.7576, "step": 190480 }, { "epoch": 1.216986315372526, "grad_norm": 1.5822430849075317, "learning_rate": 3.332052043483187e-05, "loss": 0.7701, "step": 190490 }, { "epoch": 1.2170502025222647, "grad_norm": 0.7331676483154297, "learning_rate": 3.3315790254839095e-05, "loss": 0.8167, "step": 190500 }, { "epoch": 1.2171140896720034, "grad_norm": 0.6782907247543335, "learning_rate": 3.331106024287075e-05, "loss": 1.0039, "step": 190510 }, { "epoch": 1.2171779768217421, "grad_norm": 0.7742295861244202, "learning_rate": 3.330633039897447e-05, "loss": 0.8161, "step": 190520 }, { "epoch": 1.2172418639714808, "grad_norm": 0.5957803726196289, "learning_rate": 3.330160072319788e-05, "loss": 0.8088, "step": 190530 }, { "epoch": 1.2173057511212195, "grad_norm": 0.9488677978515625, "learning_rate": 3.329687121558862e-05, "loss": 0.8291, "step": 190540 }, { "epoch": 1.2173696382709582, "grad_norm": 0.7537437081336975, "learning_rate": 3.329214187619432e-05, "loss": 1.0181, "step": 190550 }, { "epoch": 1.217433525420697, "grad_norm": 1.4307500123977661, "learning_rate": 3.328741270506259e-05, "loss": 0.7942, "step": 190560 }, { "epoch": 1.2174974125704356, "grad_norm": 0.7630217671394348, "learning_rate": 3.328268370224109e-05, "loss": 0.8375, "step": 190570 }, { "epoch": 1.2175612997201744, "grad_norm": 1.0125505924224854, "learning_rate": 3.327795486777742e-05, "loss": 0.778, "step": 190580 }, { "epoch": 1.217625186869913, "grad_norm": 0.6416699886322021, "learning_rate": 3.327322620171921e-05, "loss": 0.8773, "step": 190590 }, { "epoch": 1.2176890740196518, "grad_norm": 1.3993701934814453, "learning_rate": 3.326849770411408e-05, "loss": 0.8246, "step": 190600 }, { "epoch": 1.2177529611693905, "grad_norm": 0.6875872015953064, "learning_rate": 3.326376937500965e-05, "loss": 1.1023, "step": 190610 }, { "epoch": 1.2178168483191292, "grad_norm": 0.9343377351760864, "learning_rate": 3.325904121445354e-05, "loss": 0.7328, "step": 190620 }, { "epoch": 1.2178807354688679, "grad_norm": 0.8546002507209778, "learning_rate": 3.325431322249338e-05, "loss": 0.7355, "step": 190630 }, { "epoch": 1.2179446226186066, "grad_norm": 0.593250572681427, "learning_rate": 3.324958539917677e-05, "loss": 0.6562, "step": 190640 }, { "epoch": 1.218008509768345, "grad_norm": 0.856907844543457, "learning_rate": 3.3244857744551325e-05, "loss": 0.9296, "step": 190650 }, { "epoch": 1.218072396918084, "grad_norm": 0.9372380375862122, "learning_rate": 3.324013025866467e-05, "loss": 0.8646, "step": 190660 }, { "epoch": 1.2181362840678225, "grad_norm": 0.7435608506202698, "learning_rate": 3.323540294156439e-05, "loss": 0.9015, "step": 190670 }, { "epoch": 1.2182001712175614, "grad_norm": 0.686058521270752, "learning_rate": 3.3230675793298104e-05, "loss": 1.0199, "step": 190680 }, { "epoch": 1.2182640583673, "grad_norm": 0.8744069933891296, "learning_rate": 3.322594881391342e-05, "loss": 0.8603, "step": 190690 }, { "epoch": 1.2183279455170388, "grad_norm": 0.6687055826187134, "learning_rate": 3.322169467690033e-05, "loss": 0.8983, "step": 190700 }, { "epoch": 1.2183918326667773, "grad_norm": 0.745978832244873, "learning_rate": 3.321696801852185e-05, "loss": 0.7131, "step": 190710 }, { "epoch": 1.218455719816516, "grad_norm": 0.979073703289032, "learning_rate": 3.3212241529163016e-05, "loss": 0.7781, "step": 190720 }, { "epoch": 1.2185196069662547, "grad_norm": 1.0259783267974854, "learning_rate": 3.320751520887142e-05, "loss": 0.7987, "step": 190730 }, { "epoch": 1.2185834941159934, "grad_norm": 1.424546241760254, "learning_rate": 3.3202789057694683e-05, "loss": 0.8471, "step": 190740 }, { "epoch": 1.2186473812657321, "grad_norm": 0.8425334095954895, "learning_rate": 3.31980630756804e-05, "loss": 0.7256, "step": 190750 }, { "epoch": 1.2187112684154708, "grad_norm": 1.0759365558624268, "learning_rate": 3.319333726287615e-05, "loss": 0.9268, "step": 190760 }, { "epoch": 1.2187751555652095, "grad_norm": 0.826580286026001, "learning_rate": 3.318861161932954e-05, "loss": 0.9029, "step": 190770 }, { "epoch": 1.2188390427149483, "grad_norm": 0.7088884711265564, "learning_rate": 3.3183886145088146e-05, "loss": 0.9784, "step": 190780 }, { "epoch": 1.218902929864687, "grad_norm": 2.455657720565796, "learning_rate": 3.3179160840199566e-05, "loss": 0.8813, "step": 190790 }, { "epoch": 1.2189668170144257, "grad_norm": 1.166312575340271, "learning_rate": 3.3174435704711396e-05, "loss": 0.7366, "step": 190800 }, { "epoch": 1.2190307041641644, "grad_norm": 0.9697980880737305, "learning_rate": 3.316971073867121e-05, "loss": 1.0195, "step": 190810 }, { "epoch": 1.219094591313903, "grad_norm": 0.8241257667541504, "learning_rate": 3.31649859421266e-05, "loss": 0.9047, "step": 190820 }, { "epoch": 1.2191584784636418, "grad_norm": 1.1289937496185303, "learning_rate": 3.3160261315125145e-05, "loss": 0.9061, "step": 190830 }, { "epoch": 1.2192223656133805, "grad_norm": 1.415996789932251, "learning_rate": 3.315553685771443e-05, "loss": 0.892, "step": 190840 }, { "epoch": 1.2192862527631192, "grad_norm": 1.2349244356155396, "learning_rate": 3.315081256994204e-05, "loss": 0.8346, "step": 190850 }, { "epoch": 1.219350139912858, "grad_norm": 0.7951316833496094, "learning_rate": 3.314608845185553e-05, "loss": 0.8926, "step": 190860 }, { "epoch": 1.2194140270625966, "grad_norm": 0.918441891670227, "learning_rate": 3.3141364503502495e-05, "loss": 0.7915, "step": 190870 }, { "epoch": 1.2194779142123353, "grad_norm": 1.6058706045150757, "learning_rate": 3.31366407249305e-05, "loss": 0.8828, "step": 190880 }, { "epoch": 1.219541801362074, "grad_norm": 0.8341480493545532, "learning_rate": 3.313191711618712e-05, "loss": 0.6081, "step": 190890 }, { "epoch": 1.2196056885118127, "grad_norm": 1.021825909614563, "learning_rate": 3.312719367731993e-05, "loss": 0.9466, "step": 190900 }, { "epoch": 1.2196695756615514, "grad_norm": 1.2775073051452637, "learning_rate": 3.31224704083765e-05, "loss": 0.7967, "step": 190910 }, { "epoch": 1.2197334628112901, "grad_norm": 1.687404751777649, "learning_rate": 3.3117747309404397e-05, "loss": 0.9625, "step": 190920 }, { "epoch": 1.2197973499610288, "grad_norm": 0.7202122211456299, "learning_rate": 3.3113024380451176e-05, "loss": 0.7475, "step": 190930 }, { "epoch": 1.2198612371107675, "grad_norm": 1.1875548362731934, "learning_rate": 3.310830162156441e-05, "loss": 0.8918, "step": 190940 }, { "epoch": 1.2199251242605063, "grad_norm": 1.0419461727142334, "learning_rate": 3.310357903279166e-05, "loss": 0.623, "step": 190950 }, { "epoch": 1.219989011410245, "grad_norm": 1.0206866264343262, "learning_rate": 3.3098856614180495e-05, "loss": 1.1513, "step": 190960 }, { "epoch": 1.2200528985599837, "grad_norm": 0.7848687767982483, "learning_rate": 3.309413436577846e-05, "loss": 0.8548, "step": 190970 }, { "epoch": 1.2201167857097224, "grad_norm": 1.786482334136963, "learning_rate": 3.308941228763311e-05, "loss": 1.0621, "step": 190980 }, { "epoch": 1.220180672859461, "grad_norm": 1.0571478605270386, "learning_rate": 3.3084690379792014e-05, "loss": 1.0426, "step": 190990 }, { "epoch": 1.2202445600091998, "grad_norm": 0.8210548758506775, "learning_rate": 3.307996864230273e-05, "loss": 1.103, "step": 191000 }, { "epoch": 1.2203084471589385, "grad_norm": 1.0246258974075317, "learning_rate": 3.3075247075212785e-05, "loss": 0.7603, "step": 191010 }, { "epoch": 1.2203723343086772, "grad_norm": 0.6933344602584839, "learning_rate": 3.3070525678569745e-05, "loss": 0.7222, "step": 191020 }, { "epoch": 1.220436221458416, "grad_norm": 0.7241206169128418, "learning_rate": 3.306580445242117e-05, "loss": 0.6726, "step": 191030 }, { "epoch": 1.2205001086081546, "grad_norm": 0.6975740194320679, "learning_rate": 3.306108339681458e-05, "loss": 0.8145, "step": 191040 }, { "epoch": 1.2205639957578933, "grad_norm": 0.8648557662963867, "learning_rate": 3.305636251179753e-05, "loss": 0.8079, "step": 191050 }, { "epoch": 1.220627882907632, "grad_norm": 0.6180557012557983, "learning_rate": 3.305164179741758e-05, "loss": 0.9549, "step": 191060 }, { "epoch": 1.2206917700573707, "grad_norm": 0.8685292601585388, "learning_rate": 3.304692125372225e-05, "loss": 0.9466, "step": 191070 }, { "epoch": 1.2207556572071094, "grad_norm": 0.7630671262741089, "learning_rate": 3.30422008807591e-05, "loss": 0.8865, "step": 191080 }, { "epoch": 1.2208195443568481, "grad_norm": 1.0005232095718384, "learning_rate": 3.303748067857565e-05, "loss": 1.246, "step": 191090 }, { "epoch": 1.2208834315065868, "grad_norm": 5.073404788970947, "learning_rate": 3.303276064721945e-05, "loss": 0.786, "step": 191100 }, { "epoch": 1.2209473186563256, "grad_norm": 1.2058204412460327, "learning_rate": 3.302804078673804e-05, "loss": 1.02, "step": 191110 }, { "epoch": 1.2210112058060643, "grad_norm": 1.3755779266357422, "learning_rate": 3.302332109717892e-05, "loss": 0.751, "step": 191120 }, { "epoch": 1.221075092955803, "grad_norm": 1.1300941705703735, "learning_rate": 3.301860157858966e-05, "loss": 0.8189, "step": 191130 }, { "epoch": 1.2211389801055414, "grad_norm": 1.0134592056274414, "learning_rate": 3.3013882231017764e-05, "loss": 0.7888, "step": 191140 }, { "epoch": 1.2212028672552804, "grad_norm": 0.8436269164085388, "learning_rate": 3.300916305451077e-05, "loss": 0.7243, "step": 191150 }, { "epoch": 1.2212667544050189, "grad_norm": 1.3353471755981445, "learning_rate": 3.30044440491162e-05, "loss": 0.8878, "step": 191160 }, { "epoch": 1.2213306415547578, "grad_norm": 1.0653821229934692, "learning_rate": 3.2999725214881597e-05, "loss": 0.8101, "step": 191170 }, { "epoch": 1.2213945287044963, "grad_norm": 1.0878440141677856, "learning_rate": 3.299500655185445e-05, "loss": 1.0963, "step": 191180 }, { "epoch": 1.2214584158542352, "grad_norm": 0.7621514201164246, "learning_rate": 3.2990288060082306e-05, "loss": 0.9118, "step": 191190 }, { "epoch": 1.2215223030039737, "grad_norm": 1.2937744855880737, "learning_rate": 3.2985569739612676e-05, "loss": 0.7773, "step": 191200 }, { "epoch": 1.2215861901537124, "grad_norm": 0.9347992539405823, "learning_rate": 3.298085159049308e-05, "loss": 0.8938, "step": 191210 }, { "epoch": 1.221650077303451, "grad_norm": 1.0627859830856323, "learning_rate": 3.297613361277103e-05, "loss": 1.0111, "step": 191220 }, { "epoch": 1.2217139644531898, "grad_norm": 0.9506715536117554, "learning_rate": 3.297141580649405e-05, "loss": 0.7416, "step": 191230 }, { "epoch": 1.2217778516029285, "grad_norm": 1.2876331806182861, "learning_rate": 3.296669817170964e-05, "loss": 0.7476, "step": 191240 }, { "epoch": 1.2218417387526672, "grad_norm": 1.331278681755066, "learning_rate": 3.2961980708465315e-05, "loss": 0.8709, "step": 191250 }, { "epoch": 1.221905625902406, "grad_norm": 1.1935945749282837, "learning_rate": 3.295726341680857e-05, "loss": 0.923, "step": 191260 }, { "epoch": 1.2219695130521446, "grad_norm": 0.8223473429679871, "learning_rate": 3.2952546296786934e-05, "loss": 0.9257, "step": 191270 }, { "epoch": 1.2220334002018833, "grad_norm": 0.7247016429901123, "learning_rate": 3.294782934844791e-05, "loss": 0.8564, "step": 191280 }, { "epoch": 1.222097287351622, "grad_norm": 0.6738024950027466, "learning_rate": 3.2943112571838996e-05, "loss": 0.8172, "step": 191290 }, { "epoch": 1.2221611745013607, "grad_norm": 1.0389232635498047, "learning_rate": 3.29383959670077e-05, "loss": 0.9731, "step": 191300 }, { "epoch": 1.2222250616510995, "grad_norm": 0.9643621444702148, "learning_rate": 3.2933679534001515e-05, "loss": 0.9725, "step": 191310 }, { "epoch": 1.2222889488008382, "grad_norm": 2.0298118591308594, "learning_rate": 3.292896327286794e-05, "loss": 0.6888, "step": 191320 }, { "epoch": 1.2223528359505769, "grad_norm": 0.9155459403991699, "learning_rate": 3.2924247183654464e-05, "loss": 0.9241, "step": 191330 }, { "epoch": 1.2224167231003156, "grad_norm": 0.6312406659126282, "learning_rate": 3.291953126640863e-05, "loss": 0.9328, "step": 191340 }, { "epoch": 1.2224806102500543, "grad_norm": 0.8391063213348389, "learning_rate": 3.291481552117786e-05, "loss": 1.0933, "step": 191350 }, { "epoch": 1.222544497399793, "grad_norm": 1.127882719039917, "learning_rate": 3.291009994800968e-05, "loss": 0.9218, "step": 191360 }, { "epoch": 1.2226083845495317, "grad_norm": 0.8426359295845032, "learning_rate": 3.290538454695157e-05, "loss": 0.8109, "step": 191370 }, { "epoch": 1.2226722716992704, "grad_norm": 0.7490180730819702, "learning_rate": 3.2900669318051036e-05, "loss": 0.8676, "step": 191380 }, { "epoch": 1.222736158849009, "grad_norm": 0.675996720790863, "learning_rate": 3.289595426135555e-05, "loss": 0.9908, "step": 191390 }, { "epoch": 1.2228000459987478, "grad_norm": 1.0120384693145752, "learning_rate": 3.2891239376912614e-05, "loss": 0.6029, "step": 191400 }, { "epoch": 1.2228639331484865, "grad_norm": 1.0978142023086548, "learning_rate": 3.288652466476969e-05, "loss": 1.0245, "step": 191410 }, { "epoch": 1.2229278202982252, "grad_norm": 0.7779439091682434, "learning_rate": 3.288181012497427e-05, "loss": 0.7234, "step": 191420 }, { "epoch": 1.222991707447964, "grad_norm": 1.177938461303711, "learning_rate": 3.287709575757383e-05, "loss": 0.8939, "step": 191430 }, { "epoch": 1.2230555945977026, "grad_norm": 1.2629878520965576, "learning_rate": 3.2872381562615853e-05, "loss": 0.7932, "step": 191440 }, { "epoch": 1.2231194817474413, "grad_norm": 0.6756806969642639, "learning_rate": 3.286766754014781e-05, "loss": 0.8743, "step": 191450 }, { "epoch": 1.22318336889718, "grad_norm": 0.9224807620048523, "learning_rate": 3.2862953690217176e-05, "loss": 0.9364, "step": 191460 }, { "epoch": 1.2232472560469188, "grad_norm": 1.0424355268478394, "learning_rate": 3.2858240012871424e-05, "loss": 0.8107, "step": 191470 }, { "epoch": 1.2233111431966575, "grad_norm": 1.0432953834533691, "learning_rate": 3.2853526508158014e-05, "loss": 0.8718, "step": 191480 }, { "epoch": 1.2233750303463962, "grad_norm": 1.1413217782974243, "learning_rate": 3.284881317612444e-05, "loss": 0.9998, "step": 191490 }, { "epoch": 1.2234389174961349, "grad_norm": 2.2246835231781006, "learning_rate": 3.284410001681815e-05, "loss": 0.6342, "step": 191500 }, { "epoch": 1.2235028046458736, "grad_norm": 0.6261698007583618, "learning_rate": 3.283938703028662e-05, "loss": 0.7577, "step": 191510 }, { "epoch": 1.2235666917956123, "grad_norm": 1.1829447746276855, "learning_rate": 3.28346742165773e-05, "loss": 0.7946, "step": 191520 }, { "epoch": 1.223630578945351, "grad_norm": 0.9962440133094788, "learning_rate": 3.282996157573767e-05, "loss": 0.9398, "step": 191530 }, { "epoch": 1.2236944660950897, "grad_norm": 0.9726690053939819, "learning_rate": 3.282524910781517e-05, "loss": 0.8636, "step": 191540 }, { "epoch": 1.2237583532448284, "grad_norm": 1.3285902738571167, "learning_rate": 3.282053681285728e-05, "loss": 1.0809, "step": 191550 }, { "epoch": 1.223822240394567, "grad_norm": 0.8424548506736755, "learning_rate": 3.2815824690911444e-05, "loss": 0.8103, "step": 191560 }, { "epoch": 1.2238861275443058, "grad_norm": 0.6262918710708618, "learning_rate": 3.2811112742025115e-05, "loss": 0.8269, "step": 191570 }, { "epoch": 1.2239500146940445, "grad_norm": 0.8682219982147217, "learning_rate": 3.2806400966245745e-05, "loss": 0.9268, "step": 191580 }, { "epoch": 1.2240139018437832, "grad_norm": 1.0514038801193237, "learning_rate": 3.28016893636208e-05, "loss": 0.7626, "step": 191590 }, { "epoch": 1.224077788993522, "grad_norm": 1.6504403352737427, "learning_rate": 3.279697793419773e-05, "loss": 1.0023, "step": 191600 }, { "epoch": 1.2241416761432606, "grad_norm": 0.846878170967102, "learning_rate": 3.279226667802396e-05, "loss": 0.7239, "step": 191610 }, { "epoch": 1.2242055632929993, "grad_norm": 0.8492310643196106, "learning_rate": 3.278755559514696e-05, "loss": 1.0004, "step": 191620 }, { "epoch": 1.2242694504427378, "grad_norm": 0.77544105052948, "learning_rate": 3.2782844685614164e-05, "loss": 0.7413, "step": 191630 }, { "epoch": 1.2243333375924768, "grad_norm": 0.8446138501167297, "learning_rate": 3.2778133949473025e-05, "loss": 0.9818, "step": 191640 }, { "epoch": 1.2243972247422152, "grad_norm": 0.6860386729240417, "learning_rate": 3.277342338677096e-05, "loss": 1.087, "step": 191650 }, { "epoch": 1.2244611118919542, "grad_norm": 0.47785627841949463, "learning_rate": 3.276871299755544e-05, "loss": 0.801, "step": 191660 }, { "epoch": 1.2245249990416927, "grad_norm": 0.8878980278968811, "learning_rate": 3.276400278187388e-05, "loss": 0.9389, "step": 191670 }, { "epoch": 1.2245888861914314, "grad_norm": 1.2902075052261353, "learning_rate": 3.275929273977373e-05, "loss": 0.8234, "step": 191680 }, { "epoch": 1.22465277334117, "grad_norm": 1.8300102949142456, "learning_rate": 3.275458287130241e-05, "loss": 0.6632, "step": 191690 }, { "epoch": 1.2247166604909088, "grad_norm": 1.1442055702209473, "learning_rate": 3.2749873176507364e-05, "loss": 0.9729, "step": 191700 }, { "epoch": 1.2247805476406475, "grad_norm": 3.596463918685913, "learning_rate": 3.274516365543601e-05, "loss": 0.6886, "step": 191710 }, { "epoch": 1.2248444347903862, "grad_norm": 1.2126635313034058, "learning_rate": 3.274045430813579e-05, "loss": 0.9968, "step": 191720 }, { "epoch": 1.224908321940125, "grad_norm": 1.014853835105896, "learning_rate": 3.273574513465413e-05, "loss": 0.7696, "step": 191730 }, { "epoch": 1.2249722090898636, "grad_norm": 1.4263054132461548, "learning_rate": 3.273103613503846e-05, "loss": 1.0814, "step": 191740 }, { "epoch": 1.2250360962396023, "grad_norm": 1.2331584692001343, "learning_rate": 3.272632730933618e-05, "loss": 0.9616, "step": 191750 }, { "epoch": 1.225099983389341, "grad_norm": 1.4743117094039917, "learning_rate": 3.272161865759474e-05, "loss": 1.1666, "step": 191760 }, { "epoch": 1.2251638705390797, "grad_norm": 0.7259349822998047, "learning_rate": 3.2716910179861537e-05, "loss": 0.8348, "step": 191770 }, { "epoch": 1.2252277576888184, "grad_norm": 1.300727128982544, "learning_rate": 3.2712201876184004e-05, "loss": 0.705, "step": 191780 }, { "epoch": 1.2252916448385571, "grad_norm": 1.0736730098724365, "learning_rate": 3.2707493746609554e-05, "loss": 0.714, "step": 191790 }, { "epoch": 1.2253555319882958, "grad_norm": 0.7128208875656128, "learning_rate": 3.2702785791185606e-05, "loss": 0.8655, "step": 191800 }, { "epoch": 1.2254194191380345, "grad_norm": 1.3977357149124146, "learning_rate": 3.269807800995957e-05, "loss": 1.2464, "step": 191810 }, { "epoch": 1.2254833062877732, "grad_norm": 1.369526982307434, "learning_rate": 3.269337040297885e-05, "loss": 0.8314, "step": 191820 }, { "epoch": 1.225547193437512, "grad_norm": 1.0817862749099731, "learning_rate": 3.2688662970290885e-05, "loss": 0.9184, "step": 191830 }, { "epoch": 1.2256110805872507, "grad_norm": 1.0620317459106445, "learning_rate": 3.268395571194305e-05, "loss": 1.0192, "step": 191840 }, { "epoch": 1.2256749677369894, "grad_norm": 0.844619870185852, "learning_rate": 3.267924862798275e-05, "loss": 0.8088, "step": 191850 }, { "epoch": 1.225738854886728, "grad_norm": 0.6405353546142578, "learning_rate": 3.267454171845741e-05, "loss": 0.8423, "step": 191860 }, { "epoch": 1.2258027420364668, "grad_norm": 1.2829723358154297, "learning_rate": 3.2669834983414416e-05, "loss": 1.0363, "step": 191870 }, { "epoch": 1.2258666291862055, "grad_norm": 1.374717116355896, "learning_rate": 3.266512842290118e-05, "loss": 0.9436, "step": 191880 }, { "epoch": 1.2259305163359442, "grad_norm": 1.1757957935333252, "learning_rate": 3.266042203696509e-05, "loss": 0.8262, "step": 191890 }, { "epoch": 1.225994403485683, "grad_norm": 1.0372143983840942, "learning_rate": 3.265571582565355e-05, "loss": 0.7161, "step": 191900 }, { "epoch": 1.2260582906354216, "grad_norm": 0.8718850016593933, "learning_rate": 3.265100978901396e-05, "loss": 1.1899, "step": 191910 }, { "epoch": 1.2261221777851603, "grad_norm": 1.32882821559906, "learning_rate": 3.2646303927093716e-05, "loss": 0.9109, "step": 191920 }, { "epoch": 1.226186064934899, "grad_norm": 1.2186968326568604, "learning_rate": 3.2641598239940206e-05, "loss": 0.8187, "step": 191930 }, { "epoch": 1.2262499520846377, "grad_norm": 1.1600444316864014, "learning_rate": 3.263689272760081e-05, "loss": 1.1301, "step": 191940 }, { "epoch": 1.2263138392343764, "grad_norm": 1.9314838647842407, "learning_rate": 3.263218739012294e-05, "loss": 0.9535, "step": 191950 }, { "epoch": 1.2263777263841151, "grad_norm": 0.8182092308998108, "learning_rate": 3.2627482227553954e-05, "loss": 0.6432, "step": 191960 }, { "epoch": 1.2264416135338538, "grad_norm": 0.852057695388794, "learning_rate": 3.262277723994126e-05, "loss": 0.9246, "step": 191970 }, { "epoch": 1.2265055006835925, "grad_norm": 0.7511641979217529, "learning_rate": 3.2618072427332224e-05, "loss": 0.7807, "step": 191980 }, { "epoch": 1.2265693878333312, "grad_norm": 0.7494274377822876, "learning_rate": 3.261336778977424e-05, "loss": 0.7198, "step": 191990 }, { "epoch": 1.22663327498307, "grad_norm": 1.205119013786316, "learning_rate": 3.260866332731469e-05, "loss": 0.7046, "step": 192000 }, { "epoch": 1.2266971621328087, "grad_norm": 0.45299506187438965, "learning_rate": 3.2603959040000944e-05, "loss": 0.6128, "step": 192010 }, { "epoch": 1.2267610492825474, "grad_norm": 1.5304423570632935, "learning_rate": 3.259925492788037e-05, "loss": 1.0436, "step": 192020 }, { "epoch": 1.226824936432286, "grad_norm": 0.8956804275512695, "learning_rate": 3.2594550991000364e-05, "loss": 0.7963, "step": 192030 }, { "epoch": 1.2268888235820248, "grad_norm": 1.0764594078063965, "learning_rate": 3.258984722940829e-05, "loss": 0.855, "step": 192040 }, { "epoch": 1.2269527107317635, "grad_norm": 1.0527817010879517, "learning_rate": 3.2585143643151505e-05, "loss": 0.7834, "step": 192050 }, { "epoch": 1.2270165978815022, "grad_norm": 1.3264557123184204, "learning_rate": 3.25804402322774e-05, "loss": 0.7283, "step": 192060 }, { "epoch": 1.227080485031241, "grad_norm": 0.8222655653953552, "learning_rate": 3.2575736996833325e-05, "loss": 0.8905, "step": 192070 }, { "epoch": 1.2271443721809796, "grad_norm": 2.544546604156494, "learning_rate": 3.2571033936866653e-05, "loss": 1.1683, "step": 192080 }, { "epoch": 1.2272082593307183, "grad_norm": 1.200516939163208, "learning_rate": 3.2566331052424744e-05, "loss": 1.0274, "step": 192090 }, { "epoch": 1.227272146480457, "grad_norm": 0.9582846760749817, "learning_rate": 3.256162834355497e-05, "loss": 0.7701, "step": 192100 }, { "epoch": 1.2273360336301957, "grad_norm": 0.7705967426300049, "learning_rate": 3.255692581030467e-05, "loss": 0.9304, "step": 192110 }, { "epoch": 1.2273999207799342, "grad_norm": 0.9376071691513062, "learning_rate": 3.2552223452721234e-05, "loss": 0.897, "step": 192120 }, { "epoch": 1.2274638079296731, "grad_norm": 1.4855351448059082, "learning_rate": 3.2547521270852e-05, "loss": 0.7594, "step": 192130 }, { "epoch": 1.2275276950794116, "grad_norm": 0.7116539478302002, "learning_rate": 3.254281926474432e-05, "loss": 0.802, "step": 192140 }, { "epoch": 1.2275915822291505, "grad_norm": 1.2037607431411743, "learning_rate": 3.2538117434445556e-05, "loss": 0.9778, "step": 192150 }, { "epoch": 1.227655469378889, "grad_norm": 0.8576934337615967, "learning_rate": 3.253341578000306e-05, "loss": 0.7583, "step": 192160 }, { "epoch": 1.2277193565286277, "grad_norm": 0.8133600354194641, "learning_rate": 3.252871430146417e-05, "loss": 0.948, "step": 192170 }, { "epoch": 1.2277832436783664, "grad_norm": 0.8566866517066956, "learning_rate": 3.252401299887625e-05, "loss": 0.8403, "step": 192180 }, { "epoch": 1.2278471308281051, "grad_norm": 0.8782423138618469, "learning_rate": 3.251931187228664e-05, "loss": 0.7959, "step": 192190 }, { "epoch": 1.2279110179778439, "grad_norm": 0.6727086305618286, "learning_rate": 3.251461092174267e-05, "loss": 0.8979, "step": 192200 }, { "epoch": 1.2279749051275826, "grad_norm": 1.1492390632629395, "learning_rate": 3.2509910147291704e-05, "loss": 0.6958, "step": 192210 }, { "epoch": 1.2280387922773213, "grad_norm": 1.4259767532348633, "learning_rate": 3.2505209548981074e-05, "loss": 0.831, "step": 192220 }, { "epoch": 1.22810267942706, "grad_norm": 0.921388566493988, "learning_rate": 3.2500509126858115e-05, "loss": 0.8291, "step": 192230 }, { "epoch": 1.2281665665767987, "grad_norm": 0.6656931638717651, "learning_rate": 3.2495808880970166e-05, "loss": 0.7202, "step": 192240 }, { "epoch": 1.2282304537265374, "grad_norm": 0.8429076075553894, "learning_rate": 3.249110881136458e-05, "loss": 1.0291, "step": 192250 }, { "epoch": 1.228294340876276, "grad_norm": 1.1925345659255981, "learning_rate": 3.248640891808866e-05, "loss": 1.0243, "step": 192260 }, { "epoch": 1.2283582280260148, "grad_norm": 0.6658920049667358, "learning_rate": 3.248170920118976e-05, "loss": 0.9112, "step": 192270 }, { "epoch": 1.2284221151757535, "grad_norm": 1.026580572128296, "learning_rate": 3.2477009660715195e-05, "loss": 0.6001, "step": 192280 }, { "epoch": 1.2284860023254922, "grad_norm": 0.8293872475624084, "learning_rate": 3.247231029671232e-05, "loss": 0.8562, "step": 192290 }, { "epoch": 1.228549889475231, "grad_norm": 0.8070954084396362, "learning_rate": 3.2467611109228426e-05, "loss": 0.8052, "step": 192300 }, { "epoch": 1.2286137766249696, "grad_norm": 0.7816912531852722, "learning_rate": 3.2462912098310876e-05, "loss": 0.7136, "step": 192310 }, { "epoch": 1.2286776637747083, "grad_norm": 0.7612060308456421, "learning_rate": 3.245821326400696e-05, "loss": 0.9215, "step": 192320 }, { "epoch": 1.228741550924447, "grad_norm": 0.707601010799408, "learning_rate": 3.245351460636401e-05, "loss": 0.9532, "step": 192330 }, { "epoch": 1.2288054380741857, "grad_norm": 1.278326153755188, "learning_rate": 3.244881612542935e-05, "loss": 0.8568, "step": 192340 }, { "epoch": 1.2288693252239244, "grad_norm": 0.7208459377288818, "learning_rate": 3.24441178212503e-05, "loss": 1.0636, "step": 192350 }, { "epoch": 1.2289332123736632, "grad_norm": 0.7168135643005371, "learning_rate": 3.243941969387416e-05, "loss": 0.7759, "step": 192360 }, { "epoch": 1.2289970995234019, "grad_norm": 0.7322360873222351, "learning_rate": 3.243472174334827e-05, "loss": 0.7761, "step": 192370 }, { "epoch": 1.2290609866731406, "grad_norm": 0.6737679839134216, "learning_rate": 3.243002396971992e-05, "loss": 0.9582, "step": 192380 }, { "epoch": 1.2291248738228793, "grad_norm": 0.962618350982666, "learning_rate": 3.242532637303643e-05, "loss": 1.1989, "step": 192390 }, { "epoch": 1.229188760972618, "grad_norm": 0.9641563296318054, "learning_rate": 3.2420628953345105e-05, "loss": 0.74, "step": 192400 }, { "epoch": 1.2292526481223567, "grad_norm": 0.8702797293663025, "learning_rate": 3.241593171069326e-05, "loss": 0.9423, "step": 192410 }, { "epoch": 1.2293165352720954, "grad_norm": 1.1447423696517944, "learning_rate": 3.241123464512819e-05, "loss": 0.9131, "step": 192420 }, { "epoch": 1.229380422421834, "grad_norm": 1.0794869661331177, "learning_rate": 3.24065377566972e-05, "loss": 0.7764, "step": 192430 }, { "epoch": 1.2294443095715728, "grad_norm": 1.5134004354476929, "learning_rate": 3.24018410454476e-05, "loss": 1.0517, "step": 192440 }, { "epoch": 1.2295081967213115, "grad_norm": 1.1577551364898682, "learning_rate": 3.239714451142668e-05, "loss": 0.8146, "step": 192450 }, { "epoch": 1.2295720838710502, "grad_norm": 1.0094727277755737, "learning_rate": 3.239244815468175e-05, "loss": 0.8686, "step": 192460 }, { "epoch": 1.229635971020789, "grad_norm": 0.7291733026504517, "learning_rate": 3.23877519752601e-05, "loss": 0.9864, "step": 192470 }, { "epoch": 1.2296998581705276, "grad_norm": 0.7421193718910217, "learning_rate": 3.238305597320903e-05, "loss": 0.9084, "step": 192480 }, { "epoch": 1.2297637453202663, "grad_norm": 0.5920099020004272, "learning_rate": 3.237836014857581e-05, "loss": 0.9251, "step": 192490 }, { "epoch": 1.229827632470005, "grad_norm": 0.7378367781639099, "learning_rate": 3.2373664501407766e-05, "loss": 0.98, "step": 192500 }, { "epoch": 1.2298915196197437, "grad_norm": 1.1757078170776367, "learning_rate": 3.236896903175216e-05, "loss": 0.7089, "step": 192510 }, { "epoch": 1.2299554067694825, "grad_norm": 0.9285265803337097, "learning_rate": 3.236427373965629e-05, "loss": 0.7968, "step": 192520 }, { "epoch": 1.2300192939192212, "grad_norm": 1.0863308906555176, "learning_rate": 3.235957862516745e-05, "loss": 0.8758, "step": 192530 }, { "epoch": 1.2300831810689599, "grad_norm": 1.2274580001831055, "learning_rate": 3.2354883688332906e-05, "loss": 0.8596, "step": 192540 }, { "epoch": 1.2301470682186986, "grad_norm": 0.7428937554359436, "learning_rate": 3.235018892919995e-05, "loss": 0.8672, "step": 192550 }, { "epoch": 1.2302109553684373, "grad_norm": 1.2199949026107788, "learning_rate": 3.234549434781586e-05, "loss": 0.8324, "step": 192560 }, { "epoch": 1.230274842518176, "grad_norm": 1.1966480016708374, "learning_rate": 3.234079994422791e-05, "loss": 1.0522, "step": 192570 }, { "epoch": 1.2303387296679147, "grad_norm": 0.7035223245620728, "learning_rate": 3.233610571848339e-05, "loss": 0.7794, "step": 192580 }, { "epoch": 1.2304026168176534, "grad_norm": 0.9693596959114075, "learning_rate": 3.2331411670629564e-05, "loss": 0.7748, "step": 192590 }, { "epoch": 1.230466503967392, "grad_norm": 0.9353430867195129, "learning_rate": 3.2326717800713706e-05, "loss": 0.9076, "step": 192600 }, { "epoch": 1.2305303911171306, "grad_norm": 0.6712273359298706, "learning_rate": 3.232202410878309e-05, "loss": 0.8616, "step": 192610 }, { "epoch": 1.2305942782668695, "grad_norm": 2.5598068237304688, "learning_rate": 3.2317330594884986e-05, "loss": 0.735, "step": 192620 }, { "epoch": 1.230658165416608, "grad_norm": 1.0685700178146362, "learning_rate": 3.2312637259066654e-05, "loss": 0.955, "step": 192630 }, { "epoch": 1.230722052566347, "grad_norm": 0.6315664052963257, "learning_rate": 3.230794410137537e-05, "loss": 0.6154, "step": 192640 }, { "epoch": 1.2307859397160854, "grad_norm": 0.6316712498664856, "learning_rate": 3.23032511218584e-05, "loss": 1.101, "step": 192650 }, { "epoch": 1.2308498268658241, "grad_norm": 1.0245057344436646, "learning_rate": 3.2298558320563e-05, "loss": 1.1672, "step": 192660 }, { "epoch": 1.2309137140155628, "grad_norm": 0.8964295387268066, "learning_rate": 3.2293865697536426e-05, "loss": 0.916, "step": 192670 }, { "epoch": 1.2309776011653015, "grad_norm": 1.661603331565857, "learning_rate": 3.228917325282595e-05, "loss": 0.8007, "step": 192680 }, { "epoch": 1.2310414883150402, "grad_norm": 0.7393468022346497, "learning_rate": 3.2284480986478813e-05, "loss": 0.7766, "step": 192690 }, { "epoch": 1.231105375464779, "grad_norm": 1.1308892965316772, "learning_rate": 3.227978889854229e-05, "loss": 0.8523, "step": 192700 }, { "epoch": 1.2311692626145176, "grad_norm": 0.988743782043457, "learning_rate": 3.2275096989063616e-05, "loss": 0.9271, "step": 192710 }, { "epoch": 1.2312331497642564, "grad_norm": 0.7368068695068359, "learning_rate": 3.2270405258090054e-05, "loss": 0.7516, "step": 192720 }, { "epoch": 1.231297036913995, "grad_norm": 0.8602904677391052, "learning_rate": 3.2266182852874775e-05, "loss": 0.6705, "step": 192730 }, { "epoch": 1.2313609240637338, "grad_norm": 0.8390285968780518, "learning_rate": 3.226149146119108e-05, "loss": 0.7796, "step": 192740 }, { "epoch": 1.2314248112134725, "grad_norm": 0.7270960211753845, "learning_rate": 3.225680024814951e-05, "loss": 1.0052, "step": 192750 }, { "epoch": 1.2314886983632112, "grad_norm": 0.8915246725082397, "learning_rate": 3.2252109213797317e-05, "loss": 0.8711, "step": 192760 }, { "epoch": 1.2315525855129499, "grad_norm": 0.6008066534996033, "learning_rate": 3.2247418358181734e-05, "loss": 0.7466, "step": 192770 }, { "epoch": 1.2316164726626886, "grad_norm": 0.7765604257583618, "learning_rate": 3.224272768135002e-05, "loss": 0.8973, "step": 192780 }, { "epoch": 1.2316803598124273, "grad_norm": 0.9872602820396423, "learning_rate": 3.223803718334939e-05, "loss": 1.0003, "step": 192790 }, { "epoch": 1.231744246962166, "grad_norm": 0.7912330031394958, "learning_rate": 3.22333468642271e-05, "loss": 1.015, "step": 192800 }, { "epoch": 1.2318081341119047, "grad_norm": 0.8799734711647034, "learning_rate": 3.222865672403037e-05, "loss": 0.6711, "step": 192810 }, { "epoch": 1.2318720212616434, "grad_norm": 0.6278074383735657, "learning_rate": 3.2223966762806446e-05, "loss": 0.9729, "step": 192820 }, { "epoch": 1.2319359084113821, "grad_norm": 1.1327438354492188, "learning_rate": 3.221927698060255e-05, "loss": 1.0065, "step": 192830 }, { "epoch": 1.2319997955611208, "grad_norm": 1.7792538404464722, "learning_rate": 3.221458737746592e-05, "loss": 0.7122, "step": 192840 }, { "epoch": 1.2320636827108595, "grad_norm": 0.876568078994751, "learning_rate": 3.220989795344378e-05, "loss": 0.9008, "step": 192850 }, { "epoch": 1.2321275698605982, "grad_norm": 0.969321608543396, "learning_rate": 3.2205208708583355e-05, "loss": 1.1909, "step": 192860 }, { "epoch": 1.232191457010337, "grad_norm": 0.681924045085907, "learning_rate": 3.220051964293188e-05, "loss": 0.9973, "step": 192870 }, { "epoch": 1.2322553441600756, "grad_norm": 1.640181541442871, "learning_rate": 3.2195830756536574e-05, "loss": 0.7027, "step": 192880 }, { "epoch": 1.2323192313098144, "grad_norm": 0.5599543452262878, "learning_rate": 3.2191142049444646e-05, "loss": 0.7875, "step": 192890 }, { "epoch": 1.232383118459553, "grad_norm": 1.001563310623169, "learning_rate": 3.218645352170333e-05, "loss": 0.8589, "step": 192900 }, { "epoch": 1.2324470056092918, "grad_norm": 1.0089075565338135, "learning_rate": 3.2181765173359836e-05, "loss": 0.8123, "step": 192910 }, { "epoch": 1.2325108927590305, "grad_norm": 1.5100098848342896, "learning_rate": 3.217707700446138e-05, "loss": 0.7626, "step": 192920 }, { "epoch": 1.2325747799087692, "grad_norm": 0.9354732632637024, "learning_rate": 3.2172389015055184e-05, "loss": 0.8715, "step": 192930 }, { "epoch": 1.2326386670585079, "grad_norm": 1.4577165842056274, "learning_rate": 3.216770120518846e-05, "loss": 0.8773, "step": 192940 }, { "epoch": 1.2327025542082466, "grad_norm": 0.8571488857269287, "learning_rate": 3.21630135749084e-05, "loss": 0.8343, "step": 192950 }, { "epoch": 1.2327664413579853, "grad_norm": 1.2611160278320312, "learning_rate": 3.2158326124262225e-05, "loss": 0.674, "step": 192960 }, { "epoch": 1.232830328507724, "grad_norm": 0.894373893737793, "learning_rate": 3.215363885329714e-05, "loss": 1.2393, "step": 192970 }, { "epoch": 1.2328942156574627, "grad_norm": 0.5427113175392151, "learning_rate": 3.2148951762060356e-05, "loss": 0.794, "step": 192980 }, { "epoch": 1.2329581028072014, "grad_norm": 2.207989454269409, "learning_rate": 3.214426485059906e-05, "loss": 0.8682, "step": 192990 }, { "epoch": 1.2330219899569401, "grad_norm": 1.0269194841384888, "learning_rate": 3.213957811896048e-05, "loss": 1.0283, "step": 193000 }, { "epoch": 1.2330858771066788, "grad_norm": 0.8881884217262268, "learning_rate": 3.213489156719179e-05, "loss": 0.9296, "step": 193010 }, { "epoch": 1.2331497642564175, "grad_norm": 1.8312245607376099, "learning_rate": 3.2130205195340204e-05, "loss": 0.627, "step": 193020 }, { "epoch": 1.2332136514061562, "grad_norm": 1.116809368133545, "learning_rate": 3.21255190034529e-05, "loss": 0.8914, "step": 193030 }, { "epoch": 1.233277538555895, "grad_norm": 1.0772712230682373, "learning_rate": 3.2120832991577094e-05, "loss": 0.7899, "step": 193040 }, { "epoch": 1.2333414257056337, "grad_norm": 1.190745234489441, "learning_rate": 3.2116147159759966e-05, "loss": 0.895, "step": 193050 }, { "epoch": 1.2334053128553724, "grad_norm": 0.9631375670433044, "learning_rate": 3.21114615080487e-05, "loss": 0.9266, "step": 193060 }, { "epoch": 1.233469200005111, "grad_norm": 0.6763384342193604, "learning_rate": 3.2106776036490494e-05, "loss": 1.1094, "step": 193070 }, { "epoch": 1.2335330871548496, "grad_norm": 0.7202142477035522, "learning_rate": 3.210209074513253e-05, "loss": 0.9108, "step": 193080 }, { "epoch": 1.2335969743045885, "grad_norm": 1.647141933441162, "learning_rate": 3.2097405634022005e-05, "loss": 1.1182, "step": 193090 }, { "epoch": 1.233660861454327, "grad_norm": 1.490446925163269, "learning_rate": 3.209272070320609e-05, "loss": 1.0184, "step": 193100 }, { "epoch": 1.233724748604066, "grad_norm": 0.8899498581886292, "learning_rate": 3.208803595273198e-05, "loss": 0.7708, "step": 193110 }, { "epoch": 1.2337886357538044, "grad_norm": 1.018667221069336, "learning_rate": 3.2083351382646834e-05, "loss": 1.0222, "step": 193120 }, { "epoch": 1.2338525229035433, "grad_norm": 1.8434399366378784, "learning_rate": 3.2078666992997834e-05, "loss": 0.7171, "step": 193130 }, { "epoch": 1.2339164100532818, "grad_norm": 0.5759685039520264, "learning_rate": 3.207398278383217e-05, "loss": 0.8113, "step": 193140 }, { "epoch": 1.2339802972030205, "grad_norm": 0.9592820405960083, "learning_rate": 3.206929875519701e-05, "loss": 0.9567, "step": 193150 }, { "epoch": 1.2340441843527592, "grad_norm": 1.4874789714813232, "learning_rate": 3.206461490713951e-05, "loss": 1.1218, "step": 193160 }, { "epoch": 1.234108071502498, "grad_norm": 0.6673001050949097, "learning_rate": 3.205993123970687e-05, "loss": 0.7205, "step": 193170 }, { "epoch": 1.2341719586522366, "grad_norm": 0.860629141330719, "learning_rate": 3.205524775294624e-05, "loss": 0.8202, "step": 193180 }, { "epoch": 1.2342358458019753, "grad_norm": 0.6842666268348694, "learning_rate": 3.205056444690478e-05, "loss": 1.0571, "step": 193190 }, { "epoch": 1.234299732951714, "grad_norm": 1.0597096681594849, "learning_rate": 3.2045881321629664e-05, "loss": 0.8567, "step": 193200 }, { "epoch": 1.2343636201014527, "grad_norm": 0.901559591293335, "learning_rate": 3.2041198377168066e-05, "loss": 1.1343, "step": 193210 }, { "epoch": 1.2344275072511914, "grad_norm": 0.7798748016357422, "learning_rate": 3.203651561356714e-05, "loss": 0.8617, "step": 193220 }, { "epoch": 1.2344913944009301, "grad_norm": 0.8212858438491821, "learning_rate": 3.203183303087403e-05, "loss": 0.8774, "step": 193230 }, { "epoch": 1.2345552815506688, "grad_norm": 1.2697757482528687, "learning_rate": 3.202715062913592e-05, "loss": 0.8944, "step": 193240 }, { "epoch": 1.2346191687004076, "grad_norm": 1.0339276790618896, "learning_rate": 3.202246840839994e-05, "loss": 0.9764, "step": 193250 }, { "epoch": 1.2346830558501463, "grad_norm": 1.3854345083236694, "learning_rate": 3.201778636871325e-05, "loss": 0.9081, "step": 193260 }, { "epoch": 1.234746942999885, "grad_norm": 1.7363767623901367, "learning_rate": 3.201310451012303e-05, "loss": 0.9234, "step": 193270 }, { "epoch": 1.2348108301496237, "grad_norm": 0.8943231105804443, "learning_rate": 3.200842283267638e-05, "loss": 0.9892, "step": 193280 }, { "epoch": 1.2348747172993624, "grad_norm": 0.6131458282470703, "learning_rate": 3.200374133642049e-05, "loss": 0.6881, "step": 193290 }, { "epoch": 1.234938604449101, "grad_norm": 0.9070971608161926, "learning_rate": 3.19990600214025e-05, "loss": 0.8429, "step": 193300 }, { "epoch": 1.2350024915988398, "grad_norm": 3.0522189140319824, "learning_rate": 3.199437888766954e-05, "loss": 1.1211, "step": 193310 }, { "epoch": 1.2350663787485785, "grad_norm": 1.3165589570999146, "learning_rate": 3.198969793526877e-05, "loss": 0.7646, "step": 193320 }, { "epoch": 1.2351302658983172, "grad_norm": 1.1292219161987305, "learning_rate": 3.1985017164247325e-05, "loss": 0.8684, "step": 193330 }, { "epoch": 1.235194153048056, "grad_norm": 1.0225328207015991, "learning_rate": 3.198033657465233e-05, "loss": 0.8399, "step": 193340 }, { "epoch": 1.2352580401977946, "grad_norm": 0.7963989973068237, "learning_rate": 3.1975656166530946e-05, "loss": 0.824, "step": 193350 }, { "epoch": 1.2353219273475333, "grad_norm": 1.6515822410583496, "learning_rate": 3.197097593993029e-05, "loss": 0.8579, "step": 193360 }, { "epoch": 1.235385814497272, "grad_norm": 0.9961569905281067, "learning_rate": 3.196629589489751e-05, "loss": 1.0444, "step": 193370 }, { "epoch": 1.2354497016470107, "grad_norm": 0.921463131904602, "learning_rate": 3.196161603147972e-05, "loss": 1.0651, "step": 193380 }, { "epoch": 1.2355135887967494, "grad_norm": 0.5503140091896057, "learning_rate": 3.195693634972408e-05, "loss": 0.7384, "step": 193390 }, { "epoch": 1.2355774759464881, "grad_norm": 1.171606183052063, "learning_rate": 3.1952256849677684e-05, "loss": 0.8194, "step": 193400 }, { "epoch": 1.2356413630962269, "grad_norm": 0.7877267599105835, "learning_rate": 3.194757753138769e-05, "loss": 0.8236, "step": 193410 }, { "epoch": 1.2357052502459656, "grad_norm": 0.6800931096076965, "learning_rate": 3.19428983949012e-05, "loss": 0.9972, "step": 193420 }, { "epoch": 1.2357691373957043, "grad_norm": 1.1945538520812988, "learning_rate": 3.1938219440265355e-05, "loss": 1.0245, "step": 193430 }, { "epoch": 1.235833024545443, "grad_norm": 0.9393901824951172, "learning_rate": 3.1933540667527256e-05, "loss": 1.0416, "step": 193440 }, { "epoch": 1.2358969116951817, "grad_norm": 0.9092146754264832, "learning_rate": 3.192886207673404e-05, "loss": 1.0766, "step": 193450 }, { "epoch": 1.2359607988449204, "grad_norm": 0.8779313564300537, "learning_rate": 3.19241836679328e-05, "loss": 0.9729, "step": 193460 }, { "epoch": 1.236024685994659, "grad_norm": 1.0780725479125977, "learning_rate": 3.191950544117068e-05, "loss": 1.0057, "step": 193470 }, { "epoch": 1.2360885731443978, "grad_norm": 1.7977213859558105, "learning_rate": 3.1914827396494776e-05, "loss": 0.767, "step": 193480 }, { "epoch": 1.2361524602941365, "grad_norm": 1.097962498664856, "learning_rate": 3.191014953395221e-05, "loss": 0.8727, "step": 193490 }, { "epoch": 1.2362163474438752, "grad_norm": 1.1607288122177124, "learning_rate": 3.190547185359008e-05, "loss": 0.9234, "step": 193500 }, { "epoch": 1.236280234593614, "grad_norm": 0.9202963709831238, "learning_rate": 3.1900794355455514e-05, "loss": 1.3214, "step": 193510 }, { "epoch": 1.2363441217433526, "grad_norm": 0.9579522609710693, "learning_rate": 3.1896117039595606e-05, "loss": 0.7031, "step": 193520 }, { "epoch": 1.2364080088930913, "grad_norm": 0.6659561395645142, "learning_rate": 3.189143990605746e-05, "loss": 0.8339, "step": 193530 }, { "epoch": 1.23647189604283, "grad_norm": 1.919846534729004, "learning_rate": 3.188676295488817e-05, "loss": 0.8283, "step": 193540 }, { "epoch": 1.2365357831925687, "grad_norm": 0.9194838404655457, "learning_rate": 3.1882086186134866e-05, "loss": 0.8513, "step": 193550 }, { "epoch": 1.2365996703423074, "grad_norm": 0.7087141275405884, "learning_rate": 3.187740959984461e-05, "loss": 0.8013, "step": 193560 }, { "epoch": 1.236663557492046, "grad_norm": 1.0234582424163818, "learning_rate": 3.187273319606453e-05, "loss": 0.8943, "step": 193570 }, { "epoch": 1.2367274446417849, "grad_norm": 0.5449272990226746, "learning_rate": 3.18680569748417e-05, "loss": 0.8737, "step": 193580 }, { "epoch": 1.2367913317915233, "grad_norm": 0.695446789264679, "learning_rate": 3.186338093622323e-05, "loss": 1.0306, "step": 193590 }, { "epoch": 1.2368552189412623, "grad_norm": 0.7840051651000977, "learning_rate": 3.185870508025619e-05, "loss": 0.701, "step": 193600 }, { "epoch": 1.2369191060910008, "grad_norm": 1.039993405342102, "learning_rate": 3.185402940698769e-05, "loss": 0.8228, "step": 193610 }, { "epoch": 1.2369829932407397, "grad_norm": 0.6685113906860352, "learning_rate": 3.1849353916464826e-05, "loss": 0.9071, "step": 193620 }, { "epoch": 1.2370468803904782, "grad_norm": 1.0278228521347046, "learning_rate": 3.1844678608734664e-05, "loss": 0.729, "step": 193630 }, { "epoch": 1.2371107675402169, "grad_norm": 0.9203469753265381, "learning_rate": 3.1840003483844296e-05, "loss": 0.6591, "step": 193640 }, { "epoch": 1.2371746546899556, "grad_norm": 0.6178497076034546, "learning_rate": 3.1835328541840796e-05, "loss": 0.8599, "step": 193650 }, { "epoch": 1.2372385418396943, "grad_norm": 0.9682593941688538, "learning_rate": 3.183065378277126e-05, "loss": 0.8228, "step": 193660 }, { "epoch": 1.237302428989433, "grad_norm": 0.5989915132522583, "learning_rate": 3.1825979206682753e-05, "loss": 0.8924, "step": 193670 }, { "epoch": 1.2373663161391717, "grad_norm": 0.9581364989280701, "learning_rate": 3.182130481362237e-05, "loss": 0.9859, "step": 193680 }, { "epoch": 1.2374302032889104, "grad_norm": 1.2858854532241821, "learning_rate": 3.181663060363717e-05, "loss": 0.9602, "step": 193690 }, { "epoch": 1.237494090438649, "grad_norm": 1.281391978263855, "learning_rate": 3.181195657677422e-05, "loss": 0.7789, "step": 193700 }, { "epoch": 1.2375579775883878, "grad_norm": 0.7628605365753174, "learning_rate": 3.180728273308061e-05, "loss": 0.8923, "step": 193710 }, { "epoch": 1.2376218647381265, "grad_norm": 0.8544741272926331, "learning_rate": 3.180260907260339e-05, "loss": 1.0046, "step": 193720 }, { "epoch": 1.2376857518878652, "grad_norm": 0.8687821626663208, "learning_rate": 3.179793559538966e-05, "loss": 0.7476, "step": 193730 }, { "epoch": 1.237749639037604, "grad_norm": 0.9045094847679138, "learning_rate": 3.179326230148646e-05, "loss": 0.8623, "step": 193740 }, { "epoch": 1.2378135261873426, "grad_norm": 0.7554922699928284, "learning_rate": 3.1788589190940856e-05, "loss": 0.7176, "step": 193750 }, { "epoch": 1.2378774133370813, "grad_norm": 1.8358988761901855, "learning_rate": 3.17839162637999e-05, "loss": 0.8954, "step": 193760 }, { "epoch": 1.23794130048682, "grad_norm": 1.0413012504577637, "learning_rate": 3.177924352011069e-05, "loss": 0.6721, "step": 193770 }, { "epoch": 1.2380051876365588, "grad_norm": 0.7290290594100952, "learning_rate": 3.177457095992025e-05, "loss": 0.8537, "step": 193780 }, { "epoch": 1.2380690747862975, "grad_norm": 1.435036540031433, "learning_rate": 3.1769898583275646e-05, "loss": 1.1552, "step": 193790 }, { "epoch": 1.2381329619360362, "grad_norm": 1.10448157787323, "learning_rate": 3.176522639022394e-05, "loss": 0.6483, "step": 193800 }, { "epoch": 1.2381968490857749, "grad_norm": 0.7884492874145508, "learning_rate": 3.1760554380812165e-05, "loss": 1.0546, "step": 193810 }, { "epoch": 1.2382607362355136, "grad_norm": 0.8081900477409363, "learning_rate": 3.17558825550874e-05, "loss": 0.8096, "step": 193820 }, { "epoch": 1.2383246233852523, "grad_norm": 1.044958233833313, "learning_rate": 3.175121091309669e-05, "loss": 0.8731, "step": 193830 }, { "epoch": 1.238388510534991, "grad_norm": 0.8754411935806274, "learning_rate": 3.1746539454887055e-05, "loss": 0.9596, "step": 193840 }, { "epoch": 1.2384523976847297, "grad_norm": 0.7443397045135498, "learning_rate": 3.174186818050557e-05, "loss": 0.8056, "step": 193850 }, { "epoch": 1.2385162848344684, "grad_norm": 1.0024755001068115, "learning_rate": 3.173719708999926e-05, "loss": 0.9443, "step": 193860 }, { "epoch": 1.2385801719842071, "grad_norm": 1.0295993089675903, "learning_rate": 3.1732526183415186e-05, "loss": 0.8562, "step": 193870 }, { "epoch": 1.2386440591339458, "grad_norm": 2.135509729385376, "learning_rate": 3.172785546080037e-05, "loss": 0.8922, "step": 193880 }, { "epoch": 1.2387079462836845, "grad_norm": 1.0635528564453125, "learning_rate": 3.1723184922201854e-05, "loss": 0.835, "step": 193890 }, { "epoch": 1.2387718334334232, "grad_norm": 0.9661139249801636, "learning_rate": 3.1718514567666685e-05, "loss": 0.7667, "step": 193900 }, { "epoch": 1.238835720583162, "grad_norm": 0.840607225894928, "learning_rate": 3.1713844397241886e-05, "loss": 0.7449, "step": 193910 }, { "epoch": 1.2388996077329006, "grad_norm": 0.5099063515663147, "learning_rate": 3.1709174410974504e-05, "loss": 0.8591, "step": 193920 }, { "epoch": 1.2389634948826393, "grad_norm": 0.8879222273826599, "learning_rate": 3.1704504608911554e-05, "loss": 0.8582, "step": 193930 }, { "epoch": 1.239027382032378, "grad_norm": 0.518216609954834, "learning_rate": 3.169983499110006e-05, "loss": 0.9629, "step": 193940 }, { "epoch": 1.2390912691821168, "grad_norm": 1.1863969564437866, "learning_rate": 3.169516555758707e-05, "loss": 0.9316, "step": 193950 }, { "epoch": 1.2391551563318555, "grad_norm": 0.9996239542961121, "learning_rate": 3.169049630841959e-05, "loss": 0.7029, "step": 193960 }, { "epoch": 1.2392190434815942, "grad_norm": 1.6813340187072754, "learning_rate": 3.168582724364466e-05, "loss": 0.7484, "step": 193970 }, { "epoch": 1.2392829306313329, "grad_norm": 1.458324909210205, "learning_rate": 3.168115836330929e-05, "loss": 0.7864, "step": 193980 }, { "epoch": 1.2393468177810716, "grad_norm": 1.1674745082855225, "learning_rate": 3.167648966746051e-05, "loss": 0.8352, "step": 193990 }, { "epoch": 1.2394107049308103, "grad_norm": 5.070311546325684, "learning_rate": 3.167182115614532e-05, "loss": 1.0867, "step": 194000 }, { "epoch": 1.239474592080549, "grad_norm": 1.2760869264602661, "learning_rate": 3.1667152829410755e-05, "loss": 0.8498, "step": 194010 }, { "epoch": 1.2395384792302877, "grad_norm": 1.25545334815979, "learning_rate": 3.166248468730382e-05, "loss": 0.7311, "step": 194020 }, { "epoch": 1.2396023663800264, "grad_norm": 1.5020123720169067, "learning_rate": 3.1657816729871524e-05, "loss": 1.0177, "step": 194030 }, { "epoch": 1.2396662535297651, "grad_norm": 1.1968601942062378, "learning_rate": 3.1653148957160886e-05, "loss": 0.7715, "step": 194040 }, { "epoch": 1.2397301406795038, "grad_norm": 0.8993906378746033, "learning_rate": 3.1648481369218905e-05, "loss": 0.9558, "step": 194050 }, { "epoch": 1.2397940278292423, "grad_norm": 1.380210280418396, "learning_rate": 3.164381396609261e-05, "loss": 0.8604, "step": 194060 }, { "epoch": 1.2398579149789812, "grad_norm": 0.8667689561843872, "learning_rate": 3.163914674782897e-05, "loss": 0.7385, "step": 194070 }, { "epoch": 1.2399218021287197, "grad_norm": 1.3134424686431885, "learning_rate": 3.163447971447501e-05, "loss": 0.8272, "step": 194080 }, { "epoch": 1.2399856892784586, "grad_norm": 0.7698752284049988, "learning_rate": 3.162981286607773e-05, "loss": 0.7033, "step": 194090 }, { "epoch": 1.2400495764281971, "grad_norm": 0.7156646847724915, "learning_rate": 3.162514620268413e-05, "loss": 1.1074, "step": 194100 }, { "epoch": 1.2401134635779358, "grad_norm": 0.8076853156089783, "learning_rate": 3.16204797243412e-05, "loss": 0.7992, "step": 194110 }, { "epoch": 1.2401773507276745, "grad_norm": 1.0264087915420532, "learning_rate": 3.161581343109594e-05, "loss": 0.8523, "step": 194120 }, { "epoch": 1.2402412378774132, "grad_norm": 1.4501597881317139, "learning_rate": 3.1611147322995335e-05, "loss": 0.9584, "step": 194130 }, { "epoch": 1.240305125027152, "grad_norm": 1.5734119415283203, "learning_rate": 3.160648140008639e-05, "loss": 1.0022, "step": 194140 }, { "epoch": 1.2403690121768907, "grad_norm": 0.6637634038925171, "learning_rate": 3.160181566241609e-05, "loss": 0.9012, "step": 194150 }, { "epoch": 1.2404328993266294, "grad_norm": 0.6105542182922363, "learning_rate": 3.1597150110031436e-05, "loss": 0.75, "step": 194160 }, { "epoch": 1.240496786476368, "grad_norm": 0.6703608632087708, "learning_rate": 3.159248474297939e-05, "loss": 0.7869, "step": 194170 }, { "epoch": 1.2405606736261068, "grad_norm": 1.018967628479004, "learning_rate": 3.158781956130695e-05, "loss": 0.6713, "step": 194180 }, { "epoch": 1.2406245607758455, "grad_norm": 1.2115850448608398, "learning_rate": 3.1583154565061094e-05, "loss": 0.848, "step": 194190 }, { "epoch": 1.2406884479255842, "grad_norm": 1.2993463277816772, "learning_rate": 3.157848975428881e-05, "loss": 0.9696, "step": 194200 }, { "epoch": 1.240752335075323, "grad_norm": 1.075815200805664, "learning_rate": 3.157382512903707e-05, "loss": 0.8145, "step": 194210 }, { "epoch": 1.2408162222250616, "grad_norm": 2.959564447402954, "learning_rate": 3.1569160689352844e-05, "loss": 0.8707, "step": 194220 }, { "epoch": 1.2408801093748003, "grad_norm": 0.7935780882835388, "learning_rate": 3.156449643528312e-05, "loss": 0.8066, "step": 194230 }, { "epoch": 1.240943996524539, "grad_norm": 1.5406337976455688, "learning_rate": 3.155983236687486e-05, "loss": 0.8783, "step": 194240 }, { "epoch": 1.2410078836742777, "grad_norm": 3.1429214477539062, "learning_rate": 3.155516848417505e-05, "loss": 0.8014, "step": 194250 }, { "epoch": 1.2410717708240164, "grad_norm": 0.884060263633728, "learning_rate": 3.155050478723065e-05, "loss": 0.5811, "step": 194260 }, { "epoch": 1.2411356579737551, "grad_norm": 1.2400538921356201, "learning_rate": 3.1545841276088625e-05, "loss": 0.9368, "step": 194270 }, { "epoch": 1.2411995451234938, "grad_norm": 0.5689629912376404, "learning_rate": 3.154117795079594e-05, "loss": 0.6744, "step": 194280 }, { "epoch": 1.2412634322732325, "grad_norm": 1.01374089717865, "learning_rate": 3.153651481139956e-05, "loss": 0.7638, "step": 194290 }, { "epoch": 1.2413273194229713, "grad_norm": 1.1598334312438965, "learning_rate": 3.153185185794646e-05, "loss": 0.8611, "step": 194300 }, { "epoch": 1.24139120657271, "grad_norm": 1.4664697647094727, "learning_rate": 3.152718909048359e-05, "loss": 0.6583, "step": 194310 }, { "epoch": 1.2414550937224487, "grad_norm": 0.7975323796272278, "learning_rate": 3.152252650905789e-05, "loss": 0.8507, "step": 194320 }, { "epoch": 1.2415189808721874, "grad_norm": 1.023056983947754, "learning_rate": 3.151786411371634e-05, "loss": 0.9125, "step": 194330 }, { "epoch": 1.241582868021926, "grad_norm": 0.8665074706077576, "learning_rate": 3.151320190450589e-05, "loss": 0.9165, "step": 194340 }, { "epoch": 1.2416467551716648, "grad_norm": 1.0078295469284058, "learning_rate": 3.1508539881473495e-05, "loss": 1.0755, "step": 194350 }, { "epoch": 1.2417106423214035, "grad_norm": 1.1269526481628418, "learning_rate": 3.1503878044666095e-05, "loss": 1.0677, "step": 194360 }, { "epoch": 1.2417745294711422, "grad_norm": 0.7636091709136963, "learning_rate": 3.1499216394130646e-05, "loss": 1.1119, "step": 194370 }, { "epoch": 1.241838416620881, "grad_norm": 0.6014677882194519, "learning_rate": 3.14945549299141e-05, "loss": 0.768, "step": 194380 }, { "epoch": 1.2419023037706196, "grad_norm": 1.1704468727111816, "learning_rate": 3.1489893652063384e-05, "loss": 0.8032, "step": 194390 }, { "epoch": 1.2419661909203583, "grad_norm": 1.918944239616394, "learning_rate": 3.148523256062548e-05, "loss": 0.7275, "step": 194400 }, { "epoch": 1.242030078070097, "grad_norm": 1.5561200380325317, "learning_rate": 3.148057165564728e-05, "loss": 1.0547, "step": 194410 }, { "epoch": 1.2420939652198357, "grad_norm": 0.7839997410774231, "learning_rate": 3.147591093717575e-05, "loss": 0.8712, "step": 194420 }, { "epoch": 1.2421578523695744, "grad_norm": 1.108949065208435, "learning_rate": 3.147125040525781e-05, "loss": 0.7229, "step": 194430 }, { "epoch": 1.2422217395193131, "grad_norm": 0.8365166783332825, "learning_rate": 3.146659005994042e-05, "loss": 0.8297, "step": 194440 }, { "epoch": 1.2422856266690518, "grad_norm": 0.9354850053787231, "learning_rate": 3.146192990127049e-05, "loss": 0.8969, "step": 194450 }, { "epoch": 1.2423495138187906, "grad_norm": 0.6852608323097229, "learning_rate": 3.145726992929497e-05, "loss": 0.8833, "step": 194460 }, { "epoch": 1.2424134009685293, "grad_norm": 0.6809893846511841, "learning_rate": 3.145261014406079e-05, "loss": 0.8947, "step": 194470 }, { "epoch": 1.242477288118268, "grad_norm": 1.083614706993103, "learning_rate": 3.1447950545614854e-05, "loss": 0.8988, "step": 194480 }, { "epoch": 1.2425411752680067, "grad_norm": 1.200301170349121, "learning_rate": 3.144329113400413e-05, "loss": 1.0784, "step": 194490 }, { "epoch": 1.2426050624177454, "grad_norm": 1.089137077331543, "learning_rate": 3.14386319092755e-05, "loss": 1.0893, "step": 194500 }, { "epoch": 1.242668949567484, "grad_norm": 0.8329066038131714, "learning_rate": 3.1433972871475914e-05, "loss": 1.0404, "step": 194510 }, { "epoch": 1.2427328367172228, "grad_norm": 1.1481367349624634, "learning_rate": 3.142931402065228e-05, "loss": 0.7052, "step": 194520 }, { "epoch": 1.2427967238669615, "grad_norm": 0.6459367871284485, "learning_rate": 3.142465535685152e-05, "loss": 0.8926, "step": 194530 }, { "epoch": 1.2428606110167002, "grad_norm": 0.748914897441864, "learning_rate": 3.141999688012055e-05, "loss": 0.7519, "step": 194540 }, { "epoch": 1.2429244981664387, "grad_norm": 0.689520001411438, "learning_rate": 3.141533859050628e-05, "loss": 0.9843, "step": 194550 }, { "epoch": 1.2429883853161776, "grad_norm": 0.8296200633049011, "learning_rate": 3.141068048805563e-05, "loss": 0.9837, "step": 194560 }, { "epoch": 1.243052272465916, "grad_norm": 0.9028075933456421, "learning_rate": 3.140602257281552e-05, "loss": 1.0621, "step": 194570 }, { "epoch": 1.243116159615655, "grad_norm": 1.247193455696106, "learning_rate": 3.1401364844832846e-05, "loss": 1.0044, "step": 194580 }, { "epoch": 1.2431800467653935, "grad_norm": 1.976901650428772, "learning_rate": 3.139670730415451e-05, "loss": 0.6906, "step": 194590 }, { "epoch": 1.2432439339151322, "grad_norm": 0.7390189170837402, "learning_rate": 3.139204995082743e-05, "loss": 0.9386, "step": 194600 }, { "epoch": 1.243307821064871, "grad_norm": 0.6892824769020081, "learning_rate": 3.138739278489851e-05, "loss": 0.8764, "step": 194610 }, { "epoch": 1.2433717082146096, "grad_norm": 2.279017686843872, "learning_rate": 3.138273580641464e-05, "loss": 0.7605, "step": 194620 }, { "epoch": 1.2434355953643483, "grad_norm": 0.8297634720802307, "learning_rate": 3.137807901542272e-05, "loss": 1.045, "step": 194630 }, { "epoch": 1.243499482514087, "grad_norm": 0.7811027765274048, "learning_rate": 3.137342241196967e-05, "loss": 0.8654, "step": 194640 }, { "epoch": 1.2435633696638257, "grad_norm": 1.1974806785583496, "learning_rate": 3.136876599610235e-05, "loss": 0.9548, "step": 194650 }, { "epoch": 1.2436272568135645, "grad_norm": 1.1165138483047485, "learning_rate": 3.136410976786769e-05, "loss": 0.6052, "step": 194660 }, { "epoch": 1.2436911439633032, "grad_norm": 0.7963413000106812, "learning_rate": 3.135945372731257e-05, "loss": 0.9678, "step": 194670 }, { "epoch": 1.2437550311130419, "grad_norm": 1.1966108083724976, "learning_rate": 3.135479787448387e-05, "loss": 0.9356, "step": 194680 }, { "epoch": 1.2438189182627806, "grad_norm": 0.6879404783248901, "learning_rate": 3.135014220942849e-05, "loss": 0.7895, "step": 194690 }, { "epoch": 1.2438828054125193, "grad_norm": 0.9736181497573853, "learning_rate": 3.1345486732193306e-05, "loss": 0.9982, "step": 194700 }, { "epoch": 1.243946692562258, "grad_norm": 0.9534584283828735, "learning_rate": 3.1340831442825214e-05, "loss": 0.8743, "step": 194710 }, { "epoch": 1.2440105797119967, "grad_norm": 1.5264173746109009, "learning_rate": 3.133617634137109e-05, "loss": 0.9591, "step": 194720 }, { "epoch": 1.2440744668617354, "grad_norm": 1.1958099603652954, "learning_rate": 3.133152142787782e-05, "loss": 0.7427, "step": 194730 }, { "epoch": 1.244138354011474, "grad_norm": 1.0329030752182007, "learning_rate": 3.132686670239228e-05, "loss": 1.0823, "step": 194740 }, { "epoch": 1.2442022411612128, "grad_norm": 0.9985544085502625, "learning_rate": 3.132221216496134e-05, "loss": 0.8161, "step": 194750 }, { "epoch": 1.2442661283109515, "grad_norm": 2.1537814140319824, "learning_rate": 3.131755781563189e-05, "loss": 0.9614, "step": 194760 }, { "epoch": 1.2443300154606902, "grad_norm": 0.9693751335144043, "learning_rate": 3.1312903654450796e-05, "loss": 1.1127, "step": 194770 }, { "epoch": 1.244393902610429, "grad_norm": 0.8665310740470886, "learning_rate": 3.130824968146492e-05, "loss": 0.9338, "step": 194780 }, { "epoch": 1.2444577897601676, "grad_norm": 1.027868390083313, "learning_rate": 3.130359589672115e-05, "loss": 0.9843, "step": 194790 }, { "epoch": 1.2445216769099063, "grad_norm": 1.299623966217041, "learning_rate": 3.1298942300266344e-05, "loss": 0.7385, "step": 194800 }, { "epoch": 1.244585564059645, "grad_norm": 0.7882144451141357, "learning_rate": 3.129428889214736e-05, "loss": 0.6872, "step": 194810 }, { "epoch": 1.2446494512093838, "grad_norm": 1.2492918968200684, "learning_rate": 3.1289635672411076e-05, "loss": 0.8676, "step": 194820 }, { "epoch": 1.2447133383591225, "grad_norm": 1.1216018199920654, "learning_rate": 3.1284982641104344e-05, "loss": 0.9722, "step": 194830 }, { "epoch": 1.2447772255088612, "grad_norm": 0.9031000137329102, "learning_rate": 3.128032979827403e-05, "loss": 0.8253, "step": 194840 }, { "epoch": 1.2448411126585999, "grad_norm": 2.528519630432129, "learning_rate": 3.1275677143966985e-05, "loss": 1.207, "step": 194850 }, { "epoch": 1.2449049998083386, "grad_norm": 0.9558800458908081, "learning_rate": 3.127102467823007e-05, "loss": 0.9207, "step": 194860 }, { "epoch": 1.2449688869580773, "grad_norm": 0.5063762664794922, "learning_rate": 3.1266372401110134e-05, "loss": 0.8639, "step": 194870 }, { "epoch": 1.245032774107816, "grad_norm": 0.9158902764320374, "learning_rate": 3.1261720312654044e-05, "loss": 0.9413, "step": 194880 }, { "epoch": 1.2450966612575547, "grad_norm": 0.824600338935852, "learning_rate": 3.125706841290866e-05, "loss": 0.8742, "step": 194890 }, { "epoch": 1.2451605484072934, "grad_norm": 0.9135797619819641, "learning_rate": 3.12524167019208e-05, "loss": 0.712, "step": 194900 }, { "epoch": 1.245224435557032, "grad_norm": 0.5630869269371033, "learning_rate": 3.124776517973731e-05, "loss": 0.7903, "step": 194910 }, { "epoch": 1.2452883227067708, "grad_norm": 1.2710615396499634, "learning_rate": 3.124311384640505e-05, "loss": 0.8565, "step": 194920 }, { "epoch": 1.2453522098565095, "grad_norm": 1.194273829460144, "learning_rate": 3.123846270197087e-05, "loss": 0.7865, "step": 194930 }, { "epoch": 1.2454160970062482, "grad_norm": 1.1291714906692505, "learning_rate": 3.123381174648159e-05, "loss": 0.6668, "step": 194940 }, { "epoch": 1.245479984155987, "grad_norm": 1.230466365814209, "learning_rate": 3.1229160979984065e-05, "loss": 0.7462, "step": 194950 }, { "epoch": 1.2455438713057256, "grad_norm": 1.0248101949691772, "learning_rate": 3.122451040252513e-05, "loss": 0.8264, "step": 194960 }, { "epoch": 1.2456077584554643, "grad_norm": 0.7084385752677917, "learning_rate": 3.1219860014151616e-05, "loss": 0.9541, "step": 194970 }, { "epoch": 1.245671645605203, "grad_norm": 0.9096771478652954, "learning_rate": 3.121520981491035e-05, "loss": 0.8213, "step": 194980 }, { "epoch": 1.2457355327549418, "grad_norm": 0.7235396504402161, "learning_rate": 3.121055980484819e-05, "loss": 0.681, "step": 194990 }, { "epoch": 1.2457994199046805, "grad_norm": 1.0630961656570435, "learning_rate": 3.120590998401194e-05, "loss": 0.8318, "step": 195000 }, { "epoch": 1.2458633070544192, "grad_norm": 0.9689163565635681, "learning_rate": 3.120126035244844e-05, "loss": 0.7171, "step": 195010 }, { "epoch": 1.2459271942041577, "grad_norm": 1.0845943689346313, "learning_rate": 3.11966109102045e-05, "loss": 0.8903, "step": 195020 }, { "epoch": 1.2459910813538966, "grad_norm": 1.0844838619232178, "learning_rate": 3.1191961657326965e-05, "loss": 0.9605, "step": 195030 }, { "epoch": 1.246054968503635, "grad_norm": 0.8601891994476318, "learning_rate": 3.118731259386265e-05, "loss": 0.784, "step": 195040 }, { "epoch": 1.246118855653374, "grad_norm": 0.6311177015304565, "learning_rate": 3.1182663719858364e-05, "loss": 0.7194, "step": 195050 }, { "epoch": 1.2461827428031125, "grad_norm": 0.9953409433364868, "learning_rate": 3.117801503536094e-05, "loss": 1.0471, "step": 195060 }, { "epoch": 1.2462466299528514, "grad_norm": 0.5984851121902466, "learning_rate": 3.117336654041718e-05, "loss": 0.7776, "step": 195070 }, { "epoch": 1.2463105171025899, "grad_norm": 1.1203337907791138, "learning_rate": 3.116871823507391e-05, "loss": 0.8142, "step": 195080 }, { "epoch": 1.2463744042523286, "grad_norm": 0.659214437007904, "learning_rate": 3.1164070119377944e-05, "loss": 0.78, "step": 195090 }, { "epoch": 1.2464382914020673, "grad_norm": 1.9500774145126343, "learning_rate": 3.115942219337609e-05, "loss": 0.9543, "step": 195100 }, { "epoch": 1.246502178551806, "grad_norm": 0.9088325500488281, "learning_rate": 3.1154774457115144e-05, "loss": 0.7535, "step": 195110 }, { "epoch": 1.2465660657015447, "grad_norm": 0.9878364205360413, "learning_rate": 3.1150126910641926e-05, "loss": 0.8623, "step": 195120 }, { "epoch": 1.2466299528512834, "grad_norm": 0.9298221468925476, "learning_rate": 3.114547955400324e-05, "loss": 0.7283, "step": 195130 }, { "epoch": 1.2466938400010221, "grad_norm": 0.98885577917099, "learning_rate": 3.1140832387245885e-05, "loss": 0.6204, "step": 195140 }, { "epoch": 1.2467577271507608, "grad_norm": 1.1176702976226807, "learning_rate": 3.113618541041666e-05, "loss": 0.7693, "step": 195150 }, { "epoch": 1.2468216143004995, "grad_norm": 0.5629855394363403, "learning_rate": 3.1131538623562375e-05, "loss": 0.8783, "step": 195160 }, { "epoch": 1.2468855014502382, "grad_norm": 0.6804969310760498, "learning_rate": 3.112689202672981e-05, "loss": 1.0256, "step": 195170 }, { "epoch": 1.246949388599977, "grad_norm": 0.5467301607131958, "learning_rate": 3.1122245619965764e-05, "loss": 0.853, "step": 195180 }, { "epoch": 1.2470132757497157, "grad_norm": 0.6404921412467957, "learning_rate": 3.111759940331704e-05, "loss": 0.7456, "step": 195190 }, { "epoch": 1.2470771628994544, "grad_norm": 1.025201439857483, "learning_rate": 3.111295337683044e-05, "loss": 0.8196, "step": 195200 }, { "epoch": 1.247141050049193, "grad_norm": 0.9093812108039856, "learning_rate": 3.110830754055273e-05, "loss": 0.8695, "step": 195210 }, { "epoch": 1.2472049371989318, "grad_norm": 0.4639185667037964, "learning_rate": 3.110366189453071e-05, "loss": 0.8906, "step": 195220 }, { "epoch": 1.2472688243486705, "grad_norm": 1.9653066396713257, "learning_rate": 3.1099016438811156e-05, "loss": 0.7307, "step": 195230 }, { "epoch": 1.2473327114984092, "grad_norm": 1.3371989727020264, "learning_rate": 3.1094371173440864e-05, "loss": 0.9541, "step": 195240 }, { "epoch": 1.247396598648148, "grad_norm": 1.0733586549758911, "learning_rate": 3.108972609846661e-05, "loss": 0.583, "step": 195250 }, { "epoch": 1.2474604857978866, "grad_norm": 1.862831950187683, "learning_rate": 3.108508121393517e-05, "loss": 1.1434, "step": 195260 }, { "epoch": 1.2475243729476253, "grad_norm": 0.9113034605979919, "learning_rate": 3.108043651989333e-05, "loss": 0.9859, "step": 195270 }, { "epoch": 1.247588260097364, "grad_norm": 1.056965708732605, "learning_rate": 3.107579201638786e-05, "loss": 1.1087, "step": 195280 }, { "epoch": 1.2476521472471027, "grad_norm": 0.8885679841041565, "learning_rate": 3.107114770346554e-05, "loss": 0.9199, "step": 195290 }, { "epoch": 1.2477160343968414, "grad_norm": 0.5740584135055542, "learning_rate": 3.106650358117314e-05, "loss": 0.7694, "step": 195300 }, { "epoch": 1.2477799215465801, "grad_norm": 1.5246422290802002, "learning_rate": 3.106185964955742e-05, "loss": 0.8454, "step": 195310 }, { "epoch": 1.2478438086963188, "grad_norm": 0.8860229849815369, "learning_rate": 3.105721590866516e-05, "loss": 1.0609, "step": 195320 }, { "epoch": 1.2479076958460575, "grad_norm": 0.7222634553909302, "learning_rate": 3.105257235854312e-05, "loss": 0.8697, "step": 195330 }, { "epoch": 1.2479715829957962, "grad_norm": 1.1808580160140991, "learning_rate": 3.1047928999238074e-05, "loss": 0.7241, "step": 195340 }, { "epoch": 1.248035470145535, "grad_norm": 0.7724068760871887, "learning_rate": 3.1043285830796776e-05, "loss": 0.8333, "step": 195350 }, { "epoch": 1.2480993572952737, "grad_norm": 1.1032763719558716, "learning_rate": 3.1038642853266e-05, "loss": 0.802, "step": 195360 }, { "epoch": 1.2481632444450124, "grad_norm": 0.927266001701355, "learning_rate": 3.1034000066692496e-05, "loss": 0.8883, "step": 195370 }, { "epoch": 1.248227131594751, "grad_norm": 0.8661271333694458, "learning_rate": 3.1029357471123e-05, "loss": 0.9052, "step": 195380 }, { "epoch": 1.2482910187444898, "grad_norm": 1.4251922369003296, "learning_rate": 3.10247150666043e-05, "loss": 0.9302, "step": 195390 }, { "epoch": 1.2483549058942285, "grad_norm": 0.845050573348999, "learning_rate": 3.102007285318313e-05, "loss": 0.737, "step": 195400 }, { "epoch": 1.2484187930439672, "grad_norm": 1.1283615827560425, "learning_rate": 3.101543083090624e-05, "loss": 0.6329, "step": 195410 }, { "epoch": 1.248482680193706, "grad_norm": 1.0217286348342896, "learning_rate": 3.1010788999820396e-05, "loss": 1.0233, "step": 195420 }, { "epoch": 1.2485465673434446, "grad_norm": 0.9749916791915894, "learning_rate": 3.100614735997233e-05, "loss": 0.9356, "step": 195430 }, { "epoch": 1.2486104544931833, "grad_norm": 0.8937061429023743, "learning_rate": 3.10015059114088e-05, "loss": 0.7443, "step": 195440 }, { "epoch": 1.248674341642922, "grad_norm": 0.9025242328643799, "learning_rate": 3.0996864654176525e-05, "loss": 0.6304, "step": 195450 }, { "epoch": 1.2487382287926607, "grad_norm": 2.0243749618530273, "learning_rate": 3.099222358832228e-05, "loss": 0.8513, "step": 195460 }, { "epoch": 1.2488021159423994, "grad_norm": 1.7450834512710571, "learning_rate": 3.0987582713892784e-05, "loss": 0.7545, "step": 195470 }, { "epoch": 1.2488660030921381, "grad_norm": 1.119706392288208, "learning_rate": 3.098294203093477e-05, "loss": 0.6262, "step": 195480 }, { "epoch": 1.2489298902418768, "grad_norm": 1.065361499786377, "learning_rate": 3.097830153949498e-05, "loss": 0.8959, "step": 195490 }, { "epoch": 1.2489937773916155, "grad_norm": 0.8525824546813965, "learning_rate": 3.097366123962015e-05, "loss": 0.8344, "step": 195500 }, { "epoch": 1.249057664541354, "grad_norm": 0.8765125870704651, "learning_rate": 3.096902113135702e-05, "loss": 1.0526, "step": 195510 }, { "epoch": 1.249121551691093, "grad_norm": 1.3569093942642212, "learning_rate": 3.09643812147523e-05, "loss": 0.7687, "step": 195520 }, { "epoch": 1.2491854388408314, "grad_norm": 1.1045929193496704, "learning_rate": 3.0959741489852746e-05, "loss": 0.8376, "step": 195530 }, { "epoch": 1.2492493259905704, "grad_norm": 0.7480252981185913, "learning_rate": 3.095510195670506e-05, "loss": 0.8225, "step": 195540 }, { "epoch": 1.2493132131403089, "grad_norm": 1.0368316173553467, "learning_rate": 3.095046261535597e-05, "loss": 0.8449, "step": 195550 }, { "epoch": 1.2493771002900478, "grad_norm": 1.2551803588867188, "learning_rate": 3.0945823465852204e-05, "loss": 0.7941, "step": 195560 }, { "epoch": 1.2494409874397863, "grad_norm": 1.1370221376419067, "learning_rate": 3.094118450824048e-05, "loss": 0.8795, "step": 195570 }, { "epoch": 1.249504874589525, "grad_norm": 1.0258110761642456, "learning_rate": 3.0936545742567514e-05, "loss": 0.7045, "step": 195580 }, { "epoch": 1.2495687617392637, "grad_norm": 1.3879752159118652, "learning_rate": 3.0931907168880027e-05, "loss": 0.9711, "step": 195590 }, { "epoch": 1.2496326488890024, "grad_norm": 1.1215720176696777, "learning_rate": 3.0927268787224734e-05, "loss": 0.7599, "step": 195600 }, { "epoch": 1.249696536038741, "grad_norm": 0.6473504304885864, "learning_rate": 3.092263059764834e-05, "loss": 1.1675, "step": 195610 }, { "epoch": 1.2497604231884798, "grad_norm": 1.1090569496154785, "learning_rate": 3.091799260019757e-05, "loss": 0.9287, "step": 195620 }, { "epoch": 1.2498243103382185, "grad_norm": 1.0651631355285645, "learning_rate": 3.0913354794919105e-05, "loss": 0.772, "step": 195630 }, { "epoch": 1.2498881974879572, "grad_norm": 0.8213653564453125, "learning_rate": 3.090871718185968e-05, "loss": 0.8394, "step": 195640 }, { "epoch": 1.249952084637696, "grad_norm": 1.845961332321167, "learning_rate": 3.090407976106599e-05, "loss": 0.7424, "step": 195650 }, { "epoch": 1.2500159717874346, "grad_norm": 1.1385325193405151, "learning_rate": 3.089944253258473e-05, "loss": 1.0301, "step": 195660 }, { "epoch": 1.2500798589371733, "grad_norm": 1.0003492832183838, "learning_rate": 3.089480549646262e-05, "loss": 0.8332, "step": 195670 }, { "epoch": 1.250143746086912, "grad_norm": 1.6224372386932373, "learning_rate": 3.089016865274634e-05, "loss": 0.5775, "step": 195680 }, { "epoch": 1.2502076332366507, "grad_norm": 0.7729224562644958, "learning_rate": 3.08855320014826e-05, "loss": 0.9885, "step": 195690 }, { "epoch": 1.2502715203863894, "grad_norm": 0.7621885538101196, "learning_rate": 3.088089554271808e-05, "loss": 0.8704, "step": 195700 }, { "epoch": 1.2503354075361282, "grad_norm": 0.81306391954422, "learning_rate": 3.0876259276499475e-05, "loss": 0.9436, "step": 195710 }, { "epoch": 1.2503992946858669, "grad_norm": 0.8978168964385986, "learning_rate": 3.087162320287349e-05, "loss": 0.8368, "step": 195720 }, { "epoch": 1.2504631818356056, "grad_norm": 0.8647493124008179, "learning_rate": 3.086698732188682e-05, "loss": 0.63, "step": 195730 }, { "epoch": 1.2505270689853443, "grad_norm": 0.9556788206100464, "learning_rate": 3.086235163358613e-05, "loss": 0.7618, "step": 195740 }, { "epoch": 1.250590956135083, "grad_norm": 1.103699803352356, "learning_rate": 3.0857716138018115e-05, "loss": 0.6418, "step": 195750 }, { "epoch": 1.2506548432848217, "grad_norm": 0.795481264591217, "learning_rate": 3.0853080835229465e-05, "loss": 0.9007, "step": 195760 }, { "epoch": 1.2507187304345604, "grad_norm": 0.680173397064209, "learning_rate": 3.084844572526685e-05, "loss": 1.0055, "step": 195770 }, { "epoch": 1.250782617584299, "grad_norm": 0.8150850534439087, "learning_rate": 3.0843810808176956e-05, "loss": 0.5794, "step": 195780 }, { "epoch": 1.2508465047340378, "grad_norm": 1.4309477806091309, "learning_rate": 3.083917608400646e-05, "loss": 0.9091, "step": 195790 }, { "epoch": 1.2509103918837765, "grad_norm": 0.845729410648346, "learning_rate": 3.083454155280204e-05, "loss": 0.8298, "step": 195800 }, { "epoch": 1.2509742790335152, "grad_norm": 0.8186609148979187, "learning_rate": 3.0829907214610366e-05, "loss": 1.1078, "step": 195810 }, { "epoch": 1.251038166183254, "grad_norm": 0.772532045841217, "learning_rate": 3.082527306947811e-05, "loss": 0.9443, "step": 195820 }, { "epoch": 1.2511020533329926, "grad_norm": 0.7840969562530518, "learning_rate": 3.082063911745194e-05, "loss": 0.7895, "step": 195830 }, { "epoch": 1.2511659404827313, "grad_norm": 0.6535977125167847, "learning_rate": 3.081600535857853e-05, "loss": 0.9439, "step": 195840 }, { "epoch": 1.25122982763247, "grad_norm": 1.039899468421936, "learning_rate": 3.081137179290454e-05, "loss": 0.8581, "step": 195850 }, { "epoch": 1.2512937147822087, "grad_norm": 0.915335476398468, "learning_rate": 3.080673842047666e-05, "loss": 1.1498, "step": 195860 }, { "epoch": 1.2513576019319474, "grad_norm": 0.7430047392845154, "learning_rate": 3.0802105241341494e-05, "loss": 0.6901, "step": 195870 }, { "epoch": 1.2514214890816862, "grad_norm": 1.866835594177246, "learning_rate": 3.0797472255545755e-05, "loss": 0.9969, "step": 195880 }, { "epoch": 1.2514853762314249, "grad_norm": 1.2250245809555054, "learning_rate": 3.079283946313608e-05, "loss": 0.8529, "step": 195890 }, { "epoch": 1.2515492633811636, "grad_norm": 1.3015787601470947, "learning_rate": 3.078820686415912e-05, "loss": 0.9146, "step": 195900 }, { "epoch": 1.2516131505309023, "grad_norm": 0.7178229093551636, "learning_rate": 3.078357445866155e-05, "loss": 0.9116, "step": 195910 }, { "epoch": 1.251677037680641, "grad_norm": 0.6988886594772339, "learning_rate": 3.0778942246690004e-05, "loss": 0.7955, "step": 195920 }, { "epoch": 1.2517409248303797, "grad_norm": 0.7773096561431885, "learning_rate": 3.077431022829113e-05, "loss": 0.7112, "step": 195930 }, { "epoch": 1.2518048119801184, "grad_norm": 0.7745444178581238, "learning_rate": 3.076967840351159e-05, "loss": 0.777, "step": 195940 }, { "epoch": 1.251868699129857, "grad_norm": 0.8006591796875, "learning_rate": 3.076504677239803e-05, "loss": 0.851, "step": 195950 }, { "epoch": 1.2519325862795958, "grad_norm": 0.7855653166770935, "learning_rate": 3.0760415334997084e-05, "loss": 0.8963, "step": 195960 }, { "epoch": 1.2519964734293345, "grad_norm": 1.0454212427139282, "learning_rate": 3.075578409135541e-05, "loss": 0.9647, "step": 195970 }, { "epoch": 1.252060360579073, "grad_norm": 0.722151517868042, "learning_rate": 3.075115304151963e-05, "loss": 1.154, "step": 195980 }, { "epoch": 1.252124247728812, "grad_norm": 0.9353591203689575, "learning_rate": 3.074652218553639e-05, "loss": 0.9055, "step": 195990 }, { "epoch": 1.2521881348785504, "grad_norm": 0.6113839745521545, "learning_rate": 3.0741891523452334e-05, "loss": 0.7152, "step": 196000 }, { "epoch": 1.2522520220282893, "grad_norm": 0.8380521535873413, "learning_rate": 3.0737261055314085e-05, "loss": 0.8254, "step": 196010 }, { "epoch": 1.2523159091780278, "grad_norm": 1.1471387147903442, "learning_rate": 3.073309379985188e-05, "loss": 1.0999, "step": 196020 }, { "epoch": 1.2523797963277667, "grad_norm": 0.9497846364974976, "learning_rate": 3.072846370033915e-05, "loss": 0.6518, "step": 196030 }, { "epoch": 1.2524436834775052, "grad_norm": 1.842544436454773, "learning_rate": 3.0723833794907464e-05, "loss": 0.7074, "step": 196040 }, { "epoch": 1.2525075706272442, "grad_norm": 1.0247862339019775, "learning_rate": 3.071920408360344e-05, "loss": 0.8442, "step": 196050 }, { "epoch": 1.2525714577769826, "grad_norm": 1.2822681665420532, "learning_rate": 3.071457456647372e-05, "loss": 0.7831, "step": 196060 }, { "epoch": 1.2526353449267216, "grad_norm": 1.1187118291854858, "learning_rate": 3.070994524356492e-05, "loss": 0.7251, "step": 196070 }, { "epoch": 1.25269923207646, "grad_norm": 1.0392087697982788, "learning_rate": 3.070531611492366e-05, "loss": 0.6043, "step": 196080 }, { "epoch": 1.2527631192261988, "grad_norm": 0.8084442615509033, "learning_rate": 3.070068718059655e-05, "loss": 1.0564, "step": 196090 }, { "epoch": 1.2528270063759375, "grad_norm": 0.8950974941253662, "learning_rate": 3.069605844063023e-05, "loss": 0.7615, "step": 196100 }, { "epoch": 1.2528908935256762, "grad_norm": 0.8876667022705078, "learning_rate": 3.069142989507129e-05, "loss": 0.9022, "step": 196110 }, { "epoch": 1.2529547806754149, "grad_norm": 0.6729691028594971, "learning_rate": 3.0686801543966356e-05, "loss": 1.014, "step": 196120 }, { "epoch": 1.2530186678251536, "grad_norm": 1.1825615167617798, "learning_rate": 3.0682173387362046e-05, "loss": 0.7734, "step": 196130 }, { "epoch": 1.2530825549748923, "grad_norm": 2.5809266567230225, "learning_rate": 3.0677545425304955e-05, "loss": 0.7741, "step": 196140 }, { "epoch": 1.253146442124631, "grad_norm": 1.2367600202560425, "learning_rate": 3.0672917657841696e-05, "loss": 0.8362, "step": 196150 }, { "epoch": 1.2532103292743697, "grad_norm": 1.5456008911132812, "learning_rate": 3.066829008501888e-05, "loss": 0.8762, "step": 196160 }, { "epoch": 1.2532742164241084, "grad_norm": 0.9506556987762451, "learning_rate": 3.06636627068831e-05, "loss": 0.7799, "step": 196170 }, { "epoch": 1.2533381035738471, "grad_norm": 1.2111282348632812, "learning_rate": 3.065903552348098e-05, "loss": 0.8307, "step": 196180 }, { "epoch": 1.2534019907235858, "grad_norm": 1.244231939315796, "learning_rate": 3.0654408534859094e-05, "loss": 0.7969, "step": 196190 }, { "epoch": 1.2534658778733245, "grad_norm": 0.7318935990333557, "learning_rate": 3.064978174106406e-05, "loss": 0.9228, "step": 196200 }, { "epoch": 1.2535297650230632, "grad_norm": 0.7732114195823669, "learning_rate": 3.0645155142142455e-05, "loss": 1.0287, "step": 196210 }, { "epoch": 1.253593652172802, "grad_norm": 0.8616588115692139, "learning_rate": 3.064052873814088e-05, "loss": 0.8152, "step": 196220 }, { "epoch": 1.2536575393225406, "grad_norm": 2.2946550846099854, "learning_rate": 3.063590252910594e-05, "loss": 0.753, "step": 196230 }, { "epoch": 1.2537214264722794, "grad_norm": 0.6799231171607971, "learning_rate": 3.0631276515084205e-05, "loss": 0.8045, "step": 196240 }, { "epoch": 1.253785313622018, "grad_norm": 1.3932530879974365, "learning_rate": 3.062665069612228e-05, "loss": 0.623, "step": 196250 }, { "epoch": 1.2538492007717568, "grad_norm": 1.0541167259216309, "learning_rate": 3.062202507226674e-05, "loss": 0.9539, "step": 196260 }, { "epoch": 1.2539130879214955, "grad_norm": 0.8246195912361145, "learning_rate": 3.061739964356417e-05, "loss": 0.7437, "step": 196270 }, { "epoch": 1.2539769750712342, "grad_norm": 1.299216866493225, "learning_rate": 3.0612774410061154e-05, "loss": 0.882, "step": 196280 }, { "epoch": 1.2540408622209729, "grad_norm": 0.7007962465286255, "learning_rate": 3.060814937180427e-05, "loss": 0.5721, "step": 196290 }, { "epoch": 1.2541047493707116, "grad_norm": 0.92581707239151, "learning_rate": 3.06035245288401e-05, "loss": 0.9306, "step": 196300 }, { "epoch": 1.2541686365204503, "grad_norm": 0.7394659519195557, "learning_rate": 3.059889988121521e-05, "loss": 0.8613, "step": 196310 }, { "epoch": 1.254232523670189, "grad_norm": 0.8280680179595947, "learning_rate": 3.05942754289762e-05, "loss": 0.9097, "step": 196320 }, { "epoch": 1.2542964108199277, "grad_norm": 0.8643571734428406, "learning_rate": 3.058965117216961e-05, "loss": 0.8487, "step": 196330 }, { "epoch": 1.2543602979696664, "grad_norm": 0.7133888602256775, "learning_rate": 3.0585027110842033e-05, "loss": 1.0621, "step": 196340 }, { "epoch": 1.2544241851194051, "grad_norm": 0.9498386979103088, "learning_rate": 3.0580403245040016e-05, "loss": 0.8696, "step": 196350 }, { "epoch": 1.2544880722691438, "grad_norm": 0.786381721496582, "learning_rate": 3.0575779574810147e-05, "loss": 0.9355, "step": 196360 }, { "epoch": 1.2545519594188825, "grad_norm": 0.961892306804657, "learning_rate": 3.0571156100198986e-05, "loss": 0.7257, "step": 196370 }, { "epoch": 1.2546158465686212, "grad_norm": 2.5090837478637695, "learning_rate": 3.056653282125309e-05, "loss": 0.9214, "step": 196380 }, { "epoch": 1.25467973371836, "grad_norm": 0.7858637571334839, "learning_rate": 3.056190973801902e-05, "loss": 0.8509, "step": 196390 }, { "epoch": 1.2547436208680987, "grad_norm": 1.216647744178772, "learning_rate": 3.0557286850543345e-05, "loss": 0.8276, "step": 196400 }, { "epoch": 1.2548075080178374, "grad_norm": 0.8572155237197876, "learning_rate": 3.05526641588726e-05, "loss": 0.6625, "step": 196410 }, { "epoch": 1.254871395167576, "grad_norm": 1.4202823638916016, "learning_rate": 3.054804166305335e-05, "loss": 0.6364, "step": 196420 }, { "epoch": 1.2549352823173148, "grad_norm": 0.9645159244537354, "learning_rate": 3.0543419363132154e-05, "loss": 0.9696, "step": 196430 }, { "epoch": 1.2549991694670535, "grad_norm": 0.9839354753494263, "learning_rate": 3.053879725915556e-05, "loss": 0.9092, "step": 196440 }, { "epoch": 1.255063056616792, "grad_norm": 1.0340949296951294, "learning_rate": 3.053417535117011e-05, "loss": 0.8126, "step": 196450 }, { "epoch": 1.255126943766531, "grad_norm": 1.2581630945205688, "learning_rate": 3.052955363922235e-05, "loss": 0.691, "step": 196460 }, { "epoch": 1.2551908309162694, "grad_norm": 0.8256975412368774, "learning_rate": 3.052493212335884e-05, "loss": 0.8336, "step": 196470 }, { "epoch": 1.2552547180660083, "grad_norm": 1.2465929985046387, "learning_rate": 3.052031080362611e-05, "loss": 1.2096, "step": 196480 }, { "epoch": 1.2553186052157468, "grad_norm": 1.2942495346069336, "learning_rate": 3.05156896800707e-05, "loss": 0.8418, "step": 196490 }, { "epoch": 1.2553824923654857, "grad_norm": 1.565579891204834, "learning_rate": 3.051106875273915e-05, "loss": 0.8584, "step": 196500 }, { "epoch": 1.2554463795152242, "grad_norm": 0.9140028357505798, "learning_rate": 3.0506448021678004e-05, "loss": 1.2363, "step": 196510 }, { "epoch": 1.2555102666649631, "grad_norm": 1.4598805904388428, "learning_rate": 3.050182748693378e-05, "loss": 0.7464, "step": 196520 }, { "epoch": 1.2555741538147016, "grad_norm": 1.033338189125061, "learning_rate": 3.049720714855303e-05, "loss": 0.9934, "step": 196530 }, { "epoch": 1.2556380409644405, "grad_norm": 2.08406925201416, "learning_rate": 3.0492587006582267e-05, "loss": 1.2789, "step": 196540 }, { "epoch": 1.255701928114179, "grad_norm": 0.5521570444107056, "learning_rate": 3.0487967061068036e-05, "loss": 0.7427, "step": 196550 }, { "epoch": 1.255765815263918, "grad_norm": 1.137140154838562, "learning_rate": 3.0483347312056853e-05, "loss": 0.769, "step": 196560 }, { "epoch": 1.2558297024136564, "grad_norm": 1.1652560234069824, "learning_rate": 3.0478727759595248e-05, "loss": 0.7821, "step": 196570 }, { "epoch": 1.2558935895633951, "grad_norm": 1.1104927062988281, "learning_rate": 3.0474108403729752e-05, "loss": 0.8623, "step": 196580 }, { "epoch": 1.2559574767131338, "grad_norm": 0.8415306210517883, "learning_rate": 3.0469489244506865e-05, "loss": 0.9975, "step": 196590 }, { "epoch": 1.2560213638628726, "grad_norm": 1.3708555698394775, "learning_rate": 3.0464870281973123e-05, "loss": 0.773, "step": 196600 }, { "epoch": 1.2560852510126113, "grad_norm": 1.2786868810653687, "learning_rate": 3.046025151617503e-05, "loss": 0.8677, "step": 196610 }, { "epoch": 1.25614913816235, "grad_norm": 1.9700566530227661, "learning_rate": 3.0455632947159117e-05, "loss": 0.8535, "step": 196620 }, { "epoch": 1.2562130253120887, "grad_norm": 0.4788389205932617, "learning_rate": 3.0451014574971892e-05, "loss": 1.1064, "step": 196630 }, { "epoch": 1.2562769124618274, "grad_norm": 0.41164281964302063, "learning_rate": 3.0446396399659855e-05, "loss": 0.9192, "step": 196640 }, { "epoch": 1.256340799611566, "grad_norm": 0.979625940322876, "learning_rate": 3.0441778421269523e-05, "loss": 0.9667, "step": 196650 }, { "epoch": 1.2564046867613048, "grad_norm": 0.9145374298095703, "learning_rate": 3.0437160639847405e-05, "loss": 0.9318, "step": 196660 }, { "epoch": 1.2564685739110435, "grad_norm": 1.1360008716583252, "learning_rate": 3.043254305544e-05, "loss": 0.8037, "step": 196670 }, { "epoch": 1.2565324610607822, "grad_norm": 0.9261152744293213, "learning_rate": 3.0427925668093804e-05, "loss": 1.0606, "step": 196680 }, { "epoch": 1.256596348210521, "grad_norm": 3.820808172225952, "learning_rate": 3.0423308477855344e-05, "loss": 0.8752, "step": 196690 }, { "epoch": 1.2566602353602596, "grad_norm": 1.1207785606384277, "learning_rate": 3.04186914847711e-05, "loss": 0.869, "step": 196700 }, { "epoch": 1.2567241225099983, "grad_norm": 0.5529508590698242, "learning_rate": 3.041407468888758e-05, "loss": 0.8173, "step": 196710 }, { "epoch": 1.256788009659737, "grad_norm": 0.9832448959350586, "learning_rate": 3.0409458090251265e-05, "loss": 0.8848, "step": 196720 }, { "epoch": 1.2568518968094757, "grad_norm": 0.6877294778823853, "learning_rate": 3.040484168890866e-05, "loss": 0.7783, "step": 196730 }, { "epoch": 1.2569157839592144, "grad_norm": 0.9312469959259033, "learning_rate": 3.0400225484906243e-05, "loss": 0.7148, "step": 196740 }, { "epoch": 1.2569796711089531, "grad_norm": 0.7746081352233887, "learning_rate": 3.0395609478290522e-05, "loss": 0.9964, "step": 196750 }, { "epoch": 1.2570435582586919, "grad_norm": 1.5182709693908691, "learning_rate": 3.0390993669107966e-05, "loss": 0.9878, "step": 196760 }, { "epoch": 1.2571074454084306, "grad_norm": 1.004671573638916, "learning_rate": 3.0386378057405067e-05, "loss": 1.0418, "step": 196770 }, { "epoch": 1.2571713325581693, "grad_norm": 0.7396956086158752, "learning_rate": 3.0381762643228316e-05, "loss": 0.9656, "step": 196780 }, { "epoch": 1.257235219707908, "grad_norm": 1.0894277095794678, "learning_rate": 3.0377147426624186e-05, "loss": 0.947, "step": 196790 }, { "epoch": 1.2572991068576467, "grad_norm": 0.967881441116333, "learning_rate": 3.0372532407639155e-05, "loss": 0.9679, "step": 196800 }, { "epoch": 1.2573629940073854, "grad_norm": 1.0434962511062622, "learning_rate": 3.0367917586319704e-05, "loss": 0.7369, "step": 196810 }, { "epoch": 1.257426881157124, "grad_norm": 0.7365828156471252, "learning_rate": 3.0363302962712305e-05, "loss": 0.8695, "step": 196820 }, { "epoch": 1.2574907683068628, "grad_norm": 0.7892202734947205, "learning_rate": 3.0358688536863433e-05, "loss": 0.8961, "step": 196830 }, { "epoch": 1.2575546554566015, "grad_norm": 0.9244031310081482, "learning_rate": 3.0354074308819563e-05, "loss": 0.8052, "step": 196840 }, { "epoch": 1.2576185426063402, "grad_norm": 0.9582728147506714, "learning_rate": 3.0349460278627163e-05, "loss": 0.9504, "step": 196850 }, { "epoch": 1.257682429756079, "grad_norm": 1.100187063217163, "learning_rate": 3.0344846446332692e-05, "loss": 0.7798, "step": 196860 }, { "epoch": 1.2577463169058176, "grad_norm": 0.7992842197418213, "learning_rate": 3.0340232811982628e-05, "loss": 0.9233, "step": 196870 }, { "epoch": 1.2578102040555563, "grad_norm": 1.033463716506958, "learning_rate": 3.0335619375623408e-05, "loss": 0.8229, "step": 196880 }, { "epoch": 1.257874091205295, "grad_norm": 1.035979986190796, "learning_rate": 3.033100613730153e-05, "loss": 1.1535, "step": 196890 }, { "epoch": 1.2579379783550337, "grad_norm": 0.9329689741134644, "learning_rate": 3.0326393097063432e-05, "loss": 0.7973, "step": 196900 }, { "epoch": 1.2580018655047724, "grad_norm": 1.8775664567947388, "learning_rate": 3.032178025495558e-05, "loss": 0.84, "step": 196910 }, { "epoch": 1.2580657526545111, "grad_norm": 0.7317743301391602, "learning_rate": 3.0317167611024423e-05, "loss": 0.8763, "step": 196920 }, { "epoch": 1.2581296398042499, "grad_norm": 0.7808213233947754, "learning_rate": 3.031255516531642e-05, "loss": 0.8849, "step": 196930 }, { "epoch": 1.2581935269539883, "grad_norm": 0.7983580827713013, "learning_rate": 3.0307942917878014e-05, "loss": 0.9162, "step": 196940 }, { "epoch": 1.2582574141037273, "grad_norm": 1.1718703508377075, "learning_rate": 3.0303330868755663e-05, "loss": 0.6528, "step": 196950 }, { "epoch": 1.2583213012534658, "grad_norm": 0.7717203497886658, "learning_rate": 3.02987190179958e-05, "loss": 0.7946, "step": 196960 }, { "epoch": 1.2583851884032047, "grad_norm": 1.1155431270599365, "learning_rate": 3.029410736564489e-05, "loss": 0.9417, "step": 196970 }, { "epoch": 1.2584490755529432, "grad_norm": 1.0586241483688354, "learning_rate": 3.0289495911749387e-05, "loss": 0.7738, "step": 196980 }, { "epoch": 1.258512962702682, "grad_norm": 1.0919402837753296, "learning_rate": 3.0284884656355695e-05, "loss": 1.0467, "step": 196990 }, { "epoch": 1.2585768498524206, "grad_norm": 0.8078431487083435, "learning_rate": 3.028027359951025e-05, "loss": 1.0393, "step": 197000 }, { "epoch": 1.2586407370021595, "grad_norm": 1.106803297996521, "learning_rate": 3.0275662741259527e-05, "loss": 0.8736, "step": 197010 }, { "epoch": 1.258704624151898, "grad_norm": 0.7414875030517578, "learning_rate": 3.0271052081649942e-05, "loss": 0.7568, "step": 197020 }, { "epoch": 1.258768511301637, "grad_norm": 0.8508109450340271, "learning_rate": 3.026644162072793e-05, "loss": 0.989, "step": 197030 }, { "epoch": 1.2588323984513754, "grad_norm": 1.1377967596054077, "learning_rate": 3.0261831358539926e-05, "loss": 0.908, "step": 197040 }, { "epoch": 1.2588962856011143, "grad_norm": 0.7796303629875183, "learning_rate": 3.0257221295132354e-05, "loss": 0.6397, "step": 197050 }, { "epoch": 1.2589601727508528, "grad_norm": 1.2880197763442993, "learning_rate": 3.025261143055164e-05, "loss": 0.8341, "step": 197060 }, { "epoch": 1.2590240599005915, "grad_norm": 0.8077268600463867, "learning_rate": 3.024800176484422e-05, "loss": 0.5744, "step": 197070 }, { "epoch": 1.2590879470503302, "grad_norm": 0.8637357354164124, "learning_rate": 3.0243392298056505e-05, "loss": 1.15, "step": 197080 }, { "epoch": 1.259151834200069, "grad_norm": 1.3049520254135132, "learning_rate": 3.0238783030234925e-05, "loss": 1.1268, "step": 197090 }, { "epoch": 1.2592157213498076, "grad_norm": 0.9427032470703125, "learning_rate": 3.0234173961425894e-05, "loss": 0.8156, "step": 197100 }, { "epoch": 1.2592796084995463, "grad_norm": 0.559723973274231, "learning_rate": 3.0229565091675826e-05, "loss": 0.9744, "step": 197110 }, { "epoch": 1.259343495649285, "grad_norm": 0.7008159160614014, "learning_rate": 3.0224956421031146e-05, "loss": 0.9577, "step": 197120 }, { "epoch": 1.2594073827990238, "grad_norm": 1.0198113918304443, "learning_rate": 3.0220347949538264e-05, "loss": 0.8379, "step": 197130 }, { "epoch": 1.2594712699487625, "grad_norm": 1.9242488145828247, "learning_rate": 3.0215739677243593e-05, "loss": 0.9567, "step": 197140 }, { "epoch": 1.2595351570985012, "grad_norm": 1.6917319297790527, "learning_rate": 3.0211131604193532e-05, "loss": 0.7219, "step": 197150 }, { "epoch": 1.2595990442482399, "grad_norm": 0.9386038780212402, "learning_rate": 3.02065237304345e-05, "loss": 1.1366, "step": 197160 }, { "epoch": 1.2596629313979786, "grad_norm": 1.1322776079177856, "learning_rate": 3.020191605601289e-05, "loss": 0.8448, "step": 197170 }, { "epoch": 1.2597268185477173, "grad_norm": 1.149288296699524, "learning_rate": 3.0197308580975126e-05, "loss": 0.9294, "step": 197180 }, { "epoch": 1.259790705697456, "grad_norm": 0.8767491579055786, "learning_rate": 3.0192701305367587e-05, "loss": 0.7238, "step": 197190 }, { "epoch": 1.2598545928471947, "grad_norm": 0.973002552986145, "learning_rate": 3.0188094229236674e-05, "loss": 0.8918, "step": 197200 }, { "epoch": 1.2599184799969334, "grad_norm": 2.295564651489258, "learning_rate": 3.0183487352628802e-05, "loss": 0.8087, "step": 197210 }, { "epoch": 1.259982367146672, "grad_norm": 0.9704807996749878, "learning_rate": 3.017888067559036e-05, "loss": 0.7005, "step": 197220 }, { "epoch": 1.2600462542964108, "grad_norm": 0.8905156850814819, "learning_rate": 3.0174274198167728e-05, "loss": 1.0, "step": 197230 }, { "epoch": 1.2601101414461495, "grad_norm": 1.1902166604995728, "learning_rate": 3.016966792040732e-05, "loss": 1.016, "step": 197240 }, { "epoch": 1.2601740285958882, "grad_norm": 1.5252759456634521, "learning_rate": 3.0165061842355503e-05, "loss": 0.8924, "step": 197250 }, { "epoch": 1.260237915745627, "grad_norm": 0.6883949041366577, "learning_rate": 3.016045596405867e-05, "loss": 1.0273, "step": 197260 }, { "epoch": 1.2603018028953656, "grad_norm": 1.1482892036437988, "learning_rate": 3.0155850285563213e-05, "loss": 1.0745, "step": 197270 }, { "epoch": 1.2603656900451043, "grad_norm": 1.319387435913086, "learning_rate": 3.0151244806915513e-05, "loss": 0.9949, "step": 197280 }, { "epoch": 1.260429577194843, "grad_norm": 1.022769570350647, "learning_rate": 3.0146639528161947e-05, "loss": 0.764, "step": 197290 }, { "epoch": 1.2604934643445818, "grad_norm": 0.9216936230659485, "learning_rate": 3.0142034449348898e-05, "loss": 1.0093, "step": 197300 }, { "epoch": 1.2605573514943205, "grad_norm": 1.382753610610962, "learning_rate": 3.013742957052274e-05, "loss": 0.8119, "step": 197310 }, { "epoch": 1.2606212386440592, "grad_norm": 0.9630887508392334, "learning_rate": 3.013282489172985e-05, "loss": 0.9822, "step": 197320 }, { "epoch": 1.2606851257937979, "grad_norm": 1.4023778438568115, "learning_rate": 3.0128220413016604e-05, "loss": 0.8795, "step": 197330 }, { "epoch": 1.2607490129435366, "grad_norm": 0.5781784653663635, "learning_rate": 3.0123616134429368e-05, "loss": 0.9546, "step": 197340 }, { "epoch": 1.2608129000932753, "grad_norm": 1.133649468421936, "learning_rate": 3.0119012056014513e-05, "loss": 0.5692, "step": 197350 }, { "epoch": 1.260876787243014, "grad_norm": 0.9575822353363037, "learning_rate": 3.0114408177818405e-05, "loss": 0.7463, "step": 197360 }, { "epoch": 1.2609406743927527, "grad_norm": 1.02314293384552, "learning_rate": 3.0109804499887406e-05, "loss": 0.8035, "step": 197370 }, { "epoch": 1.2610045615424914, "grad_norm": 0.8817002177238464, "learning_rate": 3.0105201022267894e-05, "loss": 1.0917, "step": 197380 }, { "epoch": 1.2610684486922301, "grad_norm": 0.5148343443870544, "learning_rate": 3.010059774500621e-05, "loss": 0.7236, "step": 197390 }, { "epoch": 1.2611323358419688, "grad_norm": 0.890557050704956, "learning_rate": 3.0095994668148725e-05, "loss": 0.8603, "step": 197400 }, { "epoch": 1.2611962229917075, "grad_norm": 1.0954136848449707, "learning_rate": 3.0091391791741784e-05, "loss": 1.0766, "step": 197410 }, { "epoch": 1.2612601101414462, "grad_norm": 1.0484198331832886, "learning_rate": 3.008678911583176e-05, "loss": 0.9275, "step": 197420 }, { "epoch": 1.2613239972911847, "grad_norm": 1.0964504480361938, "learning_rate": 3.0082186640465e-05, "loss": 0.8913, "step": 197430 }, { "epoch": 1.2613878844409236, "grad_norm": 0.7645079493522644, "learning_rate": 3.0077584365687848e-05, "loss": 1.0927, "step": 197440 }, { "epoch": 1.2614517715906621, "grad_norm": 0.9771102666854858, "learning_rate": 3.007298229154666e-05, "loss": 0.9772, "step": 197450 }, { "epoch": 1.261515658740401, "grad_norm": 0.7499563694000244, "learning_rate": 3.0068380418087792e-05, "loss": 0.8616, "step": 197460 }, { "epoch": 1.2615795458901395, "grad_norm": 0.7899210453033447, "learning_rate": 3.0063778745357563e-05, "loss": 0.7892, "step": 197470 }, { "epoch": 1.2616434330398785, "grad_norm": 0.6827421188354492, "learning_rate": 3.005917727340233e-05, "loss": 0.8407, "step": 197480 }, { "epoch": 1.261707320189617, "grad_norm": 1.049285888671875, "learning_rate": 3.0054576002268432e-05, "loss": 0.9158, "step": 197490 }, { "epoch": 1.2617712073393559, "grad_norm": 0.9220422506332397, "learning_rate": 3.004997493200221e-05, "loss": 0.8226, "step": 197500 }, { "epoch": 1.2618350944890944, "grad_norm": 0.9056112170219421, "learning_rate": 3.004537406265e-05, "loss": 0.7789, "step": 197510 }, { "epoch": 1.2618989816388333, "grad_norm": 0.9944071769714355, "learning_rate": 3.0040773394258128e-05, "loss": 0.8665, "step": 197520 }, { "epoch": 1.2619628687885718, "grad_norm": 1.2284094095230103, "learning_rate": 3.0036172926872937e-05, "loss": 1.025, "step": 197530 }, { "epoch": 1.2620267559383105, "grad_norm": 0.7492192387580872, "learning_rate": 3.0031572660540764e-05, "loss": 1.1059, "step": 197540 }, { "epoch": 1.2620906430880492, "grad_norm": 0.6289995312690735, "learning_rate": 3.0026972595307924e-05, "loss": 0.9333, "step": 197550 }, { "epoch": 1.262154530237788, "grad_norm": 0.9772926568984985, "learning_rate": 3.002237273122075e-05, "loss": 0.8707, "step": 197560 }, { "epoch": 1.2622184173875266, "grad_norm": 1.2405903339385986, "learning_rate": 3.0017773068325566e-05, "loss": 0.7973, "step": 197570 }, { "epoch": 1.2622823045372653, "grad_norm": 0.9798763990402222, "learning_rate": 3.00131736066687e-05, "loss": 0.9111, "step": 197580 }, { "epoch": 1.262346191687004, "grad_norm": 0.9784902334213257, "learning_rate": 3.000857434629646e-05, "loss": 0.9779, "step": 197590 }, { "epoch": 1.2624100788367427, "grad_norm": 0.873577892780304, "learning_rate": 3.0003975287255172e-05, "loss": 0.8118, "step": 197600 }, { "epoch": 1.2624739659864814, "grad_norm": 1.508326768875122, "learning_rate": 2.9999376429591154e-05, "loss": 1.0005, "step": 197610 }, { "epoch": 1.2625378531362201, "grad_norm": 0.8667922616004944, "learning_rate": 2.9994777773350713e-05, "loss": 0.8489, "step": 197620 }, { "epoch": 1.2626017402859588, "grad_norm": 1.183371901512146, "learning_rate": 2.9990179318580176e-05, "loss": 0.8954, "step": 197630 }, { "epoch": 1.2626656274356975, "grad_norm": 0.7158766984939575, "learning_rate": 2.998558106532584e-05, "loss": 0.9686, "step": 197640 }, { "epoch": 1.2627295145854363, "grad_norm": 0.7767731547355652, "learning_rate": 2.998098301363401e-05, "loss": 1.0142, "step": 197650 }, { "epoch": 1.262793401735175, "grad_norm": 1.035164475440979, "learning_rate": 2.9976385163551012e-05, "loss": 0.9737, "step": 197660 }, { "epoch": 1.2628572888849137, "grad_norm": 0.8004717230796814, "learning_rate": 2.9971787515123135e-05, "loss": 0.935, "step": 197670 }, { "epoch": 1.2629211760346524, "grad_norm": 1.0571773052215576, "learning_rate": 2.9967190068396677e-05, "loss": 0.9205, "step": 197680 }, { "epoch": 1.262985063184391, "grad_norm": 0.6689577102661133, "learning_rate": 2.9962592823417955e-05, "loss": 0.977, "step": 197690 }, { "epoch": 1.2630489503341298, "grad_norm": 0.8524081110954285, "learning_rate": 2.9957995780233256e-05, "loss": 0.9482, "step": 197700 }, { "epoch": 1.2631128374838685, "grad_norm": 1.0077905654907227, "learning_rate": 2.9953398938888878e-05, "loss": 0.8617, "step": 197710 }, { "epoch": 1.2631767246336072, "grad_norm": 1.403054118156433, "learning_rate": 2.9948802299431113e-05, "loss": 0.8789, "step": 197720 }, { "epoch": 1.263240611783346, "grad_norm": 0.8395767211914062, "learning_rate": 2.994420586190625e-05, "loss": 1.0282, "step": 197730 }, { "epoch": 1.2633044989330846, "grad_norm": 1.5448747873306274, "learning_rate": 2.9939609626360588e-05, "loss": 0.8616, "step": 197740 }, { "epoch": 1.2633683860828233, "grad_norm": 3.1555674076080322, "learning_rate": 2.9935013592840423e-05, "loss": 1.0513, "step": 197750 }, { "epoch": 1.263432273232562, "grad_norm": 1.0118703842163086, "learning_rate": 2.9930417761392015e-05, "loss": 0.9609, "step": 197760 }, { "epoch": 1.2634961603823007, "grad_norm": 2.897057294845581, "learning_rate": 2.9925822132061677e-05, "loss": 1.2673, "step": 197770 }, { "epoch": 1.2635600475320394, "grad_norm": 0.9232097864151001, "learning_rate": 2.9921226704895667e-05, "loss": 0.9313, "step": 197780 }, { "epoch": 1.2636239346817781, "grad_norm": 0.918526291847229, "learning_rate": 2.9916631479940278e-05, "loss": 0.8895, "step": 197790 }, { "epoch": 1.2636878218315168, "grad_norm": 1.1380006074905396, "learning_rate": 2.9912036457241788e-05, "loss": 0.8376, "step": 197800 }, { "epoch": 1.2637517089812556, "grad_norm": 0.881756603717804, "learning_rate": 2.990744163684646e-05, "loss": 1.0668, "step": 197810 }, { "epoch": 1.2638155961309943, "grad_norm": 1.0361684560775757, "learning_rate": 2.9902847018800584e-05, "loss": 1.0976, "step": 197820 }, { "epoch": 1.263879483280733, "grad_norm": 0.8286011219024658, "learning_rate": 2.9898252603150424e-05, "loss": 1.007, "step": 197830 }, { "epoch": 1.2639433704304717, "grad_norm": 1.2168878316879272, "learning_rate": 2.9893658389942252e-05, "loss": 0.677, "step": 197840 }, { "epoch": 1.2640072575802104, "grad_norm": 1.848334789276123, "learning_rate": 2.9889064379222332e-05, "loss": 0.9353, "step": 197850 }, { "epoch": 1.264071144729949, "grad_norm": 0.9026688933372498, "learning_rate": 2.9884470571036937e-05, "loss": 0.9686, "step": 197860 }, { "epoch": 1.2641350318796878, "grad_norm": 0.7856665849685669, "learning_rate": 2.987987696543232e-05, "loss": 0.7682, "step": 197870 }, { "epoch": 1.2641989190294265, "grad_norm": 3.0376346111297607, "learning_rate": 2.9875283562454748e-05, "loss": 0.7571, "step": 197880 }, { "epoch": 1.2642628061791652, "grad_norm": 0.8250278830528259, "learning_rate": 2.987069036215049e-05, "loss": 0.8977, "step": 197890 }, { "epoch": 1.264326693328904, "grad_norm": 0.7953085899353027, "learning_rate": 2.9866097364565783e-05, "loss": 0.7127, "step": 197900 }, { "epoch": 1.2643905804786426, "grad_norm": 0.7985715866088867, "learning_rate": 2.98615045697469e-05, "loss": 0.9748, "step": 197910 }, { "epoch": 1.264454467628381, "grad_norm": 1.1099581718444824, "learning_rate": 2.9856911977740088e-05, "loss": 0.9364, "step": 197920 }, { "epoch": 1.26451835477812, "grad_norm": 0.980712354183197, "learning_rate": 2.9852319588591588e-05, "loss": 0.7622, "step": 197930 }, { "epoch": 1.2645822419278585, "grad_norm": 0.9018016457557678, "learning_rate": 2.984772740234767e-05, "loss": 0.7392, "step": 197940 }, { "epoch": 1.2646461290775974, "grad_norm": 1.323249101638794, "learning_rate": 2.984313541905459e-05, "loss": 0.8726, "step": 197950 }, { "epoch": 1.264710016227336, "grad_norm": 0.9077749848365784, "learning_rate": 2.9838543638758554e-05, "loss": 0.6612, "step": 197960 }, { "epoch": 1.2647739033770748, "grad_norm": 0.6576750874519348, "learning_rate": 2.9833952061505832e-05, "loss": 0.7659, "step": 197970 }, { "epoch": 1.2648377905268133, "grad_norm": 0.9078711867332458, "learning_rate": 2.982936068734265e-05, "loss": 0.9886, "step": 197980 }, { "epoch": 1.2649016776765523, "grad_norm": 1.642651081085205, "learning_rate": 2.982476951631526e-05, "loss": 0.9448, "step": 197990 }, { "epoch": 1.2649655648262907, "grad_norm": 0.8564273118972778, "learning_rate": 2.9820178548469896e-05, "loss": 0.9367, "step": 198000 }, { "epoch": 1.2650294519760297, "grad_norm": 1.8555928468704224, "learning_rate": 2.9815587783852794e-05, "loss": 0.9377, "step": 198010 }, { "epoch": 1.2650933391257682, "grad_norm": 0.5984758138656616, "learning_rate": 2.981099722251018e-05, "loss": 0.7801, "step": 198020 }, { "epoch": 1.2651572262755069, "grad_norm": 0.771589457988739, "learning_rate": 2.980640686448829e-05, "loss": 0.845, "step": 198030 }, { "epoch": 1.2652211134252456, "grad_norm": 0.9007082581520081, "learning_rate": 2.9801816709833353e-05, "loss": 0.8925, "step": 198040 }, { "epoch": 1.2652850005749843, "grad_norm": 1.667065978050232, "learning_rate": 2.979722675859159e-05, "loss": 0.8729, "step": 198050 }, { "epoch": 1.265348887724723, "grad_norm": 1.2028627395629883, "learning_rate": 2.979263701080924e-05, "loss": 1.0106, "step": 198060 }, { "epoch": 1.2654127748744617, "grad_norm": 1.319518804550171, "learning_rate": 2.9788047466532515e-05, "loss": 0.9549, "step": 198070 }, { "epoch": 1.2654766620242004, "grad_norm": 0.5411072373390198, "learning_rate": 2.978345812580764e-05, "loss": 0.6616, "step": 198080 }, { "epoch": 1.265540549173939, "grad_norm": 0.7059124708175659, "learning_rate": 2.977886898868083e-05, "loss": 0.6686, "step": 198090 }, { "epoch": 1.2656044363236778, "grad_norm": 1.225595235824585, "learning_rate": 2.97742800551983e-05, "loss": 0.7636, "step": 198100 }, { "epoch": 1.2656683234734165, "grad_norm": 1.0952637195587158, "learning_rate": 2.9769691325406273e-05, "loss": 1.007, "step": 198110 }, { "epoch": 1.2657322106231552, "grad_norm": 0.895312488079071, "learning_rate": 2.976510279935095e-05, "loss": 0.8403, "step": 198120 }, { "epoch": 1.265796097772894, "grad_norm": 0.6350535154342651, "learning_rate": 2.9760514477078554e-05, "loss": 0.7502, "step": 198130 }, { "epoch": 1.2658599849226326, "grad_norm": 0.737232506275177, "learning_rate": 2.975592635863529e-05, "loss": 0.839, "step": 198140 }, { "epoch": 1.2659238720723713, "grad_norm": 0.6077031493186951, "learning_rate": 2.975133844406735e-05, "loss": 0.7285, "step": 198150 }, { "epoch": 1.26598775922211, "grad_norm": 0.9517359137535095, "learning_rate": 2.9746750733420958e-05, "loss": 0.7235, "step": 198160 }, { "epoch": 1.2660516463718487, "grad_norm": 0.7495896220207214, "learning_rate": 2.9742163226742304e-05, "loss": 0.8919, "step": 198170 }, { "epoch": 1.2661155335215875, "grad_norm": 2.226393699645996, "learning_rate": 2.9737575924077593e-05, "loss": 0.9916, "step": 198180 }, { "epoch": 1.2661794206713262, "grad_norm": 0.949053168296814, "learning_rate": 2.973298882547302e-05, "loss": 0.6971, "step": 198190 }, { "epoch": 1.2662433078210649, "grad_norm": 1.5963283777236938, "learning_rate": 2.972840193097478e-05, "loss": 0.8806, "step": 198200 }, { "epoch": 1.2663071949708036, "grad_norm": 1.0817798376083374, "learning_rate": 2.9723815240629083e-05, "loss": 1.0479, "step": 198210 }, { "epoch": 1.2663710821205423, "grad_norm": 0.6202261447906494, "learning_rate": 2.9719228754482097e-05, "loss": 0.6736, "step": 198220 }, { "epoch": 1.266434969270281, "grad_norm": 1.1600061655044556, "learning_rate": 2.9714642472580024e-05, "loss": 0.9442, "step": 198230 }, { "epoch": 1.2664988564200197, "grad_norm": 1.1812655925750732, "learning_rate": 2.9710056394969056e-05, "loss": 0.8706, "step": 198240 }, { "epoch": 1.2665627435697584, "grad_norm": 1.3576058149337769, "learning_rate": 2.9705470521695368e-05, "loss": 0.9426, "step": 198250 }, { "epoch": 1.266626630719497, "grad_norm": 1.4901412725448608, "learning_rate": 2.9700884852805133e-05, "loss": 0.8986, "step": 198260 }, { "epoch": 1.2666905178692358, "grad_norm": 0.6664626002311707, "learning_rate": 2.9696299388344572e-05, "loss": 1.1324, "step": 198270 }, { "epoch": 1.2667544050189745, "grad_norm": 0.7542868256568909, "learning_rate": 2.969171412835983e-05, "loss": 0.9587, "step": 198280 }, { "epoch": 1.2668182921687132, "grad_norm": 1.228543996810913, "learning_rate": 2.96871290728971e-05, "loss": 0.9034, "step": 198290 }, { "epoch": 1.266882179318452, "grad_norm": 0.6784017086029053, "learning_rate": 2.968254422200256e-05, "loss": 0.6943, "step": 198300 }, { "epoch": 1.2669460664681906, "grad_norm": 0.9931260347366333, "learning_rate": 2.967795957572237e-05, "loss": 0.9826, "step": 198310 }, { "epoch": 1.2670099536179293, "grad_norm": 1.9696242809295654, "learning_rate": 2.967337513410271e-05, "loss": 0.8181, "step": 198320 }, { "epoch": 1.267073840767668, "grad_norm": 2.127666473388672, "learning_rate": 2.9668790897189748e-05, "loss": 0.7966, "step": 198330 }, { "epoch": 1.2671377279174068, "grad_norm": 0.9904428720474243, "learning_rate": 2.9664206865029652e-05, "loss": 0.8374, "step": 198340 }, { "epoch": 1.2672016150671455, "grad_norm": 0.7887634038925171, "learning_rate": 2.965962303766858e-05, "loss": 0.8897, "step": 198350 }, { "epoch": 1.2672655022168842, "grad_norm": 1.2394057512283325, "learning_rate": 2.9655039415152708e-05, "loss": 1.0418, "step": 198360 }, { "epoch": 1.2673293893666229, "grad_norm": 1.0656827688217163, "learning_rate": 2.965045599752818e-05, "loss": 0.764, "step": 198370 }, { "epoch": 1.2673932765163616, "grad_norm": 0.8545356392860413, "learning_rate": 2.9645872784841176e-05, "loss": 0.7794, "step": 198380 }, { "epoch": 1.2674571636661003, "grad_norm": 0.9308279752731323, "learning_rate": 2.964128977713784e-05, "loss": 0.8551, "step": 198390 }, { "epoch": 1.267521050815839, "grad_norm": 1.182393193244934, "learning_rate": 2.9636706974464324e-05, "loss": 0.7615, "step": 198400 }, { "epoch": 1.2675849379655775, "grad_norm": 0.7643315196037292, "learning_rate": 2.9632124376866787e-05, "loss": 0.8983, "step": 198410 }, { "epoch": 1.2676488251153164, "grad_norm": 0.9876617789268494, "learning_rate": 2.9627541984391377e-05, "loss": 0.9168, "step": 198420 }, { "epoch": 1.2677127122650549, "grad_norm": 1.2725698947906494, "learning_rate": 2.962295979708426e-05, "loss": 0.8674, "step": 198430 }, { "epoch": 1.2677765994147938, "grad_norm": 0.755490243434906, "learning_rate": 2.961837781499155e-05, "loss": 0.8002, "step": 198440 }, { "epoch": 1.2678404865645323, "grad_norm": 0.9564643502235413, "learning_rate": 2.9613796038159407e-05, "loss": 0.7711, "step": 198450 }, { "epoch": 1.2679043737142712, "grad_norm": 0.7563437223434448, "learning_rate": 2.9609214466633984e-05, "loss": 0.8209, "step": 198460 }, { "epoch": 1.2679682608640097, "grad_norm": 1.0872384309768677, "learning_rate": 2.96046331004614e-05, "loss": 0.9514, "step": 198470 }, { "epoch": 1.2680321480137486, "grad_norm": 0.8535314798355103, "learning_rate": 2.9600051939687812e-05, "loss": 0.914, "step": 198480 }, { "epoch": 1.2680960351634871, "grad_norm": 1.0216346979141235, "learning_rate": 2.9595470984359352e-05, "loss": 0.9734, "step": 198490 }, { "epoch": 1.268159922313226, "grad_norm": 1.756752610206604, "learning_rate": 2.9590890234522147e-05, "loss": 0.9822, "step": 198500 }, { "epoch": 1.2682238094629645, "grad_norm": 1.0920852422714233, "learning_rate": 2.9586309690222337e-05, "loss": 1.1895, "step": 198510 }, { "epoch": 1.2682876966127032, "grad_norm": 0.9064481854438782, "learning_rate": 2.9581729351506036e-05, "loss": 0.8215, "step": 198520 }, { "epoch": 1.268351583762442, "grad_norm": 0.8382233381271362, "learning_rate": 2.9577149218419398e-05, "loss": 0.9083, "step": 198530 }, { "epoch": 1.2684154709121807, "grad_norm": 1.0321544408798218, "learning_rate": 2.9572569291008534e-05, "loss": 1.1281, "step": 198540 }, { "epoch": 1.2684793580619194, "grad_norm": 0.7525801658630371, "learning_rate": 2.956798956931957e-05, "loss": 1.0749, "step": 198550 }, { "epoch": 1.268543245211658, "grad_norm": 0.8359514474868774, "learning_rate": 2.956341005339862e-05, "loss": 0.9472, "step": 198560 }, { "epoch": 1.2686071323613968, "grad_norm": 0.6227117776870728, "learning_rate": 2.9558830743291822e-05, "loss": 0.8116, "step": 198570 }, { "epoch": 1.2686710195111355, "grad_norm": 1.3645145893096924, "learning_rate": 2.9554251639045266e-05, "loss": 0.9916, "step": 198580 }, { "epoch": 1.2687349066608742, "grad_norm": 0.8045088052749634, "learning_rate": 2.954967274070509e-05, "loss": 0.7611, "step": 198590 }, { "epoch": 1.268798793810613, "grad_norm": 0.8705744743347168, "learning_rate": 2.9545094048317412e-05, "loss": 0.9175, "step": 198600 }, { "epoch": 1.2688626809603516, "grad_norm": 0.8257783055305481, "learning_rate": 2.954051556192833e-05, "loss": 0.7481, "step": 198610 }, { "epoch": 1.2689265681100903, "grad_norm": 1.0180158615112305, "learning_rate": 2.9535937281583947e-05, "loss": 0.9748, "step": 198620 }, { "epoch": 1.268990455259829, "grad_norm": 0.809568464756012, "learning_rate": 2.953135920733039e-05, "loss": 0.8703, "step": 198630 }, { "epoch": 1.2690543424095677, "grad_norm": 1.0721912384033203, "learning_rate": 2.952678133921375e-05, "loss": 0.8546, "step": 198640 }, { "epoch": 1.2691182295593064, "grad_norm": 1.1542046070098877, "learning_rate": 2.9522203677280136e-05, "loss": 0.7243, "step": 198650 }, { "epoch": 1.2691821167090451, "grad_norm": 0.7112246751785278, "learning_rate": 2.9517626221575645e-05, "loss": 1.0079, "step": 198660 }, { "epoch": 1.2692460038587838, "grad_norm": 0.9132784008979797, "learning_rate": 2.9513048972146373e-05, "loss": 0.8804, "step": 198670 }, { "epoch": 1.2693098910085225, "grad_norm": 0.778862476348877, "learning_rate": 2.950847192903843e-05, "loss": 0.9576, "step": 198680 }, { "epoch": 1.2693737781582612, "grad_norm": 0.9121737480163574, "learning_rate": 2.9503895092297894e-05, "loss": 0.9902, "step": 198690 }, { "epoch": 1.269437665308, "grad_norm": 0.9337990880012512, "learning_rate": 2.949931846197087e-05, "loss": 0.9189, "step": 198700 }, { "epoch": 1.2695015524577387, "grad_norm": 1.054032325744629, "learning_rate": 2.9494742038103444e-05, "loss": 0.8774, "step": 198710 }, { "epoch": 1.2695654396074774, "grad_norm": 0.9600640535354614, "learning_rate": 2.949016582074171e-05, "loss": 0.6799, "step": 198720 }, { "epoch": 1.269629326757216, "grad_norm": 2.320675849914551, "learning_rate": 2.9485589809931746e-05, "loss": 0.714, "step": 198730 }, { "epoch": 1.2696932139069548, "grad_norm": 0.8881000876426697, "learning_rate": 2.9481014005719644e-05, "loss": 0.9684, "step": 198740 }, { "epoch": 1.2697571010566935, "grad_norm": Infinity, "learning_rate": 2.9476895958607998e-05, "loss": 0.995, "step": 198750 }, { "epoch": 1.2698209882064322, "grad_norm": 0.66578608751297, "learning_rate": 2.9472320547058774e-05, "loss": 0.9148, "step": 198760 }, { "epoch": 1.269884875356171, "grad_norm": 0.7851095795631409, "learning_rate": 2.946774534224105e-05, "loss": 1.02, "step": 198770 }, { "epoch": 1.2699487625059096, "grad_norm": 0.8484801650047302, "learning_rate": 2.9463170344200885e-05, "loss": 0.8683, "step": 198780 }, { "epoch": 1.2700126496556483, "grad_norm": 1.1238106489181519, "learning_rate": 2.9458595552984368e-05, "loss": 0.7311, "step": 198790 }, { "epoch": 1.270076536805387, "grad_norm": 0.7110521197319031, "learning_rate": 2.945402096863756e-05, "loss": 0.6694, "step": 198800 }, { "epoch": 1.2701404239551257, "grad_norm": 1.1236708164215088, "learning_rate": 2.9449446591206536e-05, "loss": 0.7267, "step": 198810 }, { "epoch": 1.2702043111048644, "grad_norm": 1.026677131652832, "learning_rate": 2.9444872420737362e-05, "loss": 0.9999, "step": 198820 }, { "epoch": 1.2702681982546031, "grad_norm": 1.0339289903640747, "learning_rate": 2.9440298457276105e-05, "loss": 0.7496, "step": 198830 }, { "epoch": 1.2703320854043418, "grad_norm": 0.7419828176498413, "learning_rate": 2.943572470086884e-05, "loss": 1.0398, "step": 198840 }, { "epoch": 1.2703959725540805, "grad_norm": 1.2208056449890137, "learning_rate": 2.9431151151561607e-05, "loss": 0.6542, "step": 198850 }, { "epoch": 1.2704598597038192, "grad_norm": 1.2347742319107056, "learning_rate": 2.942657780940048e-05, "loss": 0.9713, "step": 198860 }, { "epoch": 1.270523746853558, "grad_norm": 0.9808398485183716, "learning_rate": 2.9422004674431514e-05, "loss": 0.8772, "step": 198870 }, { "epoch": 1.2705876340032964, "grad_norm": 0.9095727801322937, "learning_rate": 2.941743174670076e-05, "loss": 1.0907, "step": 198880 }, { "epoch": 1.2706515211530354, "grad_norm": 0.946310818195343, "learning_rate": 2.9412859026254276e-05, "loss": 0.7978, "step": 198890 }, { "epoch": 1.2707154083027739, "grad_norm": 1.1127965450286865, "learning_rate": 2.9408286513138102e-05, "loss": 1.0265, "step": 198900 }, { "epoch": 1.2707792954525128, "grad_norm": 0.7210429310798645, "learning_rate": 2.9403714207398303e-05, "loss": 0.8068, "step": 198910 }, { "epoch": 1.2708431826022513, "grad_norm": 1.0879623889923096, "learning_rate": 2.939914210908093e-05, "loss": 0.852, "step": 198920 }, { "epoch": 1.2709070697519902, "grad_norm": 1.1112303733825684, "learning_rate": 2.939457021823201e-05, "loss": 1.1495, "step": 198930 }, { "epoch": 1.2709709569017287, "grad_norm": 0.6538977026939392, "learning_rate": 2.93899985348976e-05, "loss": 0.9615, "step": 198940 }, { "epoch": 1.2710348440514676, "grad_norm": 1.4446771144866943, "learning_rate": 2.9385427059123732e-05, "loss": 1.1292, "step": 198950 }, { "epoch": 1.271098731201206, "grad_norm": 1.2213329076766968, "learning_rate": 2.9380855790956448e-05, "loss": 0.9452, "step": 198960 }, { "epoch": 1.271162618350945, "grad_norm": 0.6651431322097778, "learning_rate": 2.9376284730441784e-05, "loss": 0.957, "step": 198970 }, { "epoch": 1.2712265055006835, "grad_norm": 1.4860223531723022, "learning_rate": 2.9371713877625772e-05, "loss": 0.8377, "step": 198980 }, { "epoch": 1.2712903926504224, "grad_norm": 0.955312192440033, "learning_rate": 2.936714323255445e-05, "loss": 0.9017, "step": 198990 }, { "epoch": 1.271354279800161, "grad_norm": 0.6875607967376709, "learning_rate": 2.9362572795273846e-05, "loss": 0.6367, "step": 199000 }, { "epoch": 1.2714181669498996, "grad_norm": 0.8261491060256958, "learning_rate": 2.935800256582999e-05, "loss": 0.8404, "step": 199010 }, { "epoch": 1.2714820540996383, "grad_norm": 1.1034409999847412, "learning_rate": 2.93534325442689e-05, "loss": 0.7114, "step": 199020 }, { "epoch": 1.271545941249377, "grad_norm": 0.9923743605613708, "learning_rate": 2.9348862730636616e-05, "loss": 0.9492, "step": 199030 }, { "epoch": 1.2716098283991157, "grad_norm": 1.0676493644714355, "learning_rate": 2.934429312497914e-05, "loss": 0.8737, "step": 199040 }, { "epoch": 1.2716737155488544, "grad_norm": 1.1333630084991455, "learning_rate": 2.9339723727342505e-05, "loss": 1.0908, "step": 199050 }, { "epoch": 1.2717376026985932, "grad_norm": 3.124783515930176, "learning_rate": 2.933515453777273e-05, "loss": 0.8235, "step": 199060 }, { "epoch": 1.2718014898483319, "grad_norm": 1.512219786643982, "learning_rate": 2.9330585556315833e-05, "loss": 0.8718, "step": 199070 }, { "epoch": 1.2718653769980706, "grad_norm": 0.9839432239532471, "learning_rate": 2.9326016783017806e-05, "loss": 0.9304, "step": 199080 }, { "epoch": 1.2719292641478093, "grad_norm": 0.8138784766197205, "learning_rate": 2.9321448217924686e-05, "loss": 0.884, "step": 199090 }, { "epoch": 1.271993151297548, "grad_norm": 0.846782386302948, "learning_rate": 2.931687986108247e-05, "loss": 0.9937, "step": 199100 }, { "epoch": 1.2720570384472867, "grad_norm": 1.5242908000946045, "learning_rate": 2.931231171253716e-05, "loss": 0.8606, "step": 199110 }, { "epoch": 1.2721209255970254, "grad_norm": 0.8409215211868286, "learning_rate": 2.9307743772334773e-05, "loss": 1.0581, "step": 199120 }, { "epoch": 1.272184812746764, "grad_norm": 0.7096886038780212, "learning_rate": 2.9303176040521306e-05, "loss": 0.6941, "step": 199130 }, { "epoch": 1.2722486998965028, "grad_norm": 0.7271231412887573, "learning_rate": 2.9298608517142762e-05, "loss": 0.9551, "step": 199140 }, { "epoch": 1.2723125870462415, "grad_norm": 0.9017435312271118, "learning_rate": 2.929404120224514e-05, "loss": 1.4412, "step": 199150 }, { "epoch": 1.2723764741959802, "grad_norm": 2.8430840969085693, "learning_rate": 2.9289474095874436e-05, "loss": 0.8965, "step": 199160 }, { "epoch": 1.272440361345719, "grad_norm": 0.8325692415237427, "learning_rate": 2.9284907198076643e-05, "loss": 0.7069, "step": 199170 }, { "epoch": 1.2725042484954576, "grad_norm": 0.8542130589485168, "learning_rate": 2.9280340508897765e-05, "loss": 0.8619, "step": 199180 }, { "epoch": 1.2725681356451963, "grad_norm": 0.831527829170227, "learning_rate": 2.9275774028383773e-05, "loss": 0.8887, "step": 199190 }, { "epoch": 1.272632022794935, "grad_norm": 1.026160478591919, "learning_rate": 2.9271207756580665e-05, "loss": 0.8894, "step": 199200 }, { "epoch": 1.2726959099446737, "grad_norm": 0.9291976094245911, "learning_rate": 2.9266641693534437e-05, "loss": 0.9255, "step": 199210 }, { "epoch": 1.2727597970944124, "grad_norm": 0.7681089043617249, "learning_rate": 2.9262075839291046e-05, "loss": 0.7491, "step": 199220 }, { "epoch": 1.2728236842441512, "grad_norm": 1.4214307069778442, "learning_rate": 2.9257510193896504e-05, "loss": 0.8776, "step": 199230 }, { "epoch": 1.2728875713938899, "grad_norm": 1.1611994504928589, "learning_rate": 2.9252944757396776e-05, "loss": 0.8153, "step": 199240 }, { "epoch": 1.2729514585436286, "grad_norm": 1.6443861722946167, "learning_rate": 2.924837952983785e-05, "loss": 0.7736, "step": 199250 }, { "epoch": 1.2730153456933673, "grad_norm": 0.7098362445831299, "learning_rate": 2.9243814511265686e-05, "loss": 0.8077, "step": 199260 }, { "epoch": 1.273079232843106, "grad_norm": 0.9944263696670532, "learning_rate": 2.923924970172628e-05, "loss": 0.8855, "step": 199270 }, { "epoch": 1.2731431199928447, "grad_norm": 0.6278926134109497, "learning_rate": 2.923468510126558e-05, "loss": 0.6761, "step": 199280 }, { "epoch": 1.2732070071425834, "grad_norm": 1.4945859909057617, "learning_rate": 2.9230120709929567e-05, "loss": 1.3176, "step": 199290 }, { "epoch": 1.273270894292322, "grad_norm": 0.8660402894020081, "learning_rate": 2.922555652776421e-05, "loss": 0.8399, "step": 199300 }, { "epoch": 1.2733347814420608, "grad_norm": 0.6661750674247742, "learning_rate": 2.922099255481547e-05, "loss": 0.8871, "step": 199310 }, { "epoch": 1.2733986685917995, "grad_norm": 1.4136496782302856, "learning_rate": 2.921642879112931e-05, "loss": 0.6499, "step": 199320 }, { "epoch": 1.2734625557415382, "grad_norm": 2.2149500846862793, "learning_rate": 2.921186523675169e-05, "loss": 0.7559, "step": 199330 }, { "epoch": 1.273526442891277, "grad_norm": 0.7235051989555359, "learning_rate": 2.920730189172858e-05, "loss": 0.8331, "step": 199340 }, { "epoch": 1.2735903300410156, "grad_norm": 0.7026883363723755, "learning_rate": 2.920273875610592e-05, "loss": 0.7484, "step": 199350 }, { "epoch": 1.2736542171907543, "grad_norm": 1.0162301063537598, "learning_rate": 2.9198175829929674e-05, "loss": 0.9959, "step": 199360 }, { "epoch": 1.2737181043404928, "grad_norm": 0.7359153628349304, "learning_rate": 2.9193613113245794e-05, "loss": 0.8109, "step": 199370 }, { "epoch": 1.2737819914902317, "grad_norm": 0.8668492436408997, "learning_rate": 2.918905060610022e-05, "loss": 1.1629, "step": 199380 }, { "epoch": 1.2738458786399702, "grad_norm": 1.0416288375854492, "learning_rate": 2.9184488308538933e-05, "loss": 1.0551, "step": 199390 }, { "epoch": 1.2739097657897092, "grad_norm": 1.0758754014968872, "learning_rate": 2.9179926220607833e-05, "loss": 0.8616, "step": 199400 }, { "epoch": 1.2739736529394476, "grad_norm": 1.3386660814285278, "learning_rate": 2.9175364342352906e-05, "loss": 0.8793, "step": 199410 }, { "epoch": 1.2740375400891866, "grad_norm": 0.882012665271759, "learning_rate": 2.9170802673820064e-05, "loss": 0.808, "step": 199420 }, { "epoch": 1.274101427238925, "grad_norm": 0.8384029865264893, "learning_rate": 2.916624121505528e-05, "loss": 1.3155, "step": 199430 }, { "epoch": 1.274165314388664, "grad_norm": 1.40079665184021, "learning_rate": 2.916167996610444e-05, "loss": 0.7067, "step": 199440 }, { "epoch": 1.2742292015384025, "grad_norm": 0.5675820112228394, "learning_rate": 2.9157118927013537e-05, "loss": 0.6397, "step": 199450 }, { "epoch": 1.2742930886881414, "grad_norm": 1.4623277187347412, "learning_rate": 2.9152558097828454e-05, "loss": 0.7759, "step": 199460 }, { "epoch": 1.2743569758378799, "grad_norm": 0.9035980701446533, "learning_rate": 2.914799747859517e-05, "loss": 0.9244, "step": 199470 }, { "epoch": 1.2744208629876188, "grad_norm": 1.0479638576507568, "learning_rate": 2.9143437069359568e-05, "loss": 0.6907, "step": 199480 }, { "epoch": 1.2744847501373573, "grad_norm": 1.0720633268356323, "learning_rate": 2.9138876870167624e-05, "loss": 1.023, "step": 199490 }, { "epoch": 1.274548637287096, "grad_norm": 1.4346104860305786, "learning_rate": 2.9134316881065217e-05, "loss": 0.9242, "step": 199500 }, { "epoch": 1.2746125244368347, "grad_norm": 0.823266863822937, "learning_rate": 2.9129757102098305e-05, "loss": 0.7421, "step": 199510 }, { "epoch": 1.2746764115865734, "grad_norm": 1.276611566543579, "learning_rate": 2.9125197533312776e-05, "loss": 1.0202, "step": 199520 }, { "epoch": 1.2747402987363121, "grad_norm": 1.1702380180358887, "learning_rate": 2.9120638174754567e-05, "loss": 1.0277, "step": 199530 }, { "epoch": 1.2748041858860508, "grad_norm": 1.1897376775741577, "learning_rate": 2.9116079026469617e-05, "loss": 1.0578, "step": 199540 }, { "epoch": 1.2748680730357895, "grad_norm": 0.6272988319396973, "learning_rate": 2.911152008850382e-05, "loss": 1.0736, "step": 199550 }, { "epoch": 1.2749319601855282, "grad_norm": 1.393921136856079, "learning_rate": 2.9106961360903084e-05, "loss": 0.923, "step": 199560 }, { "epoch": 1.274995847335267, "grad_norm": 0.8716452121734619, "learning_rate": 2.91024028437133e-05, "loss": 0.6029, "step": 199570 }, { "epoch": 1.2750597344850056, "grad_norm": 2.0013890266418457, "learning_rate": 2.9097844536980425e-05, "loss": 0.7438, "step": 199580 }, { "epoch": 1.2751236216347444, "grad_norm": 0.7331926822662354, "learning_rate": 2.909328644075031e-05, "loss": 0.9836, "step": 199590 }, { "epoch": 1.275187508784483, "grad_norm": 0.9569740891456604, "learning_rate": 2.908872855506891e-05, "loss": 1.0353, "step": 199600 }, { "epoch": 1.2752513959342218, "grad_norm": 0.787571907043457, "learning_rate": 2.9084170879982088e-05, "loss": 0.8915, "step": 199610 }, { "epoch": 1.2753152830839605, "grad_norm": 1.9850256443023682, "learning_rate": 2.9079613415535777e-05, "loss": 0.9739, "step": 199620 }, { "epoch": 1.2753791702336992, "grad_norm": 1.550048828125, "learning_rate": 2.9075056161775837e-05, "loss": 0.9745, "step": 199630 }, { "epoch": 1.2754430573834379, "grad_norm": 0.9888909459114075, "learning_rate": 2.9070499118748208e-05, "loss": 0.5989, "step": 199640 }, { "epoch": 1.2755069445331766, "grad_norm": 1.1229398250579834, "learning_rate": 2.906594228649873e-05, "loss": 1.1172, "step": 199650 }, { "epoch": 1.2755708316829153, "grad_norm": 0.9776995182037354, "learning_rate": 2.906138566507333e-05, "loss": 0.9073, "step": 199660 }, { "epoch": 1.275634718832654, "grad_norm": 1.1696735620498657, "learning_rate": 2.9056829254517916e-05, "loss": 0.723, "step": 199670 }, { "epoch": 1.2756986059823927, "grad_norm": 1.5058003664016724, "learning_rate": 2.9052273054878322e-05, "loss": 1.1077, "step": 199680 }, { "epoch": 1.2757624931321314, "grad_norm": 1.1655230522155762, "learning_rate": 2.9047717066200486e-05, "loss": 1.0817, "step": 199690 }, { "epoch": 1.2758263802818701, "grad_norm": 0.8833178281784058, "learning_rate": 2.904316128853024e-05, "loss": 0.6168, "step": 199700 }, { "epoch": 1.2758902674316088, "grad_norm": 0.8839700222015381, "learning_rate": 2.9038605721913513e-05, "loss": 0.7247, "step": 199710 }, { "epoch": 1.2759541545813475, "grad_norm": 1.2364578247070312, "learning_rate": 2.9034050366396143e-05, "loss": 0.9347, "step": 199720 }, { "epoch": 1.2760180417310862, "grad_norm": 0.8872677683830261, "learning_rate": 2.902949522202404e-05, "loss": 0.786, "step": 199730 }, { "epoch": 1.276081928880825, "grad_norm": 0.7343273758888245, "learning_rate": 2.902494028884305e-05, "loss": 1.0002, "step": 199740 }, { "epoch": 1.2761458160305637, "grad_norm": 1.0674399137496948, "learning_rate": 2.9020385566899067e-05, "loss": 0.7747, "step": 199750 }, { "epoch": 1.2762097031803024, "grad_norm": 1.0332404375076294, "learning_rate": 2.9015831056237935e-05, "loss": 0.82, "step": 199760 }, { "epoch": 1.276273590330041, "grad_norm": 1.2516356706619263, "learning_rate": 2.9011276756905557e-05, "loss": 1.0702, "step": 199770 }, { "epoch": 1.2763374774797798, "grad_norm": 0.9094335436820984, "learning_rate": 2.900672266894776e-05, "loss": 1.0714, "step": 199780 }, { "epoch": 1.2764013646295185, "grad_norm": 0.7820824980735779, "learning_rate": 2.9002168792410456e-05, "loss": 0.9105, "step": 199790 }, { "epoch": 1.2764652517792572, "grad_norm": 0.857791006565094, "learning_rate": 2.899761512733945e-05, "loss": 0.7399, "step": 199800 }, { "epoch": 1.2765291389289959, "grad_norm": 0.8450620770454407, "learning_rate": 2.8993061673780654e-05, "loss": 0.6665, "step": 199810 }, { "epoch": 1.2765930260787346, "grad_norm": 0.7609385251998901, "learning_rate": 2.898850843177987e-05, "loss": 0.8598, "step": 199820 }, { "epoch": 1.2766569132284733, "grad_norm": 4.130343914031982, "learning_rate": 2.898395540138301e-05, "loss": 0.8879, "step": 199830 }, { "epoch": 1.276720800378212, "grad_norm": 1.2083723545074463, "learning_rate": 2.8979402582635883e-05, "loss": 0.747, "step": 199840 }, { "epoch": 1.2767846875279507, "grad_norm": 1.0473322868347168, "learning_rate": 2.8974849975584356e-05, "loss": 0.8937, "step": 199850 }, { "epoch": 1.2768485746776892, "grad_norm": 2.8024537563323975, "learning_rate": 2.8970297580274298e-05, "loss": 0.9525, "step": 199860 }, { "epoch": 1.2769124618274281, "grad_norm": 0.6386640071868896, "learning_rate": 2.896574539675152e-05, "loss": 0.6814, "step": 199870 }, { "epoch": 1.2769763489771666, "grad_norm": 0.840392529964447, "learning_rate": 2.8961193425061893e-05, "loss": 0.8324, "step": 199880 }, { "epoch": 1.2770402361269055, "grad_norm": 1.0705910921096802, "learning_rate": 2.895664166525124e-05, "loss": 0.9371, "step": 199890 }, { "epoch": 1.277104123276644, "grad_norm": 1.714959979057312, "learning_rate": 2.8952090117365427e-05, "loss": 1.107, "step": 199900 }, { "epoch": 1.277168010426383, "grad_norm": 1.1649478673934937, "learning_rate": 2.8947538781450257e-05, "loss": 0.9453, "step": 199910 }, { "epoch": 1.2772318975761214, "grad_norm": 2.98532772064209, "learning_rate": 2.89429876575516e-05, "loss": 0.9335, "step": 199920 }, { "epoch": 1.2772957847258604, "grad_norm": 1.2889913320541382, "learning_rate": 2.893843674571526e-05, "loss": 0.8936, "step": 199930 }, { "epoch": 1.2773596718755988, "grad_norm": 0.9181677103042603, "learning_rate": 2.8933886045987102e-05, "loss": 0.76, "step": 199940 }, { "epoch": 1.2774235590253378, "grad_norm": 1.176107406616211, "learning_rate": 2.8929335558412918e-05, "loss": 0.8422, "step": 199950 }, { "epoch": 1.2774874461750763, "grad_norm": 1.2638664245605469, "learning_rate": 2.892478528303857e-05, "loss": 0.8383, "step": 199960 }, { "epoch": 1.277551333324815, "grad_norm": 1.9292041063308716, "learning_rate": 2.8920235219909842e-05, "loss": 1.1438, "step": 199970 }, { "epoch": 1.2776152204745537, "grad_norm": 0.9688105583190918, "learning_rate": 2.8915685369072608e-05, "loss": 1.0188, "step": 199980 }, { "epoch": 1.2776791076242924, "grad_norm": 0.9021857380867004, "learning_rate": 2.8911135730572643e-05, "loss": 0.698, "step": 199990 }, { "epoch": 1.277742994774031, "grad_norm": 1.147022008895874, "learning_rate": 2.890658630445581e-05, "loss": 0.8454, "step": 200000 }, { "epoch": 1.2778068819237698, "grad_norm": 0.6684355139732361, "learning_rate": 2.890203709076787e-05, "loss": 0.8473, "step": 200010 }, { "epoch": 1.2778707690735085, "grad_norm": 4.026229381561279, "learning_rate": 2.8897488089554692e-05, "loss": 0.7693, "step": 200020 }, { "epoch": 1.2779346562232472, "grad_norm": 1.4114011526107788, "learning_rate": 2.889293930086205e-05, "loss": 1.0826, "step": 200030 }, { "epoch": 1.277998543372986, "grad_norm": 1.0624281167984009, "learning_rate": 2.8888390724735788e-05, "loss": 0.7916, "step": 200040 }, { "epoch": 1.2780624305227246, "grad_norm": 0.7880474328994751, "learning_rate": 2.888384236122169e-05, "loss": 0.9133, "step": 200050 }, { "epoch": 1.2781263176724633, "grad_norm": 1.1904906034469604, "learning_rate": 2.887929421036556e-05, "loss": 0.9532, "step": 200060 }, { "epoch": 1.278190204822202, "grad_norm": 0.9358816146850586, "learning_rate": 2.8874746272213217e-05, "loss": 1.1367, "step": 200070 }, { "epoch": 1.2782540919719407, "grad_norm": 0.8555280566215515, "learning_rate": 2.887019854681044e-05, "loss": 0.7676, "step": 200080 }, { "epoch": 1.2783179791216794, "grad_norm": 0.6473132371902466, "learning_rate": 2.8865651034203068e-05, "loss": 0.9187, "step": 200090 }, { "epoch": 1.2783818662714181, "grad_norm": 5.737834453582764, "learning_rate": 2.8861103734436846e-05, "loss": 0.7291, "step": 200100 }, { "epoch": 1.2784457534211568, "grad_norm": 0.8243213891983032, "learning_rate": 2.885655664755762e-05, "loss": 0.8604, "step": 200110 }, { "epoch": 1.2785096405708956, "grad_norm": 1.6044676303863525, "learning_rate": 2.8852009773611137e-05, "loss": 0.7958, "step": 200120 }, { "epoch": 1.2785735277206343, "grad_norm": 0.990822434425354, "learning_rate": 2.8847463112643236e-05, "loss": 1.2024, "step": 200130 }, { "epoch": 1.278637414870373, "grad_norm": 0.9312714338302612, "learning_rate": 2.884291666469966e-05, "loss": 0.8222, "step": 200140 }, { "epoch": 1.2787013020201117, "grad_norm": 0.9203009605407715, "learning_rate": 2.8838370429826235e-05, "loss": 0.8486, "step": 200150 }, { "epoch": 1.2787651891698504, "grad_norm": 0.8465953469276428, "learning_rate": 2.883382440806871e-05, "loss": 0.7348, "step": 200160 }, { "epoch": 1.278829076319589, "grad_norm": 2.1238362789154053, "learning_rate": 2.8829278599472903e-05, "loss": 0.9623, "step": 200170 }, { "epoch": 1.2788929634693278, "grad_norm": 0.8500341773033142, "learning_rate": 2.8824733004084558e-05, "loss": 0.8904, "step": 200180 }, { "epoch": 1.2789568506190665, "grad_norm": 0.94657963514328, "learning_rate": 2.882018762194947e-05, "loss": 0.7947, "step": 200190 }, { "epoch": 1.2790207377688052, "grad_norm": 0.8682937622070312, "learning_rate": 2.8815642453113435e-05, "loss": 0.9038, "step": 200200 }, { "epoch": 1.279084624918544, "grad_norm": 0.8084182739257812, "learning_rate": 2.8811097497622185e-05, "loss": 0.9295, "step": 200210 }, { "epoch": 1.2791485120682826, "grad_norm": 1.071580171585083, "learning_rate": 2.8806552755521532e-05, "loss": 0.8428, "step": 200220 }, { "epoch": 1.2792123992180213, "grad_norm": 1.3232358694076538, "learning_rate": 2.8802008226857214e-05, "loss": 1.0202, "step": 200230 }, { "epoch": 1.27927628636776, "grad_norm": 1.0317996740341187, "learning_rate": 2.8797463911675028e-05, "loss": 1.0649, "step": 200240 }, { "epoch": 1.2793401735174987, "grad_norm": 1.255834937095642, "learning_rate": 2.8792919810020706e-05, "loss": 0.7789, "step": 200250 }, { "epoch": 1.2794040606672374, "grad_norm": 1.0513917207717896, "learning_rate": 2.8788375921940047e-05, "loss": 0.9803, "step": 200260 }, { "epoch": 1.2794679478169761, "grad_norm": 0.9030227065086365, "learning_rate": 2.8783832247478776e-05, "loss": 0.9298, "step": 200270 }, { "epoch": 1.2795318349667149, "grad_norm": 0.9306374788284302, "learning_rate": 2.8779288786682685e-05, "loss": 0.8644, "step": 200280 }, { "epoch": 1.2795957221164536, "grad_norm": 1.2579879760742188, "learning_rate": 2.8774745539597498e-05, "loss": 0.7549, "step": 200290 }, { "epoch": 1.2796596092661923, "grad_norm": 1.0946049690246582, "learning_rate": 2.8770202506269007e-05, "loss": 1.0356, "step": 200300 }, { "epoch": 1.279723496415931, "grad_norm": 0.580696702003479, "learning_rate": 2.876565968674292e-05, "loss": 0.9136, "step": 200310 }, { "epoch": 1.2797873835656697, "grad_norm": 0.6239465475082397, "learning_rate": 2.8761117081065025e-05, "loss": 0.6196, "step": 200320 }, { "epoch": 1.2798512707154084, "grad_norm": 1.3467539548873901, "learning_rate": 2.875657468928104e-05, "loss": 0.7165, "step": 200330 }, { "epoch": 1.279915157865147, "grad_norm": 0.8384279608726501, "learning_rate": 2.8752032511436745e-05, "loss": 0.8655, "step": 200340 }, { "epoch": 1.2799790450148856, "grad_norm": 1.4086518287658691, "learning_rate": 2.874749054757785e-05, "loss": 0.9132, "step": 200350 }, { "epoch": 1.2800429321646245, "grad_norm": 1.0973162651062012, "learning_rate": 2.8742948797750124e-05, "loss": 0.7084, "step": 200360 }, { "epoch": 1.280106819314363, "grad_norm": 0.7938587665557861, "learning_rate": 2.873840726199928e-05, "loss": 0.7629, "step": 200370 }, { "epoch": 1.280170706464102, "grad_norm": 0.7673661708831787, "learning_rate": 2.8733865940371062e-05, "loss": 0.7815, "step": 200380 }, { "epoch": 1.2802345936138404, "grad_norm": 0.8518702387809753, "learning_rate": 2.8729324832911236e-05, "loss": 0.8724, "step": 200390 }, { "epoch": 1.2802984807635793, "grad_norm": 1.0190377235412598, "learning_rate": 2.872478393966549e-05, "loss": 0.6522, "step": 200400 }, { "epoch": 1.2803623679133178, "grad_norm": 0.7286405563354492, "learning_rate": 2.8720243260679598e-05, "loss": 0.6123, "step": 200410 }, { "epoch": 1.2804262550630567, "grad_norm": 0.8743591904640198, "learning_rate": 2.8715702795999245e-05, "loss": 0.9299, "step": 200420 }, { "epoch": 1.2804901422127952, "grad_norm": 1.6980204582214355, "learning_rate": 2.8711162545670195e-05, "loss": 0.752, "step": 200430 }, { "epoch": 1.2805540293625342, "grad_norm": 0.8956275582313538, "learning_rate": 2.8706622509738133e-05, "loss": 0.862, "step": 200440 }, { "epoch": 1.2806179165122726, "grad_norm": 1.4533246755599976, "learning_rate": 2.8702082688248834e-05, "loss": 1.0016, "step": 200450 }, { "epoch": 1.2806818036620113, "grad_norm": 2.4909677505493164, "learning_rate": 2.8697543081247958e-05, "loss": 1.2088, "step": 200460 }, { "epoch": 1.28074569081175, "grad_norm": 1.0458720922470093, "learning_rate": 2.8693003688781283e-05, "loss": 0.8815, "step": 200470 }, { "epoch": 1.2808095779614888, "grad_norm": 0.624754011631012, "learning_rate": 2.8688464510894464e-05, "loss": 1.0686, "step": 200480 }, { "epoch": 1.2808734651112275, "grad_norm": 2.7535908222198486, "learning_rate": 2.868392554763327e-05, "loss": 0.7738, "step": 200490 }, { "epoch": 1.2809373522609662, "grad_norm": 1.5427417755126953, "learning_rate": 2.867938679904336e-05, "loss": 0.9673, "step": 200500 }, { "epoch": 1.2810012394107049, "grad_norm": 1.1224215030670166, "learning_rate": 2.8674848265170495e-05, "loss": 0.9368, "step": 200510 }, { "epoch": 1.2810651265604436, "grad_norm": 0.9177605509757996, "learning_rate": 2.867030994606036e-05, "loss": 0.715, "step": 200520 }, { "epoch": 1.2811290137101823, "grad_norm": 1.9078919887542725, "learning_rate": 2.8665771841758632e-05, "loss": 0.6437, "step": 200530 }, { "epoch": 1.281192900859921, "grad_norm": 0.9317499399185181, "learning_rate": 2.866123395231106e-05, "loss": 1.0355, "step": 200540 }, { "epoch": 1.2812567880096597, "grad_norm": 0.7947119474411011, "learning_rate": 2.86566962777633e-05, "loss": 0.9663, "step": 200550 }, { "epoch": 1.2813206751593984, "grad_norm": 1.0166733264923096, "learning_rate": 2.8652158818161096e-05, "loss": 1.0026, "step": 200560 }, { "epoch": 1.281384562309137, "grad_norm": 0.824131965637207, "learning_rate": 2.86476215735501e-05, "loss": 0.8661, "step": 200570 }, { "epoch": 1.2814484494588758, "grad_norm": 0.5033380389213562, "learning_rate": 2.864308454397605e-05, "loss": 0.7417, "step": 200580 }, { "epoch": 1.2815123366086145, "grad_norm": 0.7473288774490356, "learning_rate": 2.8638547729484587e-05, "loss": 0.8514, "step": 200590 }, { "epoch": 1.2815762237583532, "grad_norm": 1.0465642213821411, "learning_rate": 2.8634011130121456e-05, "loss": 0.8228, "step": 200600 }, { "epoch": 1.281640110908092, "grad_norm": 0.9337648749351501, "learning_rate": 2.8629474745932294e-05, "loss": 0.9006, "step": 200610 }, { "epoch": 1.2817039980578306, "grad_norm": 0.7650608420372009, "learning_rate": 2.8624938576962833e-05, "loss": 0.9708, "step": 200620 }, { "epoch": 1.2817678852075693, "grad_norm": 1.056187391281128, "learning_rate": 2.8620402623258715e-05, "loss": 0.8352, "step": 200630 }, { "epoch": 1.281831772357308, "grad_norm": 1.137027382850647, "learning_rate": 2.861586688486565e-05, "loss": 0.7863, "step": 200640 }, { "epoch": 1.2818956595070468, "grad_norm": 1.4955174922943115, "learning_rate": 2.861133136182929e-05, "loss": 0.9078, "step": 200650 }, { "epoch": 1.2819595466567855, "grad_norm": 0.8363558053970337, "learning_rate": 2.860679605419535e-05, "loss": 0.8063, "step": 200660 }, { "epoch": 1.2820234338065242, "grad_norm": 0.9371567368507385, "learning_rate": 2.8602260962009453e-05, "loss": 1.1893, "step": 200670 }, { "epoch": 1.2820873209562629, "grad_norm": 0.6184449195861816, "learning_rate": 2.8597726085317323e-05, "loss": 0.8258, "step": 200680 }, { "epoch": 1.2821512081060016, "grad_norm": 1.352858543395996, "learning_rate": 2.859319142416459e-05, "loss": 0.9911, "step": 200690 }, { "epoch": 1.2822150952557403, "grad_norm": 0.9840089678764343, "learning_rate": 2.858865697859694e-05, "loss": 0.8355, "step": 200700 }, { "epoch": 1.282278982405479, "grad_norm": 0.8363627195358276, "learning_rate": 2.858412274866006e-05, "loss": 0.7131, "step": 200710 }, { "epoch": 1.2823428695552177, "grad_norm": 0.9808521866798401, "learning_rate": 2.8579588734399565e-05, "loss": 0.7284, "step": 200720 }, { "epoch": 1.2824067567049564, "grad_norm": 1.230603814125061, "learning_rate": 2.8575054935861158e-05, "loss": 0.9945, "step": 200730 }, { "epoch": 1.2824706438546951, "grad_norm": 1.1982591152191162, "learning_rate": 2.8570521353090473e-05, "loss": 1.0323, "step": 200740 }, { "epoch": 1.2825345310044338, "grad_norm": 1.8492811918258667, "learning_rate": 2.856598798613319e-05, "loss": 0.8135, "step": 200750 }, { "epoch": 1.2825984181541725, "grad_norm": 1.0338141918182373, "learning_rate": 2.856145483503494e-05, "loss": 1.0249, "step": 200760 }, { "epoch": 1.2826623053039112, "grad_norm": 1.3097282648086548, "learning_rate": 2.8556921899841394e-05, "loss": 0.8617, "step": 200770 }, { "epoch": 1.28272619245365, "grad_norm": 0.9267164468765259, "learning_rate": 2.8552389180598183e-05, "loss": 0.9097, "step": 200780 }, { "epoch": 1.2827900796033886, "grad_norm": 3.339057683944702, "learning_rate": 2.8547856677350992e-05, "loss": 0.9968, "step": 200790 }, { "epoch": 1.2828539667531274, "grad_norm": 0.5326239466667175, "learning_rate": 2.8543324390145416e-05, "loss": 0.7836, "step": 200800 }, { "epoch": 1.282917853902866, "grad_norm": 1.0044302940368652, "learning_rate": 2.853924551641376e-05, "loss": 0.8953, "step": 200810 }, { "epoch": 1.2829817410526045, "grad_norm": 1.041451334953308, "learning_rate": 2.8534713639813047e-05, "loss": 0.7091, "step": 200820 }, { "epoch": 1.2830456282023435, "grad_norm": 0.7834107279777527, "learning_rate": 2.853018197938635e-05, "loss": 1.0314, "step": 200830 }, { "epoch": 1.283109515352082, "grad_norm": 0.8394607901573181, "learning_rate": 2.8525650535179306e-05, "loss": 0.8523, "step": 200840 }, { "epoch": 1.2831734025018209, "grad_norm": 1.2169694900512695, "learning_rate": 2.852111930723752e-05, "loss": 0.82, "step": 200850 }, { "epoch": 1.2832372896515594, "grad_norm": 0.7641561031341553, "learning_rate": 2.8516588295606673e-05, "loss": 0.9652, "step": 200860 }, { "epoch": 1.2833011768012983, "grad_norm": 1.29936683177948, "learning_rate": 2.8512057500332333e-05, "loss": 0.6658, "step": 200870 }, { "epoch": 1.2833650639510368, "grad_norm": 0.9101029634475708, "learning_rate": 2.8507526921460193e-05, "loss": 0.6327, "step": 200880 }, { "epoch": 1.2834289511007757, "grad_norm": 1.0572748184204102, "learning_rate": 2.8502996559035833e-05, "loss": 1.1782, "step": 200890 }, { "epoch": 1.2834928382505142, "grad_norm": 1.0327401161193848, "learning_rate": 2.8498466413104906e-05, "loss": 0.5408, "step": 200900 }, { "epoch": 1.2835567254002531, "grad_norm": 1.2162421941757202, "learning_rate": 2.849393648371301e-05, "loss": 1.127, "step": 200910 }, { "epoch": 1.2836206125499916, "grad_norm": 1.2150144577026367, "learning_rate": 2.8489406770905802e-05, "loss": 0.8937, "step": 200920 }, { "epoch": 1.2836844996997305, "grad_norm": 1.1148834228515625, "learning_rate": 2.848487727472885e-05, "loss": 0.9379, "step": 200930 }, { "epoch": 1.283748386849469, "grad_norm": 1.6276755332946777, "learning_rate": 2.8480347995227824e-05, "loss": 0.83, "step": 200940 }, { "epoch": 1.2838122739992077, "grad_norm": 0.8462209701538086, "learning_rate": 2.8475818932448284e-05, "loss": 0.9984, "step": 200950 }, { "epoch": 1.2838761611489464, "grad_norm": 0.7349916696548462, "learning_rate": 2.8471290086435896e-05, "loss": 0.9028, "step": 200960 }, { "epoch": 1.2839400482986851, "grad_norm": 1.0558744668960571, "learning_rate": 2.846676145723621e-05, "loss": 1.2691, "step": 200970 }, { "epoch": 1.2840039354484238, "grad_norm": 0.8234534859657288, "learning_rate": 2.8462233044894898e-05, "loss": 0.6873, "step": 200980 }, { "epoch": 1.2840678225981625, "grad_norm": 1.1932810544967651, "learning_rate": 2.8457704849457513e-05, "loss": 0.9897, "step": 200990 }, { "epoch": 1.2841317097479013, "grad_norm": 0.8133253455162048, "learning_rate": 2.845317687096969e-05, "loss": 0.6946, "step": 201000 }, { "epoch": 1.28419559689764, "grad_norm": 0.9386922717094421, "learning_rate": 2.8448649109476987e-05, "loss": 0.8382, "step": 201010 }, { "epoch": 1.2842594840473787, "grad_norm": 0.8915868997573853, "learning_rate": 2.8444121565025066e-05, "loss": 0.8555, "step": 201020 }, { "epoch": 1.2843233711971174, "grad_norm": 1.2032678127288818, "learning_rate": 2.8439594237659466e-05, "loss": 0.8164, "step": 201030 }, { "epoch": 1.284387258346856, "grad_norm": 0.8416345119476318, "learning_rate": 2.8435067127425808e-05, "loss": 1.003, "step": 201040 }, { "epoch": 1.2844511454965948, "grad_norm": 1.1380306482315063, "learning_rate": 2.8430540234369694e-05, "loss": 0.8604, "step": 201050 }, { "epoch": 1.2845150326463335, "grad_norm": 1.1704416275024414, "learning_rate": 2.842601355853668e-05, "loss": 0.831, "step": 201060 }, { "epoch": 1.2845789197960722, "grad_norm": 1.0205214023590088, "learning_rate": 2.84214870999724e-05, "loss": 0.7554, "step": 201070 }, { "epoch": 1.284642806945811, "grad_norm": 1.102547526359558, "learning_rate": 2.8416960858722385e-05, "loss": 1.1079, "step": 201080 }, { "epoch": 1.2847066940955496, "grad_norm": 1.2095087766647339, "learning_rate": 2.841243483483227e-05, "loss": 0.8655, "step": 201090 }, { "epoch": 1.2847705812452883, "grad_norm": 1.368201494216919, "learning_rate": 2.840790902834759e-05, "loss": 0.8103, "step": 201100 }, { "epoch": 1.284834468395027, "grad_norm": 0.7406502962112427, "learning_rate": 2.840338343931397e-05, "loss": 1.0044, "step": 201110 }, { "epoch": 1.2848983555447657, "grad_norm": 1.0951242446899414, "learning_rate": 2.8398858067776946e-05, "loss": 0.7321, "step": 201120 }, { "epoch": 1.2849622426945044, "grad_norm": 0.8413940072059631, "learning_rate": 2.839433291378212e-05, "loss": 0.9743, "step": 201130 }, { "epoch": 1.2850261298442431, "grad_norm": 0.8289148211479187, "learning_rate": 2.8389807977375037e-05, "loss": 0.7121, "step": 201140 }, { "epoch": 1.2850900169939818, "grad_norm": 2.189805030822754, "learning_rate": 2.8385283258601304e-05, "loss": 0.986, "step": 201150 }, { "epoch": 1.2851539041437205, "grad_norm": 1.2476427555084229, "learning_rate": 2.8380758757506463e-05, "loss": 1.1473, "step": 201160 }, { "epoch": 1.2852177912934593, "grad_norm": 1.2025208473205566, "learning_rate": 2.8376234474136065e-05, "loss": 0.8451, "step": 201170 }, { "epoch": 1.285281678443198, "grad_norm": 1.0035260915756226, "learning_rate": 2.8371710408535722e-05, "loss": 0.7409, "step": 201180 }, { "epoch": 1.2853455655929367, "grad_norm": 1.0282871723175049, "learning_rate": 2.8367186560750936e-05, "loss": 0.9917, "step": 201190 }, { "epoch": 1.2854094527426754, "grad_norm": 0.9853460192680359, "learning_rate": 2.8362662930827323e-05, "loss": 1.0054, "step": 201200 }, { "epoch": 1.285473339892414, "grad_norm": 0.8255294561386108, "learning_rate": 2.835813951881039e-05, "loss": 0.9538, "step": 201210 }, { "epoch": 1.2855372270421528, "grad_norm": 1.0002115964889526, "learning_rate": 2.8353616324745737e-05, "loss": 0.7627, "step": 201220 }, { "epoch": 1.2856011141918915, "grad_norm": 0.8844050765037537, "learning_rate": 2.834909334867888e-05, "loss": 0.5698, "step": 201230 }, { "epoch": 1.2856650013416302, "grad_norm": 0.8857330083847046, "learning_rate": 2.8344570590655394e-05, "loss": 0.9497, "step": 201240 }, { "epoch": 1.285728888491369, "grad_norm": 1.0770478248596191, "learning_rate": 2.83400480507208e-05, "loss": 0.8344, "step": 201250 }, { "epoch": 1.2857927756411076, "grad_norm": 1.0126256942749023, "learning_rate": 2.8335525728920676e-05, "loss": 0.9859, "step": 201260 }, { "epoch": 1.2858566627908463, "grad_norm": 1.0135153532028198, "learning_rate": 2.833100362530054e-05, "loss": 1.059, "step": 201270 }, { "epoch": 1.285920549940585, "grad_norm": 0.8402441740036011, "learning_rate": 2.8326481739905958e-05, "loss": 1.0928, "step": 201280 }, { "epoch": 1.2859844370903237, "grad_norm": 0.8969540596008301, "learning_rate": 2.832196007278244e-05, "loss": 0.8163, "step": 201290 }, { "epoch": 1.2860483242400624, "grad_norm": 1.4373033046722412, "learning_rate": 2.831743862397555e-05, "loss": 0.7645, "step": 201300 }, { "epoch": 1.286112211389801, "grad_norm": 2.0069103240966797, "learning_rate": 2.831291739353079e-05, "loss": 0.7204, "step": 201310 }, { "epoch": 1.2861760985395398, "grad_norm": 0.9032400846481323, "learning_rate": 2.8308396381493747e-05, "loss": 0.9155, "step": 201320 }, { "epoch": 1.2862399856892783, "grad_norm": 1.127811312675476, "learning_rate": 2.830387558790989e-05, "loss": 0.7727, "step": 201330 }, { "epoch": 1.2863038728390173, "grad_norm": 1.0291417837142944, "learning_rate": 2.829935501282479e-05, "loss": 0.886, "step": 201340 }, { "epoch": 1.2863677599887557, "grad_norm": 1.3500255346298218, "learning_rate": 2.8294834656283952e-05, "loss": 1.0935, "step": 201350 }, { "epoch": 1.2864316471384947, "grad_norm": 0.9647353291511536, "learning_rate": 2.8290314518332895e-05, "loss": 1.2346, "step": 201360 }, { "epoch": 1.2864955342882332, "grad_norm": 1.557719111442566, "learning_rate": 2.828579459901718e-05, "loss": 0.8093, "step": 201370 }, { "epoch": 1.286559421437972, "grad_norm": 0.6619776487350464, "learning_rate": 2.8281274898382275e-05, "loss": 0.8722, "step": 201380 }, { "epoch": 1.2866233085877106, "grad_norm": 0.9074650406837463, "learning_rate": 2.8276755416473744e-05, "loss": 0.6571, "step": 201390 }, { "epoch": 1.2866871957374495, "grad_norm": 1.4630404710769653, "learning_rate": 2.8272236153337055e-05, "loss": 1.0, "step": 201400 }, { "epoch": 1.286751082887188, "grad_norm": 0.8344331979751587, "learning_rate": 2.8267717109017765e-05, "loss": 0.7465, "step": 201410 }, { "epoch": 1.286814970036927, "grad_norm": 0.973581850528717, "learning_rate": 2.8263198283561347e-05, "loss": 0.8632, "step": 201420 }, { "epoch": 1.2868788571866654, "grad_norm": 1.26335608959198, "learning_rate": 2.825867967701335e-05, "loss": 0.8703, "step": 201430 }, { "epoch": 1.286942744336404, "grad_norm": 0.9915609955787659, "learning_rate": 2.8254161289419233e-05, "loss": 0.8515, "step": 201440 }, { "epoch": 1.2870066314861428, "grad_norm": 1.6091701984405518, "learning_rate": 2.824964312082455e-05, "loss": 1.0284, "step": 201450 }, { "epoch": 1.2870705186358815, "grad_norm": 0.7491897940635681, "learning_rate": 2.8245125171274755e-05, "loss": 0.9658, "step": 201460 }, { "epoch": 1.2871344057856202, "grad_norm": 1.554604411125183, "learning_rate": 2.8240607440815388e-05, "loss": 0.8597, "step": 201470 }, { "epoch": 1.287198292935359, "grad_norm": 0.7069583535194397, "learning_rate": 2.8236089929491912e-05, "loss": 0.7607, "step": 201480 }, { "epoch": 1.2872621800850976, "grad_norm": 0.6213060617446899, "learning_rate": 2.8231572637349856e-05, "loss": 0.9623, "step": 201490 }, { "epoch": 1.2873260672348363, "grad_norm": 1.7921992540359497, "learning_rate": 2.822705556443468e-05, "loss": 0.6583, "step": 201500 }, { "epoch": 1.287389954384575, "grad_norm": 0.7730182409286499, "learning_rate": 2.8222538710791903e-05, "loss": 0.6463, "step": 201510 }, { "epoch": 1.2874538415343137, "grad_norm": 0.7597857117652893, "learning_rate": 2.8218022076466987e-05, "loss": 0.8251, "step": 201520 }, { "epoch": 1.2875177286840525, "grad_norm": 1.0613079071044922, "learning_rate": 2.821350566150545e-05, "loss": 1.0157, "step": 201530 }, { "epoch": 1.2875816158337912, "grad_norm": 1.0213360786437988, "learning_rate": 2.820898946595274e-05, "loss": 0.807, "step": 201540 }, { "epoch": 1.2876455029835299, "grad_norm": 0.9103221297264099, "learning_rate": 2.820447348985436e-05, "loss": 0.7805, "step": 201550 }, { "epoch": 1.2877093901332686, "grad_norm": 1.3538568019866943, "learning_rate": 2.8199957733255806e-05, "loss": 0.8029, "step": 201560 }, { "epoch": 1.2877732772830073, "grad_norm": 0.81965571641922, "learning_rate": 2.8195442196202517e-05, "loss": 1.0121, "step": 201570 }, { "epoch": 1.287837164432746, "grad_norm": 0.8054186105728149, "learning_rate": 2.819092687874001e-05, "loss": 0.7716, "step": 201580 }, { "epoch": 1.2879010515824847, "grad_norm": 1.1727731227874756, "learning_rate": 2.8186411780913713e-05, "loss": 1.0159, "step": 201590 }, { "epoch": 1.2879649387322234, "grad_norm": 0.9019149541854858, "learning_rate": 2.8181896902769146e-05, "loss": 1.0994, "step": 201600 }, { "epoch": 1.288028825881962, "grad_norm": 1.1492351293563843, "learning_rate": 2.8177382244351736e-05, "loss": 1.0837, "step": 201610 }, { "epoch": 1.2880927130317008, "grad_norm": 0.8392195105552673, "learning_rate": 2.817286780570698e-05, "loss": 1.1313, "step": 201620 }, { "epoch": 1.2881566001814395, "grad_norm": 2.194507598876953, "learning_rate": 2.8168353586880304e-05, "loss": 0.9563, "step": 201630 }, { "epoch": 1.2882204873311782, "grad_norm": 1.052587866783142, "learning_rate": 2.816383958791724e-05, "loss": 0.7306, "step": 201640 }, { "epoch": 1.288284374480917, "grad_norm": 3.5647919178009033, "learning_rate": 2.8159325808863164e-05, "loss": 0.7595, "step": 201650 }, { "epoch": 1.2883482616306556, "grad_norm": 0.9285479187965393, "learning_rate": 2.8154812249763592e-05, "loss": 0.7623, "step": 201660 }, { "epoch": 1.2884121487803943, "grad_norm": 0.8144568800926208, "learning_rate": 2.8150298910663934e-05, "loss": 0.7509, "step": 201670 }, { "epoch": 1.288476035930133, "grad_norm": 0.8013102412223816, "learning_rate": 2.8145785791609674e-05, "loss": 0.7349, "step": 201680 }, { "epoch": 1.2885399230798718, "grad_norm": 1.1229387521743774, "learning_rate": 2.8141272892646276e-05, "loss": 1.0336, "step": 201690 }, { "epoch": 1.2886038102296105, "grad_norm": 1.2837129831314087, "learning_rate": 2.8136760213819148e-05, "loss": 0.8879, "step": 201700 }, { "epoch": 1.2886676973793492, "grad_norm": 2.003021717071533, "learning_rate": 2.813224775517378e-05, "loss": 0.7515, "step": 201710 }, { "epoch": 1.2887315845290879, "grad_norm": 1.0468463897705078, "learning_rate": 2.812773551675558e-05, "loss": 1.0192, "step": 201720 }, { "epoch": 1.2887954716788266, "grad_norm": 1.0019460916519165, "learning_rate": 2.812322349861002e-05, "loss": 1.0245, "step": 201730 }, { "epoch": 1.2888593588285653, "grad_norm": 1.5761841535568237, "learning_rate": 2.8118711700782506e-05, "loss": 0.7242, "step": 201740 }, { "epoch": 1.288923245978304, "grad_norm": 0.9731926918029785, "learning_rate": 2.811420012331852e-05, "loss": 0.9004, "step": 201750 }, { "epoch": 1.2889871331280427, "grad_norm": 1.1265745162963867, "learning_rate": 2.8109688766263446e-05, "loss": 0.6219, "step": 201760 }, { "epoch": 1.2890510202777814, "grad_norm": 1.1114789247512817, "learning_rate": 2.8105177629662772e-05, "loss": 0.8625, "step": 201770 }, { "epoch": 1.28911490742752, "grad_norm": 1.0359795093536377, "learning_rate": 2.8100666713561873e-05, "loss": 0.6437, "step": 201780 }, { "epoch": 1.2891787945772588, "grad_norm": 1.1450669765472412, "learning_rate": 2.8096156018006226e-05, "loss": 0.9989, "step": 201790 }, { "epoch": 1.2892426817269973, "grad_norm": 0.6831559538841248, "learning_rate": 2.8091645543041222e-05, "loss": 0.7288, "step": 201800 }, { "epoch": 1.2893065688767362, "grad_norm": 1.1486716270446777, "learning_rate": 2.808713528871232e-05, "loss": 0.8573, "step": 201810 }, { "epoch": 1.2893704560264747, "grad_norm": 0.6990073323249817, "learning_rate": 2.8082625255064903e-05, "loss": 0.9046, "step": 201820 }, { "epoch": 1.2894343431762136, "grad_norm": 0.7559660077095032, "learning_rate": 2.807811544214443e-05, "loss": 0.7093, "step": 201830 }, { "epoch": 1.2894982303259521, "grad_norm": 1.0000022649765015, "learning_rate": 2.807360584999628e-05, "loss": 1.1475, "step": 201840 }, { "epoch": 1.289562117475691, "grad_norm": 2.2029664516448975, "learning_rate": 2.8069096478665912e-05, "loss": 0.8044, "step": 201850 }, { "epoch": 1.2896260046254295, "grad_norm": 1.135852575302124, "learning_rate": 2.8064587328198687e-05, "loss": 0.7948, "step": 201860 }, { "epoch": 1.2896898917751685, "grad_norm": 1.2041714191436768, "learning_rate": 2.806007839864005e-05, "loss": 0.6547, "step": 201870 }, { "epoch": 1.289753778924907, "grad_norm": 0.8794607520103455, "learning_rate": 2.8055569690035422e-05, "loss": 0.8086, "step": 201880 }, { "epoch": 1.2898176660746459, "grad_norm": 1.313482642173767, "learning_rate": 2.8051061202430174e-05, "loss": 1.059, "step": 201890 }, { "epoch": 1.2898815532243844, "grad_norm": 1.017690896987915, "learning_rate": 2.8046552935869748e-05, "loss": 0.8971, "step": 201900 }, { "epoch": 1.2899454403741233, "grad_norm": 0.9315025806427002, "learning_rate": 2.8042044890399503e-05, "loss": 0.8328, "step": 201910 }, { "epoch": 1.2900093275238618, "grad_norm": 1.0727083683013916, "learning_rate": 2.803753706606489e-05, "loss": 0.8185, "step": 201920 }, { "epoch": 1.2900732146736005, "grad_norm": 0.7859057784080505, "learning_rate": 2.8033029462911254e-05, "loss": 0.8365, "step": 201930 }, { "epoch": 1.2901371018233392, "grad_norm": 0.6739004254341125, "learning_rate": 2.8028522080984032e-05, "loss": 1.0226, "step": 201940 }, { "epoch": 1.290200988973078, "grad_norm": 0.8840377926826477, "learning_rate": 2.8024014920328585e-05, "loss": 0.9998, "step": 201950 }, { "epoch": 1.2902648761228166, "grad_norm": 0.8898053169250488, "learning_rate": 2.8019507980990335e-05, "loss": 0.8163, "step": 201960 }, { "epoch": 1.2903287632725553, "grad_norm": 0.9021614789962769, "learning_rate": 2.801500126301464e-05, "loss": 0.9241, "step": 201970 }, { "epoch": 1.290392650422294, "grad_norm": 0.8183213472366333, "learning_rate": 2.801049476644692e-05, "loss": 1.012, "step": 201980 }, { "epoch": 1.2904565375720327, "grad_norm": 0.7091869115829468, "learning_rate": 2.8005988491332523e-05, "loss": 0.7812, "step": 201990 }, { "epoch": 1.2905204247217714, "grad_norm": 0.7986821532249451, "learning_rate": 2.800148243771687e-05, "loss": 0.67, "step": 202000 }, { "epoch": 1.2905843118715101, "grad_norm": 1.1651382446289062, "learning_rate": 2.79969766056453e-05, "loss": 0.8387, "step": 202010 }, { "epoch": 1.2906481990212488, "grad_norm": 0.9483960866928101, "learning_rate": 2.799247099516323e-05, "loss": 0.8228, "step": 202020 }, { "epoch": 1.2907120861709875, "grad_norm": 0.8649300932884216, "learning_rate": 2.798796560631599e-05, "loss": 0.8634, "step": 202030 }, { "epoch": 1.2907759733207262, "grad_norm": 0.8618859052658081, "learning_rate": 2.7983460439149013e-05, "loss": 0.6676, "step": 202040 }, { "epoch": 1.290839860470465, "grad_norm": 0.7927425503730774, "learning_rate": 2.7978955493707605e-05, "loss": 1.071, "step": 202050 }, { "epoch": 1.2909037476202037, "grad_norm": 1.109482765197754, "learning_rate": 2.7974450770037193e-05, "loss": 0.8577, "step": 202060 }, { "epoch": 1.2909676347699424, "grad_norm": 0.8940995931625366, "learning_rate": 2.796994626818309e-05, "loss": 0.9325, "step": 202070 }, { "epoch": 1.291031521919681, "grad_norm": 2.62735652923584, "learning_rate": 2.7965441988190694e-05, "loss": 1.0661, "step": 202080 }, { "epoch": 1.2910954090694198, "grad_norm": 0.9144634008407593, "learning_rate": 2.7960937930105378e-05, "loss": 0.7727, "step": 202090 }, { "epoch": 1.2911592962191585, "grad_norm": 2.9492197036743164, "learning_rate": 2.795643409397246e-05, "loss": 0.943, "step": 202100 }, { "epoch": 1.2912231833688972, "grad_norm": 1.3305987119674683, "learning_rate": 2.7951930479837342e-05, "loss": 1.0945, "step": 202110 }, { "epoch": 1.291287070518636, "grad_norm": 0.779472291469574, "learning_rate": 2.7947427087745336e-05, "loss": 0.8876, "step": 202120 }, { "epoch": 1.2913509576683746, "grad_norm": 0.6640340685844421, "learning_rate": 2.7942923917741864e-05, "loss": 0.6774, "step": 202130 }, { "epoch": 1.2914148448181133, "grad_norm": 2.7910993099212646, "learning_rate": 2.7938420969872187e-05, "loss": 0.8141, "step": 202140 }, { "epoch": 1.291478731967852, "grad_norm": 4.312468528747559, "learning_rate": 2.7933918244181716e-05, "loss": 0.9578, "step": 202150 }, { "epoch": 1.2915426191175907, "grad_norm": 1.027847170829773, "learning_rate": 2.792941574071576e-05, "loss": 0.9301, "step": 202160 }, { "epoch": 1.2916065062673294, "grad_norm": 0.7103813886642456, "learning_rate": 2.7924913459519697e-05, "loss": 0.9317, "step": 202170 }, { "epoch": 1.2916703934170681, "grad_norm": 1.4262120723724365, "learning_rate": 2.792041140063884e-05, "loss": 0.7602, "step": 202180 }, { "epoch": 1.2917342805668068, "grad_norm": 1.1603199243545532, "learning_rate": 2.791590956411856e-05, "loss": 0.6214, "step": 202190 }, { "epoch": 1.2917981677165455, "grad_norm": 0.8781431913375854, "learning_rate": 2.7911407950004155e-05, "loss": 0.8384, "step": 202200 }, { "epoch": 1.2918620548662842, "grad_norm": 0.8929979205131531, "learning_rate": 2.790690655834098e-05, "loss": 0.8108, "step": 202210 }, { "epoch": 1.291925942016023, "grad_norm": 0.9821604490280151, "learning_rate": 2.790240538917439e-05, "loss": 0.886, "step": 202220 }, { "epoch": 1.2919898291657617, "grad_norm": 0.861404299736023, "learning_rate": 2.789790444254967e-05, "loss": 0.8588, "step": 202230 }, { "epoch": 1.2920537163155004, "grad_norm": 1.100280523300171, "learning_rate": 2.7893403718512202e-05, "loss": 0.9536, "step": 202240 }, { "epoch": 1.292117603465239, "grad_norm": 1.1146948337554932, "learning_rate": 2.7888903217107258e-05, "loss": 0.8328, "step": 202250 }, { "epoch": 1.2921814906149778, "grad_norm": 0.7769910097122192, "learning_rate": 2.7884402938380205e-05, "loss": 0.9671, "step": 202260 }, { "epoch": 1.2922453777647165, "grad_norm": 0.9938095211982727, "learning_rate": 2.7879902882376335e-05, "loss": 0.9469, "step": 202270 }, { "epoch": 1.2923092649144552, "grad_norm": 0.9267186522483826, "learning_rate": 2.7875403049140998e-05, "loss": 0.9497, "step": 202280 }, { "epoch": 1.2923731520641937, "grad_norm": 0.7188292741775513, "learning_rate": 2.7870903438719464e-05, "loss": 0.9467, "step": 202290 }, { "epoch": 1.2924370392139326, "grad_norm": 0.8244433999061584, "learning_rate": 2.786640405115711e-05, "loss": 0.7807, "step": 202300 }, { "epoch": 1.292500926363671, "grad_norm": 1.0395586490631104, "learning_rate": 2.7861904886499186e-05, "loss": 1.056, "step": 202310 }, { "epoch": 1.29256481351341, "grad_norm": 0.6310107707977295, "learning_rate": 2.7857405944791055e-05, "loss": 0.951, "step": 202320 }, { "epoch": 1.2926287006631485, "grad_norm": 1.7220028638839722, "learning_rate": 2.7852907226077984e-05, "loss": 0.9764, "step": 202330 }, { "epoch": 1.2926925878128874, "grad_norm": 1.0622987747192383, "learning_rate": 2.784840873040531e-05, "loss": 0.6364, "step": 202340 }, { "epoch": 1.292756474962626, "grad_norm": 2.590120792388916, "learning_rate": 2.7843910457818313e-05, "loss": 0.7963, "step": 202350 }, { "epoch": 1.2928203621123648, "grad_norm": 0.7834983468055725, "learning_rate": 2.7839412408362318e-05, "loss": 0.926, "step": 202360 }, { "epoch": 1.2928842492621033, "grad_norm": 0.9251220226287842, "learning_rate": 2.7834914582082595e-05, "loss": 0.8344, "step": 202370 }, { "epoch": 1.2929481364118423, "grad_norm": 0.9570052027702332, "learning_rate": 2.7830416979024476e-05, "loss": 0.8473, "step": 202380 }, { "epoch": 1.2930120235615807, "grad_norm": 0.9787630438804626, "learning_rate": 2.7825919599233217e-05, "loss": 0.8888, "step": 202390 }, { "epoch": 1.2930759107113194, "grad_norm": 0.9614351391792297, "learning_rate": 2.782142244275414e-05, "loss": 0.8092, "step": 202400 }, { "epoch": 1.2931397978610581, "grad_norm": 0.7499952912330627, "learning_rate": 2.781692550963254e-05, "loss": 0.7004, "step": 202410 }, { "epoch": 1.2932036850107969, "grad_norm": 1.7899943590164185, "learning_rate": 2.781242879991367e-05, "loss": 1.1293, "step": 202420 }, { "epoch": 1.2932675721605356, "grad_norm": 1.5646029710769653, "learning_rate": 2.780793231364286e-05, "loss": 0.8839, "step": 202430 }, { "epoch": 1.2933314593102743, "grad_norm": 0.5450949668884277, "learning_rate": 2.7803436050865346e-05, "loss": 0.8994, "step": 202440 }, { "epoch": 1.293395346460013, "grad_norm": 0.9725117087364197, "learning_rate": 2.779894001162645e-05, "loss": 0.7777, "step": 202450 }, { "epoch": 1.2934592336097517, "grad_norm": 1.1340949535369873, "learning_rate": 2.7794444195971426e-05, "loss": 0.7504, "step": 202460 }, { "epoch": 1.2935231207594904, "grad_norm": 0.9177945852279663, "learning_rate": 2.778994860394557e-05, "loss": 0.7452, "step": 202470 }, { "epoch": 1.293587007909229, "grad_norm": 1.466374158859253, "learning_rate": 2.778545323559413e-05, "loss": 0.7265, "step": 202480 }, { "epoch": 1.2936508950589678, "grad_norm": 1.491048812866211, "learning_rate": 2.7780958090962416e-05, "loss": 0.8083, "step": 202490 }, { "epoch": 1.2937147822087065, "grad_norm": 0.8648363947868347, "learning_rate": 2.7776463170095657e-05, "loss": 0.8844, "step": 202500 }, { "epoch": 1.2937786693584452, "grad_norm": 0.8939248919487, "learning_rate": 2.7771968473039156e-05, "loss": 0.7783, "step": 202510 }, { "epoch": 1.293842556508184, "grad_norm": 1.2932512760162354, "learning_rate": 2.7767473999838146e-05, "loss": 0.8104, "step": 202520 }, { "epoch": 1.2939064436579226, "grad_norm": 2.392772912979126, "learning_rate": 2.776297975053792e-05, "loss": 0.9477, "step": 202530 }, { "epoch": 1.2939703308076613, "grad_norm": 0.9002924561500549, "learning_rate": 2.7758485725183715e-05, "loss": 0.7206, "step": 202540 }, { "epoch": 1.2940342179574, "grad_norm": 0.7732874751091003, "learning_rate": 2.775399192382081e-05, "loss": 0.7847, "step": 202550 }, { "epoch": 1.2940981051071387, "grad_norm": 0.7313796877861023, "learning_rate": 2.774949834649444e-05, "loss": 0.7352, "step": 202560 }, { "epoch": 1.2941619922568774, "grad_norm": 0.8885713815689087, "learning_rate": 2.774500499324989e-05, "loss": 0.5844, "step": 202570 }, { "epoch": 1.2942258794066162, "grad_norm": 1.0987149477005005, "learning_rate": 2.7740511864132367e-05, "loss": 0.8516, "step": 202580 }, { "epoch": 1.2942897665563549, "grad_norm": 1.1210942268371582, "learning_rate": 2.773601895918717e-05, "loss": 0.9402, "step": 202590 }, { "epoch": 1.2943536537060936, "grad_norm": 0.91703200340271, "learning_rate": 2.7731526278459508e-05, "loss": 1.0145, "step": 202600 }, { "epoch": 1.2944175408558323, "grad_norm": 1.1130025386810303, "learning_rate": 2.7727033821994658e-05, "loss": 0.9353, "step": 202610 }, { "epoch": 1.294481428005571, "grad_norm": 1.0177847146987915, "learning_rate": 2.7722541589837847e-05, "loss": 0.9186, "step": 202620 }, { "epoch": 1.2945453151553097, "grad_norm": 1.2630997896194458, "learning_rate": 2.771804958203429e-05, "loss": 0.938, "step": 202630 }, { "epoch": 1.2946092023050484, "grad_norm": 0.7808921337127686, "learning_rate": 2.7713557798629274e-05, "loss": 0.8576, "step": 202640 }, { "epoch": 1.294673089454787, "grad_norm": 0.8194741010665894, "learning_rate": 2.7709066239667992e-05, "loss": 0.6407, "step": 202650 }, { "epoch": 1.2947369766045258, "grad_norm": 0.8097875714302063, "learning_rate": 2.770457490519572e-05, "loss": 1.0252, "step": 202660 }, { "epoch": 1.2948008637542645, "grad_norm": 1.127073884010315, "learning_rate": 2.770008379525764e-05, "loss": 0.8304, "step": 202670 }, { "epoch": 1.2948647509040032, "grad_norm": 1.1275410652160645, "learning_rate": 2.769559290989904e-05, "loss": 0.8219, "step": 202680 }, { "epoch": 1.294928638053742, "grad_norm": 1.8037633895874023, "learning_rate": 2.7691102249165084e-05, "loss": 1.0246, "step": 202690 }, { "epoch": 1.2949925252034806, "grad_norm": 1.1666561365127563, "learning_rate": 2.7686611813101048e-05, "loss": 0.7787, "step": 202700 }, { "epoch": 1.2950564123532193, "grad_norm": 1.1435728073120117, "learning_rate": 2.768212160175212e-05, "loss": 0.8529, "step": 202710 }, { "epoch": 1.295120299502958, "grad_norm": 1.3430248498916626, "learning_rate": 2.7677631615163535e-05, "loss": 1.0102, "step": 202720 }, { "epoch": 1.2951841866526967, "grad_norm": 2.008981704711914, "learning_rate": 2.7673141853380534e-05, "loss": 0.9389, "step": 202730 }, { "epoch": 1.2952480738024355, "grad_norm": 1.6296296119689941, "learning_rate": 2.7668652316448284e-05, "loss": 1.104, "step": 202740 }, { "epoch": 1.2953119609521742, "grad_norm": 0.7582008838653564, "learning_rate": 2.7664163004412046e-05, "loss": 0.8314, "step": 202750 }, { "epoch": 1.2953758481019129, "grad_norm": 0.9481918215751648, "learning_rate": 2.7659673917317e-05, "loss": 0.798, "step": 202760 }, { "epoch": 1.2954397352516516, "grad_norm": 0.6187413930892944, "learning_rate": 2.7655185055208365e-05, "loss": 0.9045, "step": 202770 }, { "epoch": 1.29550362240139, "grad_norm": 0.6845240592956543, "learning_rate": 2.7650696418131338e-05, "loss": 0.8514, "step": 202780 }, { "epoch": 1.295567509551129, "grad_norm": 1.132522702217102, "learning_rate": 2.7646208006131158e-05, "loss": 0.9722, "step": 202790 }, { "epoch": 1.2956313967008675, "grad_norm": 1.2485088109970093, "learning_rate": 2.7641719819252976e-05, "loss": 0.9917, "step": 202800 }, { "epoch": 1.2956952838506064, "grad_norm": 0.7616110444068909, "learning_rate": 2.763723185754204e-05, "loss": 0.745, "step": 202810 }, { "epoch": 1.2957591710003449, "grad_norm": 1.2537869215011597, "learning_rate": 2.7632744121043506e-05, "loss": 0.8795, "step": 202820 }, { "epoch": 1.2958230581500838, "grad_norm": 0.6776690483093262, "learning_rate": 2.7628256609802604e-05, "loss": 1.0657, "step": 202830 }, { "epoch": 1.2958869452998223, "grad_norm": 1.1423174142837524, "learning_rate": 2.76237693238645e-05, "loss": 0.8462, "step": 202840 }, { "epoch": 1.2959508324495612, "grad_norm": 1.0729666948318481, "learning_rate": 2.7619282263274414e-05, "loss": 0.6347, "step": 202850 }, { "epoch": 1.2960147195992997, "grad_norm": 0.7181875109672546, "learning_rate": 2.76147954280775e-05, "loss": 0.8179, "step": 202860 }, { "epoch": 1.2960786067490386, "grad_norm": 1.6496679782867432, "learning_rate": 2.7610308818318975e-05, "loss": 0.7566, "step": 202870 }, { "epoch": 1.2961424938987771, "grad_norm": 0.7662792205810547, "learning_rate": 2.760582243404399e-05, "loss": 0.8662, "step": 202880 }, { "epoch": 1.2962063810485158, "grad_norm": 1.0914605855941772, "learning_rate": 2.7601336275297774e-05, "loss": 1.068, "step": 202890 }, { "epoch": 1.2962702681982545, "grad_norm": 1.0450533628463745, "learning_rate": 2.7596850342125457e-05, "loss": 0.7495, "step": 202900 }, { "epoch": 1.2963341553479932, "grad_norm": 0.9505026936531067, "learning_rate": 2.759236463457226e-05, "loss": 0.7982, "step": 202910 }, { "epoch": 1.296398042497732, "grad_norm": 0.6942030787467957, "learning_rate": 2.7587879152683316e-05, "loss": 1.0429, "step": 202920 }, { "epoch": 1.2964619296474706, "grad_norm": 0.8163788914680481, "learning_rate": 2.7583393896503817e-05, "loss": 1.1042, "step": 202930 }, { "epoch": 1.2965258167972094, "grad_norm": 1.385325312614441, "learning_rate": 2.7578908866078957e-05, "loss": 0.8366, "step": 202940 }, { "epoch": 1.296589703946948, "grad_norm": 0.7578178644180298, "learning_rate": 2.757442406145385e-05, "loss": 1.1626, "step": 202950 }, { "epoch": 1.2966535910966868, "grad_norm": 0.9005945324897766, "learning_rate": 2.7569939482673724e-05, "loss": 0.8325, "step": 202960 }, { "epoch": 1.2967174782464255, "grad_norm": 0.9879159331321716, "learning_rate": 2.7565455129783692e-05, "loss": 1.0183, "step": 202970 }, { "epoch": 1.2967813653961642, "grad_norm": 0.6550315618515015, "learning_rate": 2.7560971002828954e-05, "loss": 0.6492, "step": 202980 }, { "epoch": 1.2968452525459029, "grad_norm": 0.9671928882598877, "learning_rate": 2.7556487101854632e-05, "loss": 0.997, "step": 202990 }, { "epoch": 1.2969091396956416, "grad_norm": 1.200095772743225, "learning_rate": 2.755200342690592e-05, "loss": 0.9367, "step": 203000 }, { "epoch": 1.2969730268453803, "grad_norm": 0.5706303715705872, "learning_rate": 2.7547519978027936e-05, "loss": 0.8241, "step": 203010 }, { "epoch": 1.297036913995119, "grad_norm": 3.405540704727173, "learning_rate": 2.7543036755265868e-05, "loss": 0.7368, "step": 203020 }, { "epoch": 1.2971008011448577, "grad_norm": 0.9094310998916626, "learning_rate": 2.7538553758664825e-05, "loss": 0.755, "step": 203030 }, { "epoch": 1.2971646882945964, "grad_norm": 0.6244282126426697, "learning_rate": 2.7534070988269998e-05, "loss": 0.7651, "step": 203040 }, { "epoch": 1.2972285754443351, "grad_norm": 0.7120745182037354, "learning_rate": 2.7529588444126498e-05, "loss": 1.2066, "step": 203050 }, { "epoch": 1.2972924625940738, "grad_norm": 0.8211696743965149, "learning_rate": 2.75251061262795e-05, "loss": 0.8375, "step": 203060 }, { "epoch": 1.2973563497438125, "grad_norm": 1.2300297021865845, "learning_rate": 2.7520624034774102e-05, "loss": 0.9132, "step": 203070 }, { "epoch": 1.2974202368935512, "grad_norm": 0.8189200758934021, "learning_rate": 2.751614216965549e-05, "loss": 0.7244, "step": 203080 }, { "epoch": 1.29748412404329, "grad_norm": 0.8568248152732849, "learning_rate": 2.7511660530968763e-05, "loss": 0.9051, "step": 203090 }, { "epoch": 1.2975480111930286, "grad_norm": 0.8858509659767151, "learning_rate": 2.7507179118759087e-05, "loss": 0.7753, "step": 203100 }, { "epoch": 1.2976118983427674, "grad_norm": 1.5307356119155884, "learning_rate": 2.750269793307157e-05, "loss": 1.024, "step": 203110 }, { "epoch": 1.297675785492506, "grad_norm": 1.075430989265442, "learning_rate": 2.7498216973951328e-05, "loss": 0.9816, "step": 203120 }, { "epoch": 1.2977396726422448, "grad_norm": 0.9090355634689331, "learning_rate": 2.7493736241443536e-05, "loss": 0.7039, "step": 203130 }, { "epoch": 1.2978035597919835, "grad_norm": 0.9631739854812622, "learning_rate": 2.7489703775977403e-05, "loss": 0.9271, "step": 203140 }, { "epoch": 1.2978674469417222, "grad_norm": 0.9455099105834961, "learning_rate": 2.748522347415753e-05, "loss": 0.9177, "step": 203150 }, { "epoch": 1.2979313340914609, "grad_norm": 0.8888641595840454, "learning_rate": 2.7480743399080912e-05, "loss": 1.0949, "step": 203160 }, { "epoch": 1.2979952212411996, "grad_norm": 0.8014704585075378, "learning_rate": 2.7476263550792703e-05, "loss": 0.776, "step": 203170 }, { "epoch": 1.2980591083909383, "grad_norm": 0.8085066080093384, "learning_rate": 2.747178392933799e-05, "loss": 1.003, "step": 203180 }, { "epoch": 1.298122995540677, "grad_norm": 1.0630215406417847, "learning_rate": 2.746730453476193e-05, "loss": 0.9934, "step": 203190 }, { "epoch": 1.2981868826904157, "grad_norm": 0.7368289232254028, "learning_rate": 2.7462825367109574e-05, "loss": 0.9137, "step": 203200 }, { "epoch": 1.2982507698401544, "grad_norm": 0.7954838275909424, "learning_rate": 2.745834642642609e-05, "loss": 0.7045, "step": 203210 }, { "epoch": 1.2983146569898931, "grad_norm": 0.7497609853744507, "learning_rate": 2.7453867712756542e-05, "loss": 1.1313, "step": 203220 }, { "epoch": 1.2983785441396318, "grad_norm": 1.072345495223999, "learning_rate": 2.7449389226146066e-05, "loss": 0.8416, "step": 203230 }, { "epoch": 1.2984424312893705, "grad_norm": 0.7450870871543884, "learning_rate": 2.7444910966639735e-05, "loss": 0.8321, "step": 203240 }, { "epoch": 1.298506318439109, "grad_norm": 1.326153039932251, "learning_rate": 2.7440432934282677e-05, "loss": 0.9216, "step": 203250 }, { "epoch": 1.298570205588848, "grad_norm": 1.1570442914962769, "learning_rate": 2.7435955129119984e-05, "loss": 0.8476, "step": 203260 }, { "epoch": 1.2986340927385864, "grad_norm": 1.0663647651672363, "learning_rate": 2.7431477551196716e-05, "loss": 0.9252, "step": 203270 }, { "epoch": 1.2986979798883254, "grad_norm": 0.7784600257873535, "learning_rate": 2.742700020055802e-05, "loss": 0.9039, "step": 203280 }, { "epoch": 1.2987618670380638, "grad_norm": 1.4422059059143066, "learning_rate": 2.742252307724893e-05, "loss": 0.875, "step": 203290 }, { "epoch": 1.2988257541878028, "grad_norm": 1.8490077257156372, "learning_rate": 2.7418046181314595e-05, "loss": 1.1439, "step": 203300 }, { "epoch": 1.2988896413375413, "grad_norm": 0.5135200619697571, "learning_rate": 2.7413569512800048e-05, "loss": 1.1235, "step": 203310 }, { "epoch": 1.2989535284872802, "grad_norm": 1.370787501335144, "learning_rate": 2.7409093071750415e-05, "loss": 0.7495, "step": 203320 }, { "epoch": 1.2990174156370187, "grad_norm": 1.1378347873687744, "learning_rate": 2.7404616858210742e-05, "loss": 0.7128, "step": 203330 }, { "epoch": 1.2990813027867576, "grad_norm": 0.702448308467865, "learning_rate": 2.7400140872226145e-05, "loss": 0.6843, "step": 203340 }, { "epoch": 1.299145189936496, "grad_norm": 0.7693853378295898, "learning_rate": 2.7395665113841655e-05, "loss": 1.0961, "step": 203350 }, { "epoch": 1.299209077086235, "grad_norm": 0.9960201978683472, "learning_rate": 2.7391189583102393e-05, "loss": 1.2116, "step": 203360 }, { "epoch": 1.2992729642359735, "grad_norm": 1.000061273574829, "learning_rate": 2.73867142800534e-05, "loss": 1.019, "step": 203370 }, { "epoch": 1.2993368513857122, "grad_norm": 0.744613766670227, "learning_rate": 2.7382239204739752e-05, "loss": 0.8507, "step": 203380 }, { "epoch": 1.299400738535451, "grad_norm": 1.8533433675765991, "learning_rate": 2.7377764357206542e-05, "loss": 0.6657, "step": 203390 }, { "epoch": 1.2994646256851896, "grad_norm": 0.7121914029121399, "learning_rate": 2.7373289737498798e-05, "loss": 1.226, "step": 203400 }, { "epoch": 1.2995285128349283, "grad_norm": 0.8955390453338623, "learning_rate": 2.736881534566162e-05, "loss": 0.8282, "step": 203410 }, { "epoch": 1.299592399984667, "grad_norm": 1.5612226724624634, "learning_rate": 2.7364341181740027e-05, "loss": 0.7724, "step": 203420 }, { "epoch": 1.2996562871344057, "grad_norm": 0.8913554549217224, "learning_rate": 2.7359867245779124e-05, "loss": 0.7936, "step": 203430 }, { "epoch": 1.2997201742841444, "grad_norm": 0.7717186808586121, "learning_rate": 2.7355393537823925e-05, "loss": 1.1769, "step": 203440 }, { "epoch": 1.2997840614338831, "grad_norm": 1.203664779663086, "learning_rate": 2.7350920057919527e-05, "loss": 0.7913, "step": 203450 }, { "epoch": 1.2998479485836218, "grad_norm": 1.1021778583526611, "learning_rate": 2.7346446806110938e-05, "loss": 0.7538, "step": 203460 }, { "epoch": 1.2999118357333606, "grad_norm": 1.160457730293274, "learning_rate": 2.7341973782443242e-05, "loss": 0.7314, "step": 203470 }, { "epoch": 1.2999757228830993, "grad_norm": 0.8621507287025452, "learning_rate": 2.7337500986961452e-05, "loss": 0.7591, "step": 203480 }, { "epoch": 1.300039610032838, "grad_norm": 0.8076770901679993, "learning_rate": 2.7333028419710654e-05, "loss": 0.8353, "step": 203490 }, { "epoch": 1.3001034971825767, "grad_norm": 0.6418294310569763, "learning_rate": 2.732855608073585e-05, "loss": 0.8851, "step": 203500 }, { "epoch": 1.3001673843323154, "grad_norm": 0.958096444606781, "learning_rate": 2.7324083970082127e-05, "loss": 0.8157, "step": 203510 }, { "epoch": 1.300231271482054, "grad_norm": 0.9913421869277954, "learning_rate": 2.7319612087794465e-05, "loss": 0.7058, "step": 203520 }, { "epoch": 1.3002951586317928, "grad_norm": 0.5462644100189209, "learning_rate": 2.731514043391795e-05, "loss": 0.8737, "step": 203530 }, { "epoch": 1.3003590457815315, "grad_norm": 0.8152220845222473, "learning_rate": 2.7310669008497585e-05, "loss": 0.9175, "step": 203540 }, { "epoch": 1.3004229329312702, "grad_norm": 0.9578920602798462, "learning_rate": 2.7306197811578426e-05, "loss": 0.9223, "step": 203550 }, { "epoch": 1.300486820081009, "grad_norm": 0.9416980743408203, "learning_rate": 2.730172684320547e-05, "loss": 0.7785, "step": 203560 }, { "epoch": 1.3005507072307476, "grad_norm": 1.7427188158035278, "learning_rate": 2.7297256103423763e-05, "loss": 0.7719, "step": 203570 }, { "epoch": 1.3006145943804863, "grad_norm": 0.680845320224762, "learning_rate": 2.729278559227835e-05, "loss": 0.6889, "step": 203580 }, { "epoch": 1.300678481530225, "grad_norm": 0.7768273949623108, "learning_rate": 2.7288315309814205e-05, "loss": 1.0102, "step": 203590 }, { "epoch": 1.3007423686799637, "grad_norm": 1.1281657218933105, "learning_rate": 2.728384525607639e-05, "loss": 0.8399, "step": 203600 }, { "epoch": 1.3008062558297024, "grad_norm": 1.1821376085281372, "learning_rate": 2.7279375431109894e-05, "loss": 0.8216, "step": 203610 }, { "epoch": 1.3008701429794411, "grad_norm": 0.8288279175758362, "learning_rate": 2.7274905834959762e-05, "loss": 0.7453, "step": 203620 }, { "epoch": 1.3009340301291799, "grad_norm": 0.7814729809761047, "learning_rate": 2.7270436467670967e-05, "loss": 0.9919, "step": 203630 }, { "epoch": 1.3009979172789186, "grad_norm": 0.95287024974823, "learning_rate": 2.7265967329288557e-05, "loss": 0.8718, "step": 203640 }, { "epoch": 1.3010618044286573, "grad_norm": 1.3219366073608398, "learning_rate": 2.7261498419857513e-05, "loss": 1.092, "step": 203650 }, { "epoch": 1.301125691578396, "grad_norm": 0.7606185078620911, "learning_rate": 2.7257029739422855e-05, "loss": 0.995, "step": 203660 }, { "epoch": 1.3011895787281347, "grad_norm": 1.000482201576233, "learning_rate": 2.7252561288029577e-05, "loss": 0.8025, "step": 203670 }, { "epoch": 1.3012534658778734, "grad_norm": 1.1020547151565552, "learning_rate": 2.72480930657227e-05, "loss": 0.7672, "step": 203680 }, { "epoch": 1.301317353027612, "grad_norm": 1.0821468830108643, "learning_rate": 2.7243625072547196e-05, "loss": 0.6712, "step": 203690 }, { "epoch": 1.3013812401773508, "grad_norm": 0.9761326313018799, "learning_rate": 2.7239157308548093e-05, "loss": 0.96, "step": 203700 }, { "epoch": 1.3014451273270895, "grad_norm": 0.9703685641288757, "learning_rate": 2.723468977377034e-05, "loss": 0.6992, "step": 203710 }, { "epoch": 1.3015090144768282, "grad_norm": 0.81026691198349, "learning_rate": 2.7230222468258982e-05, "loss": 0.8019, "step": 203720 }, { "epoch": 1.301572901626567, "grad_norm": 1.1991046667099, "learning_rate": 2.7225755392058977e-05, "loss": 1.0236, "step": 203730 }, { "epoch": 1.3016367887763054, "grad_norm": 0.9381265044212341, "learning_rate": 2.72212885452153e-05, "loss": 0.6681, "step": 203740 }, { "epoch": 1.3017006759260443, "grad_norm": 0.7405054569244385, "learning_rate": 2.7216821927772972e-05, "loss": 0.8605, "step": 203750 }, { "epoch": 1.3017645630757828, "grad_norm": 1.3777233362197876, "learning_rate": 2.7212355539776947e-05, "loss": 1.4742, "step": 203760 }, { "epoch": 1.3018284502255217, "grad_norm": 1.1950933933258057, "learning_rate": 2.7207889381272224e-05, "loss": 0.794, "step": 203770 }, { "epoch": 1.3018923373752602, "grad_norm": 0.6299750804901123, "learning_rate": 2.720342345230376e-05, "loss": 0.6164, "step": 203780 }, { "epoch": 1.3019562245249992, "grad_norm": 0.9684973955154419, "learning_rate": 2.7198957752916566e-05, "loss": 1.3586, "step": 203790 }, { "epoch": 1.3020201116747376, "grad_norm": 0.8507335782051086, "learning_rate": 2.7194492283155566e-05, "loss": 0.9001, "step": 203800 }, { "epoch": 1.3020839988244766, "grad_norm": 1.953037977218628, "learning_rate": 2.719002704306578e-05, "loss": 1.129, "step": 203810 }, { "epoch": 1.302147885974215, "grad_norm": 0.7484040260314941, "learning_rate": 2.7185562032692137e-05, "loss": 1.016, "step": 203820 }, { "epoch": 1.302211773123954, "grad_norm": 1.1943165063858032, "learning_rate": 2.7181097252079636e-05, "loss": 0.9488, "step": 203830 }, { "epoch": 1.3022756602736925, "grad_norm": 1.1450726985931396, "learning_rate": 2.7176632701273215e-05, "loss": 0.8312, "step": 203840 }, { "epoch": 1.3023395474234314, "grad_norm": 0.670720636844635, "learning_rate": 2.7172168380317864e-05, "loss": 0.9008, "step": 203850 }, { "epoch": 1.3024034345731699, "grad_norm": 1.0023084878921509, "learning_rate": 2.7167704289258504e-05, "loss": 0.8328, "step": 203860 }, { "epoch": 1.3024673217229086, "grad_norm": 0.6697094440460205, "learning_rate": 2.7163240428140136e-05, "loss": 0.6307, "step": 203870 }, { "epoch": 1.3025312088726473, "grad_norm": 0.6451807022094727, "learning_rate": 2.715877679700768e-05, "loss": 0.7335, "step": 203880 }, { "epoch": 1.302595096022386, "grad_norm": 1.0496819019317627, "learning_rate": 2.7154313395906118e-05, "loss": 0.7024, "step": 203890 }, { "epoch": 1.3026589831721247, "grad_norm": 1.360062599182129, "learning_rate": 2.714985022488036e-05, "loss": 0.8303, "step": 203900 }, { "epoch": 1.3027228703218634, "grad_norm": 1.0181471109390259, "learning_rate": 2.7145387283975375e-05, "loss": 0.9936, "step": 203910 }, { "epoch": 1.302786757471602, "grad_norm": 0.9711088538169861, "learning_rate": 2.7140924573236142e-05, "loss": 0.8272, "step": 203920 }, { "epoch": 1.3028506446213408, "grad_norm": 1.731081247329712, "learning_rate": 2.7136462092707556e-05, "loss": 0.9242, "step": 203930 }, { "epoch": 1.3029145317710795, "grad_norm": 1.1637697219848633, "learning_rate": 2.7131999842434586e-05, "loss": 1.0044, "step": 203940 }, { "epoch": 1.3029784189208182, "grad_norm": 0.9074548482894897, "learning_rate": 2.7127537822462146e-05, "loss": 0.7322, "step": 203950 }, { "epoch": 1.303042306070557, "grad_norm": 1.384960651397705, "learning_rate": 2.7123076032835214e-05, "loss": 1.0899, "step": 203960 }, { "epoch": 1.3031061932202956, "grad_norm": 1.675883412361145, "learning_rate": 2.711861447359867e-05, "loss": 0.9829, "step": 203970 }, { "epoch": 1.3031700803700343, "grad_norm": 1.1326029300689697, "learning_rate": 2.7114153144797494e-05, "loss": 0.7603, "step": 203980 }, { "epoch": 1.303233967519773, "grad_norm": 0.8392053842544556, "learning_rate": 2.7109692046476576e-05, "loss": 0.947, "step": 203990 }, { "epoch": 1.3032978546695118, "grad_norm": 0.8956617712974548, "learning_rate": 2.7105231178680883e-05, "loss": 0.9789, "step": 204000 }, { "epoch": 1.3033617418192505, "grad_norm": 0.7766060829162598, "learning_rate": 2.71007705414553e-05, "loss": 0.7753, "step": 204010 }, { "epoch": 1.3034256289689892, "grad_norm": 0.7224856019020081, "learning_rate": 2.7096310134844783e-05, "loss": 0.815, "step": 204020 }, { "epoch": 1.3034895161187279, "grad_norm": 1.0599384307861328, "learning_rate": 2.7091849958894223e-05, "loss": 0.866, "step": 204030 }, { "epoch": 1.3035534032684666, "grad_norm": 2.4030120372772217, "learning_rate": 2.7087390013648574e-05, "loss": 0.8354, "step": 204040 }, { "epoch": 1.3036172904182053, "grad_norm": 1.1198670864105225, "learning_rate": 2.7082930299152704e-05, "loss": 0.6503, "step": 204050 }, { "epoch": 1.303681177567944, "grad_norm": 1.2229379415512085, "learning_rate": 2.7078470815451574e-05, "loss": 1.0611, "step": 204060 }, { "epoch": 1.3037450647176827, "grad_norm": 0.9799173474311829, "learning_rate": 2.7074011562590053e-05, "loss": 0.837, "step": 204070 }, { "epoch": 1.3038089518674214, "grad_norm": 1.169978380203247, "learning_rate": 2.7069552540613084e-05, "loss": 1.0953, "step": 204080 }, { "epoch": 1.3038728390171601, "grad_norm": 0.8275418877601624, "learning_rate": 2.706509374956555e-05, "loss": 0.8888, "step": 204090 }, { "epoch": 1.3039367261668988, "grad_norm": 0.7325239777565002, "learning_rate": 2.7060635189492355e-05, "loss": 0.6354, "step": 204100 }, { "epoch": 1.3040006133166375, "grad_norm": 1.0800108909606934, "learning_rate": 2.705617686043843e-05, "loss": 0.8502, "step": 204110 }, { "epoch": 1.3040645004663762, "grad_norm": 1.2910447120666504, "learning_rate": 2.705171876244864e-05, "loss": 0.7463, "step": 204120 }, { "epoch": 1.304128387616115, "grad_norm": 0.8230818510055542, "learning_rate": 2.7047260895567906e-05, "loss": 0.7637, "step": 204130 }, { "epoch": 1.3041922747658536, "grad_norm": 1.0600754022598267, "learning_rate": 2.70428032598411e-05, "loss": 0.8405, "step": 204140 }, { "epoch": 1.3042561619155923, "grad_norm": 1.061686635017395, "learning_rate": 2.703834585531314e-05, "loss": 0.8679, "step": 204150 }, { "epoch": 1.304320049065331, "grad_norm": 0.8024836778640747, "learning_rate": 2.7033888682028885e-05, "loss": 1.0255, "step": 204160 }, { "epoch": 1.3043839362150698, "grad_norm": 0.7017011642456055, "learning_rate": 2.7029431740033252e-05, "loss": 0.5799, "step": 204170 }, { "epoch": 1.3044478233648085, "grad_norm": 0.8407653570175171, "learning_rate": 2.7024975029371102e-05, "loss": 0.6548, "step": 204180 }, { "epoch": 1.3045117105145472, "grad_norm": 1.2621287107467651, "learning_rate": 2.702051855008735e-05, "loss": 0.821, "step": 204190 }, { "epoch": 1.3045755976642859, "grad_norm": 0.8817234039306641, "learning_rate": 2.701606230222683e-05, "loss": 0.8289, "step": 204200 }, { "epoch": 1.3046394848140246, "grad_norm": 1.5565587282180786, "learning_rate": 2.7011606285834467e-05, "loss": 1.0017, "step": 204210 }, { "epoch": 1.3047033719637633, "grad_norm": 0.8994980454444885, "learning_rate": 2.7007150500955124e-05, "loss": 1.0877, "step": 204220 }, { "epoch": 1.3047672591135018, "grad_norm": 1.5029292106628418, "learning_rate": 2.7002694947633643e-05, "loss": 0.7565, "step": 204230 }, { "epoch": 1.3048311462632407, "grad_norm": 0.6795468330383301, "learning_rate": 2.6998239625914934e-05, "loss": 0.7578, "step": 204240 }, { "epoch": 1.3048950334129792, "grad_norm": 2.024456262588501, "learning_rate": 2.6993784535843842e-05, "loss": 0.8655, "step": 204250 }, { "epoch": 1.3049589205627181, "grad_norm": 1.2158528566360474, "learning_rate": 2.6989329677465257e-05, "loss": 0.7881, "step": 204260 }, { "epoch": 1.3050228077124566, "grad_norm": 1.367469310760498, "learning_rate": 2.698487505082401e-05, "loss": 0.8068, "step": 204270 }, { "epoch": 1.3050866948621955, "grad_norm": 0.9547830820083618, "learning_rate": 2.6980420655965e-05, "loss": 1.0195, "step": 204280 }, { "epoch": 1.305150582011934, "grad_norm": 1.4500901699066162, "learning_rate": 2.6975966492933047e-05, "loss": 0.8647, "step": 204290 }, { "epoch": 1.305214469161673, "grad_norm": 0.7481394410133362, "learning_rate": 2.697151256177306e-05, "loss": 0.9651, "step": 204300 }, { "epoch": 1.3052783563114114, "grad_norm": 1.3653556108474731, "learning_rate": 2.696705886252984e-05, "loss": 0.8868, "step": 204310 }, { "epoch": 1.3053422434611504, "grad_norm": 0.5968084335327148, "learning_rate": 2.6962605395248276e-05, "loss": 1.1226, "step": 204320 }, { "epoch": 1.3054061306108888, "grad_norm": 0.7193194627761841, "learning_rate": 2.6958152159973195e-05, "loss": 0.8533, "step": 204330 }, { "epoch": 1.3054700177606278, "grad_norm": 0.8077860474586487, "learning_rate": 2.6953699156749467e-05, "loss": 0.7623, "step": 204340 }, { "epoch": 1.3055339049103662, "grad_norm": 1.3645210266113281, "learning_rate": 2.6949246385621912e-05, "loss": 0.7674, "step": 204350 }, { "epoch": 1.305597792060105, "grad_norm": 0.9736807942390442, "learning_rate": 2.694479384663541e-05, "loss": 0.8348, "step": 204360 }, { "epoch": 1.3056616792098437, "grad_norm": 0.9883267283439636, "learning_rate": 2.694034153983475e-05, "loss": 0.8904, "step": 204370 }, { "epoch": 1.3057255663595824, "grad_norm": 1.2597261667251587, "learning_rate": 2.693588946526483e-05, "loss": 0.7717, "step": 204380 }, { "epoch": 1.305789453509321, "grad_norm": 1.2701901197433472, "learning_rate": 2.6931437622970434e-05, "loss": 0.9456, "step": 204390 }, { "epoch": 1.3058533406590598, "grad_norm": 0.84967440366745, "learning_rate": 2.6926986012996436e-05, "loss": 0.7379, "step": 204400 }, { "epoch": 1.3059172278087985, "grad_norm": 0.866371750831604, "learning_rate": 2.692253463538763e-05, "loss": 1.0045, "step": 204410 }, { "epoch": 1.3059811149585372, "grad_norm": 0.6633437871932983, "learning_rate": 2.6918083490188865e-05, "loss": 0.8149, "step": 204420 }, { "epoch": 1.306045002108276, "grad_norm": 1.0880591869354248, "learning_rate": 2.6913632577444993e-05, "loss": 0.7891, "step": 204430 }, { "epoch": 1.3061088892580146, "grad_norm": 0.9105168581008911, "learning_rate": 2.6909181897200785e-05, "loss": 0.7461, "step": 204440 }, { "epoch": 1.3061727764077533, "grad_norm": 1.9122934341430664, "learning_rate": 2.690473144950112e-05, "loss": 0.7552, "step": 204450 }, { "epoch": 1.306236663557492, "grad_norm": 0.7733789682388306, "learning_rate": 2.6900281234390763e-05, "loss": 0.7394, "step": 204460 }, { "epoch": 1.3063005507072307, "grad_norm": 0.8355092406272888, "learning_rate": 2.6895831251914584e-05, "loss": 0.8982, "step": 204470 }, { "epoch": 1.3063644378569694, "grad_norm": 0.7982395887374878, "learning_rate": 2.6891381502117346e-05, "loss": 0.7494, "step": 204480 }, { "epoch": 1.3064283250067081, "grad_norm": 1.543968677520752, "learning_rate": 2.688693198504391e-05, "loss": 0.8552, "step": 204490 }, { "epoch": 1.3064922121564468, "grad_norm": 0.8467128276824951, "learning_rate": 2.6882482700739047e-05, "loss": 0.7735, "step": 204500 }, { "epoch": 1.3065560993061855, "grad_norm": 0.7143219709396362, "learning_rate": 2.6878033649247603e-05, "loss": 0.9257, "step": 204510 }, { "epoch": 1.3066199864559243, "grad_norm": 0.6512458324432373, "learning_rate": 2.687358483061434e-05, "loss": 0.886, "step": 204520 }, { "epoch": 1.306683873605663, "grad_norm": 1.0182723999023438, "learning_rate": 2.6869136244884108e-05, "loss": 0.7258, "step": 204530 }, { "epoch": 1.3067477607554017, "grad_norm": 0.6728656888008118, "learning_rate": 2.6864687892101658e-05, "loss": 0.8172, "step": 204540 }, { "epoch": 1.3068116479051404, "grad_norm": 3.287576675415039, "learning_rate": 2.6860239772311845e-05, "loss": 0.6821, "step": 204550 }, { "epoch": 1.306875535054879, "grad_norm": 0.7472635507583618, "learning_rate": 2.685579188555941e-05, "loss": 0.9219, "step": 204560 }, { "epoch": 1.3069394222046178, "grad_norm": 0.9223730564117432, "learning_rate": 2.6851344231889196e-05, "loss": 0.7348, "step": 204570 }, { "epoch": 1.3070033093543565, "grad_norm": 0.9666236042976379, "learning_rate": 2.6846896811345956e-05, "loss": 1.095, "step": 204580 }, { "epoch": 1.3070671965040952, "grad_norm": 0.946058452129364, "learning_rate": 2.6842449623974508e-05, "loss": 0.8144, "step": 204590 }, { "epoch": 1.307131083653834, "grad_norm": 0.9167945981025696, "learning_rate": 2.6838002669819616e-05, "loss": 1.1801, "step": 204600 }, { "epoch": 1.3071949708035726, "grad_norm": 1.565016746520996, "learning_rate": 2.6833555948926088e-05, "loss": 0.8639, "step": 204610 }, { "epoch": 1.3072588579533113, "grad_norm": 1.3720842599868774, "learning_rate": 2.6829109461338675e-05, "loss": 0.6832, "step": 204620 }, { "epoch": 1.30732274510305, "grad_norm": 0.6299474835395813, "learning_rate": 2.6824663207102174e-05, "loss": 0.6936, "step": 204630 }, { "epoch": 1.3073866322527887, "grad_norm": 5.6102752685546875, "learning_rate": 2.6820217186261387e-05, "loss": 0.8758, "step": 204640 }, { "epoch": 1.3074505194025274, "grad_norm": 0.8517172336578369, "learning_rate": 2.6815771398861044e-05, "loss": 0.8169, "step": 204650 }, { "epoch": 1.3075144065522661, "grad_norm": 0.85049968957901, "learning_rate": 2.6811325844945957e-05, "loss": 0.8638, "step": 204660 }, { "epoch": 1.3075782937020048, "grad_norm": 1.035893201828003, "learning_rate": 2.680688052456086e-05, "loss": 0.7654, "step": 204670 }, { "epoch": 1.3076421808517436, "grad_norm": 1.053261399269104, "learning_rate": 2.6802435437750573e-05, "loss": 0.8929, "step": 204680 }, { "epoch": 1.3077060680014823, "grad_norm": 0.8119083046913147, "learning_rate": 2.6797990584559794e-05, "loss": 0.825, "step": 204690 }, { "epoch": 1.307769955151221, "grad_norm": 1.3222981691360474, "learning_rate": 2.6793545965033375e-05, "loss": 0.7915, "step": 204700 }, { "epoch": 1.3078338423009597, "grad_norm": 1.0780036449432373, "learning_rate": 2.678910157921598e-05, "loss": 0.7689, "step": 204710 }, { "epoch": 1.3078977294506982, "grad_norm": 0.8080828785896301, "learning_rate": 2.6784657427152427e-05, "loss": 0.8974, "step": 204720 }, { "epoch": 1.307961616600437, "grad_norm": 1.2234289646148682, "learning_rate": 2.678021350888744e-05, "loss": 0.7415, "step": 204730 }, { "epoch": 1.3080255037501756, "grad_norm": 2.8002803325653076, "learning_rate": 2.67757698244658e-05, "loss": 0.8183, "step": 204740 }, { "epoch": 1.3080893908999145, "grad_norm": 1.115430474281311, "learning_rate": 2.677132637393226e-05, "loss": 1.0415, "step": 204750 }, { "epoch": 1.308153278049653, "grad_norm": 1.0875638723373413, "learning_rate": 2.6766883157331545e-05, "loss": 0.8105, "step": 204760 }, { "epoch": 1.308217165199392, "grad_norm": 1.2501674890518188, "learning_rate": 2.6762440174708432e-05, "loss": 0.8226, "step": 204770 }, { "epoch": 1.3082810523491304, "grad_norm": 0.7301515340805054, "learning_rate": 2.6757997426107627e-05, "loss": 0.8192, "step": 204780 }, { "epoch": 1.3083449394988693, "grad_norm": 1.4526420831680298, "learning_rate": 2.675355491157392e-05, "loss": 0.9808, "step": 204790 }, { "epoch": 1.3084088266486078, "grad_norm": 1.0374690294265747, "learning_rate": 2.6749112631152005e-05, "loss": 0.8933, "step": 204800 }, { "epoch": 1.3084727137983467, "grad_norm": 1.1937415599822998, "learning_rate": 2.6744670584886665e-05, "loss": 0.6994, "step": 204810 }, { "epoch": 1.3085366009480852, "grad_norm": 0.8651727437973022, "learning_rate": 2.6740228772822583e-05, "loss": 0.8431, "step": 204820 }, { "epoch": 1.308600488097824, "grad_norm": 0.766620397567749, "learning_rate": 2.6735787195004545e-05, "loss": 0.9116, "step": 204830 }, { "epoch": 1.3086643752475626, "grad_norm": 2.3932688236236572, "learning_rate": 2.6731345851477237e-05, "loss": 0.937, "step": 204840 }, { "epoch": 1.3087282623973013, "grad_norm": 0.8344107866287231, "learning_rate": 2.6726904742285425e-05, "loss": 0.7876, "step": 204850 }, { "epoch": 1.30879214954704, "grad_norm": 0.6391538381576538, "learning_rate": 2.6722463867473796e-05, "loss": 1.1981, "step": 204860 }, { "epoch": 1.3088560366967787, "grad_norm": 0.9423249363899231, "learning_rate": 2.6718023227087114e-05, "loss": 1.0876, "step": 204870 }, { "epoch": 1.3089199238465175, "grad_norm": 2.713463306427002, "learning_rate": 2.671358282117006e-05, "loss": 0.8284, "step": 204880 }, { "epoch": 1.3089838109962562, "grad_norm": 0.9338700175285339, "learning_rate": 2.6709142649767393e-05, "loss": 0.7454, "step": 204890 }, { "epoch": 1.3090476981459949, "grad_norm": 0.5116655230522156, "learning_rate": 2.6704702712923783e-05, "loss": 0.6845, "step": 204900 }, { "epoch": 1.3091115852957336, "grad_norm": 1.5850673913955688, "learning_rate": 2.6700263010684e-05, "loss": 1.094, "step": 204910 }, { "epoch": 1.3091754724454723, "grad_norm": 0.7147300839424133, "learning_rate": 2.6695823543092703e-05, "loss": 0.7666, "step": 204920 }, { "epoch": 1.309239359595211, "grad_norm": 0.9105631709098816, "learning_rate": 2.6691384310194632e-05, "loss": 1.0769, "step": 204930 }, { "epoch": 1.3093032467449497, "grad_norm": 0.7940059304237366, "learning_rate": 2.6686945312034483e-05, "loss": 0.7876, "step": 204940 }, { "epoch": 1.3093671338946884, "grad_norm": 0.9063543081283569, "learning_rate": 2.6682506548656956e-05, "loss": 0.8683, "step": 204950 }, { "epoch": 1.309431021044427, "grad_norm": 0.8429856300354004, "learning_rate": 2.6678068020106777e-05, "loss": 0.8658, "step": 204960 }, { "epoch": 1.3094949081941658, "grad_norm": 0.8013402819633484, "learning_rate": 2.6673629726428616e-05, "loss": 1.0001, "step": 204970 }, { "epoch": 1.3095587953439045, "grad_norm": 1.4775573015213013, "learning_rate": 2.66691916676672e-05, "loss": 0.6973, "step": 204980 }, { "epoch": 1.3096226824936432, "grad_norm": 0.7938587665557861, "learning_rate": 2.6664753843867186e-05, "loss": 0.8371, "step": 204990 }, { "epoch": 1.309686569643382, "grad_norm": 1.0357427597045898, "learning_rate": 2.6660316255073313e-05, "loss": 0.6869, "step": 205000 }, { "epoch": 1.3097504567931206, "grad_norm": 1.279012680053711, "learning_rate": 2.6655878901330222e-05, "loss": 0.9997, "step": 205010 }, { "epoch": 1.3098143439428593, "grad_norm": 1.9539459943771362, "learning_rate": 2.665144178268265e-05, "loss": 0.7651, "step": 205020 }, { "epoch": 1.309878231092598, "grad_norm": 0.8567860126495361, "learning_rate": 2.6647004899175244e-05, "loss": 0.8133, "step": 205030 }, { "epoch": 1.3099421182423368, "grad_norm": 0.858010470867157, "learning_rate": 2.664256825085271e-05, "loss": 0.9452, "step": 205040 }, { "epoch": 1.3100060053920755, "grad_norm": 1.1622644662857056, "learning_rate": 2.6638131837759705e-05, "loss": 0.7909, "step": 205050 }, { "epoch": 1.3100698925418142, "grad_norm": 1.1447198390960693, "learning_rate": 2.663369565994095e-05, "loss": 1.0433, "step": 205060 }, { "epoch": 1.3101337796915529, "grad_norm": 1.075270175933838, "learning_rate": 2.6629259717441067e-05, "loss": 0.9877, "step": 205070 }, { "epoch": 1.3101976668412916, "grad_norm": 0.7950900197029114, "learning_rate": 2.6624824010304778e-05, "loss": 0.7474, "step": 205080 }, { "epoch": 1.3102615539910303, "grad_norm": 0.9394268989562988, "learning_rate": 2.662038853857671e-05, "loss": 0.8652, "step": 205090 }, { "epoch": 1.310325441140769, "grad_norm": 0.8892121911048889, "learning_rate": 2.661595330230159e-05, "loss": 0.628, "step": 205100 }, { "epoch": 1.3103893282905077, "grad_norm": 1.6581279039382935, "learning_rate": 2.6611518301524017e-05, "loss": 1.0087, "step": 205110 }, { "epoch": 1.3104532154402464, "grad_norm": 1.5723330974578857, "learning_rate": 2.6607083536288714e-05, "loss": 0.8922, "step": 205120 }, { "epoch": 1.310517102589985, "grad_norm": 1.2891085147857666, "learning_rate": 2.6602649006640297e-05, "loss": 1.057, "step": 205130 }, { "epoch": 1.3105809897397238, "grad_norm": 0.9596815705299377, "learning_rate": 2.6598214712623466e-05, "loss": 0.9955, "step": 205140 }, { "epoch": 1.3106448768894625, "grad_norm": 1.300654649734497, "learning_rate": 2.6593780654282836e-05, "loss": 0.6487, "step": 205150 }, { "epoch": 1.3107087640392012, "grad_norm": 0.7440917491912842, "learning_rate": 2.6589346831663086e-05, "loss": 1.0035, "step": 205160 }, { "epoch": 1.31077265118894, "grad_norm": 1.055013656616211, "learning_rate": 2.6584913244808886e-05, "loss": 0.8536, "step": 205170 }, { "epoch": 1.3108365383386786, "grad_norm": 0.8633022308349609, "learning_rate": 2.6580479893764842e-05, "loss": 0.8186, "step": 205180 }, { "epoch": 1.3109004254884173, "grad_norm": 0.9468743801116943, "learning_rate": 2.6576046778575673e-05, "loss": 1.0472, "step": 205190 }, { "epoch": 1.310964312638156, "grad_norm": 1.479753851890564, "learning_rate": 2.657161389928594e-05, "loss": 1.029, "step": 205200 }, { "epoch": 1.3110281997878945, "grad_norm": 0.9764959812164307, "learning_rate": 2.6567181255940333e-05, "loss": 0.7093, "step": 205210 }, { "epoch": 1.3110920869376335, "grad_norm": 0.9881764054298401, "learning_rate": 2.6563192078698378e-05, "loss": 1.1946, "step": 205220 }, { "epoch": 1.311155974087372, "grad_norm": 0.648379385471344, "learning_rate": 2.655875988376958e-05, "loss": 1.0451, "step": 205230 }, { "epoch": 1.3112198612371109, "grad_norm": 0.9674725532531738, "learning_rate": 2.6554327924914335e-05, "loss": 1.0126, "step": 205240 }, { "epoch": 1.3112837483868494, "grad_norm": 0.6536328792572021, "learning_rate": 2.6549896202177305e-05, "loss": 0.7331, "step": 205250 }, { "epoch": 1.3113476355365883, "grad_norm": 0.9326198101043701, "learning_rate": 2.654546471560309e-05, "loss": 0.8201, "step": 205260 }, { "epoch": 1.3114115226863268, "grad_norm": 0.8836464285850525, "learning_rate": 2.6541033465236335e-05, "loss": 0.7147, "step": 205270 }, { "epoch": 1.3114754098360657, "grad_norm": 0.9043899178504944, "learning_rate": 2.653660245112169e-05, "loss": 0.8064, "step": 205280 }, { "epoch": 1.3115392969858042, "grad_norm": 0.5837283134460449, "learning_rate": 2.6532171673303736e-05, "loss": 1.2369, "step": 205290 }, { "epoch": 1.311603184135543, "grad_norm": 1.130934238433838, "learning_rate": 2.652774113182713e-05, "loss": 0.8727, "step": 205300 }, { "epoch": 1.3116670712852816, "grad_norm": 1.0953987836837769, "learning_rate": 2.652331082673647e-05, "loss": 1.0385, "step": 205310 }, { "epoch": 1.3117309584350203, "grad_norm": 0.9843935966491699, "learning_rate": 2.651888075807639e-05, "loss": 0.8724, "step": 205320 }, { "epoch": 1.311794845584759, "grad_norm": 1.0219281911849976, "learning_rate": 2.6514450925891476e-05, "loss": 0.8231, "step": 205330 }, { "epoch": 1.3118587327344977, "grad_norm": 0.6493175625801086, "learning_rate": 2.65100213302264e-05, "loss": 1.2553, "step": 205340 }, { "epoch": 1.3119226198842364, "grad_norm": 1.3615765571594238, "learning_rate": 2.6505591971125694e-05, "loss": 1.0284, "step": 205350 }, { "epoch": 1.3119865070339751, "grad_norm": 1.0743480920791626, "learning_rate": 2.650116284863402e-05, "loss": 0.8086, "step": 205360 }, { "epoch": 1.3120503941837138, "grad_norm": 0.7754031419754028, "learning_rate": 2.6496733962795944e-05, "loss": 0.8282, "step": 205370 }, { "epoch": 1.3121142813334525, "grad_norm": 1.0241520404815674, "learning_rate": 2.6492305313656108e-05, "loss": 1.0423, "step": 205380 }, { "epoch": 1.3121781684831912, "grad_norm": 0.8662487864494324, "learning_rate": 2.648787690125908e-05, "loss": 0.9171, "step": 205390 }, { "epoch": 1.31224205563293, "grad_norm": 1.0339150428771973, "learning_rate": 2.6483448725649473e-05, "loss": 0.9033, "step": 205400 }, { "epoch": 1.3123059427826687, "grad_norm": 0.7531540393829346, "learning_rate": 2.64790207868719e-05, "loss": 0.8135, "step": 205410 }, { "epoch": 1.3123698299324074, "grad_norm": 0.8095221519470215, "learning_rate": 2.6474593084970913e-05, "loss": 0.7887, "step": 205420 }, { "epoch": 1.312433717082146, "grad_norm": 1.9569956064224243, "learning_rate": 2.647016561999115e-05, "loss": 0.6692, "step": 205430 }, { "epoch": 1.3124976042318848, "grad_norm": 1.539527177810669, "learning_rate": 2.6465738391977158e-05, "loss": 1.0463, "step": 205440 }, { "epoch": 1.3125614913816235, "grad_norm": 0.9961333870887756, "learning_rate": 2.6461311400973553e-05, "loss": 0.868, "step": 205450 }, { "epoch": 1.3126253785313622, "grad_norm": 0.5948057174682617, "learning_rate": 2.6456884647024894e-05, "loss": 0.6536, "step": 205460 }, { "epoch": 1.312689265681101, "grad_norm": 1.2141871452331543, "learning_rate": 2.6452458130175784e-05, "loss": 0.9659, "step": 205470 }, { "epoch": 1.3127531528308396, "grad_norm": 0.9608542919158936, "learning_rate": 2.644803185047078e-05, "loss": 0.7377, "step": 205480 }, { "epoch": 1.3128170399805783, "grad_norm": 1.4741836786270142, "learning_rate": 2.6443605807954492e-05, "loss": 0.9431, "step": 205490 }, { "epoch": 1.312880927130317, "grad_norm": 1.3734500408172607, "learning_rate": 2.6439180002671448e-05, "loss": 0.8823, "step": 205500 }, { "epoch": 1.3129448142800557, "grad_norm": 1.110561490058899, "learning_rate": 2.6434754434666264e-05, "loss": 0.8809, "step": 205510 }, { "epoch": 1.3130087014297944, "grad_norm": 0.6751860976219177, "learning_rate": 2.6430329103983475e-05, "loss": 0.9978, "step": 205520 }, { "epoch": 1.3130725885795331, "grad_norm": 0.9452330470085144, "learning_rate": 2.642590401066768e-05, "loss": 0.893, "step": 205530 }, { "epoch": 1.3131364757292718, "grad_norm": 1.2708438634872437, "learning_rate": 2.6421479154763406e-05, "loss": 0.8002, "step": 205540 }, { "epoch": 1.3132003628790105, "grad_norm": 1.1103713512420654, "learning_rate": 2.6417054536315254e-05, "loss": 0.832, "step": 205550 }, { "epoch": 1.3132642500287492, "grad_norm": 1.1159241199493408, "learning_rate": 2.6412630155367747e-05, "loss": 0.9068, "step": 205560 }, { "epoch": 1.313328137178488, "grad_norm": 1.3691654205322266, "learning_rate": 2.6408206011965474e-05, "loss": 0.8826, "step": 205570 }, { "epoch": 1.3133920243282267, "grad_norm": 0.7377268075942993, "learning_rate": 2.6403782106152964e-05, "loss": 0.774, "step": 205580 }, { "epoch": 1.3134559114779654, "grad_norm": 1.0901238918304443, "learning_rate": 2.6399358437974776e-05, "loss": 0.798, "step": 205590 }, { "epoch": 1.313519798627704, "grad_norm": 0.6787892580032349, "learning_rate": 2.6394935007475492e-05, "loss": 0.8506, "step": 205600 }, { "epoch": 1.3135836857774428, "grad_norm": 0.7032554149627686, "learning_rate": 2.639051181469961e-05, "loss": 0.8875, "step": 205610 }, { "epoch": 1.3136475729271815, "grad_norm": 0.7459577918052673, "learning_rate": 2.6386088859691714e-05, "loss": 0.9456, "step": 205620 }, { "epoch": 1.3137114600769202, "grad_norm": 1.2270710468292236, "learning_rate": 2.6381666142496324e-05, "loss": 0.8654, "step": 205630 }, { "epoch": 1.313775347226659, "grad_norm": 1.247017741203308, "learning_rate": 2.6377243663158e-05, "loss": 0.9573, "step": 205640 }, { "epoch": 1.3138392343763976, "grad_norm": 1.0125858783721924, "learning_rate": 2.6372821421721255e-05, "loss": 0.9125, "step": 205650 }, { "epoch": 1.3139031215261363, "grad_norm": 0.7184951901435852, "learning_rate": 2.6368399418230656e-05, "loss": 1.0898, "step": 205660 }, { "epoch": 1.313967008675875, "grad_norm": 0.9848329424858093, "learning_rate": 2.6363977652730698e-05, "loss": 1.1097, "step": 205670 }, { "epoch": 1.3140308958256135, "grad_norm": 1.3619470596313477, "learning_rate": 2.6359556125265948e-05, "loss": 0.9542, "step": 205680 }, { "epoch": 1.3140947829753524, "grad_norm": 1.8601772785186768, "learning_rate": 2.6355134835880912e-05, "loss": 1.1239, "step": 205690 }, { "epoch": 1.314158670125091, "grad_norm": 1.0715014934539795, "learning_rate": 2.6350713784620135e-05, "loss": 0.8293, "step": 205700 }, { "epoch": 1.3142225572748298, "grad_norm": 1.2173364162445068, "learning_rate": 2.634629297152811e-05, "loss": 0.6619, "step": 205710 }, { "epoch": 1.3142864444245683, "grad_norm": 0.6807637214660645, "learning_rate": 2.6341872396649404e-05, "loss": 0.8755, "step": 205720 }, { "epoch": 1.3143503315743073, "grad_norm": 1.259724736213684, "learning_rate": 2.6337452060028485e-05, "loss": 0.9458, "step": 205730 }, { "epoch": 1.3144142187240457, "grad_norm": 3.6456003189086914, "learning_rate": 2.6333031961709914e-05, "loss": 0.9435, "step": 205740 }, { "epoch": 1.3144781058737847, "grad_norm": 1.5074976682662964, "learning_rate": 2.632861210173817e-05, "loss": 0.8298, "step": 205750 }, { "epoch": 1.3145419930235231, "grad_norm": 1.0483545064926147, "learning_rate": 2.6324192480157793e-05, "loss": 0.8742, "step": 205760 }, { "epoch": 1.314605880173262, "grad_norm": 0.7130438089370728, "learning_rate": 2.6319773097013266e-05, "loss": 1.0824, "step": 205770 }, { "epoch": 1.3146697673230006, "grad_norm": 0.7423893809318542, "learning_rate": 2.631535395234913e-05, "loss": 0.9463, "step": 205780 }, { "epoch": 1.3147336544727395, "grad_norm": 0.800720751285553, "learning_rate": 2.6310935046209857e-05, "loss": 0.9104, "step": 205790 }, { "epoch": 1.314797541622478, "grad_norm": 1.0936715602874756, "learning_rate": 2.6306516378639957e-05, "loss": 0.7183, "step": 205800 }, { "epoch": 1.3148614287722167, "grad_norm": 1.0465011596679688, "learning_rate": 2.6302097949683947e-05, "loss": 0.7369, "step": 205810 }, { "epoch": 1.3149253159219554, "grad_norm": 0.7417481541633606, "learning_rate": 2.6297679759386318e-05, "loss": 0.7971, "step": 205820 }, { "epoch": 1.314989203071694, "grad_norm": 0.9712346792221069, "learning_rate": 2.629326180779156e-05, "loss": 0.5761, "step": 205830 }, { "epoch": 1.3150530902214328, "grad_norm": 1.158448576927185, "learning_rate": 2.6288844094944142e-05, "loss": 0.8257, "step": 205840 }, { "epoch": 1.3151169773711715, "grad_norm": 0.7787433862686157, "learning_rate": 2.62844266208886e-05, "loss": 0.7885, "step": 205850 }, { "epoch": 1.3151808645209102, "grad_norm": 1.6316027641296387, "learning_rate": 2.628000938566938e-05, "loss": 0.9431, "step": 205860 }, { "epoch": 1.315244751670649, "grad_norm": 1.5881561040878296, "learning_rate": 2.6275592389331004e-05, "loss": 0.5653, "step": 205870 }, { "epoch": 1.3153086388203876, "grad_norm": 1.7636117935180664, "learning_rate": 2.627117563191792e-05, "loss": 0.6936, "step": 205880 }, { "epoch": 1.3153725259701263, "grad_norm": 1.057289719581604, "learning_rate": 2.6266759113474644e-05, "loss": 0.6354, "step": 205890 }, { "epoch": 1.315436413119865, "grad_norm": 1.322198510169983, "learning_rate": 2.6262342834045617e-05, "loss": 0.964, "step": 205900 }, { "epoch": 1.3155003002696037, "grad_norm": 1.3690789937973022, "learning_rate": 2.6257926793675346e-05, "loss": 0.7329, "step": 205910 }, { "epoch": 1.3155641874193424, "grad_norm": 0.6922479271888733, "learning_rate": 2.625351099240828e-05, "loss": 0.8405, "step": 205920 }, { "epoch": 1.3156280745690812, "grad_norm": 0.8695868849754333, "learning_rate": 2.62490954302889e-05, "loss": 0.9644, "step": 205930 }, { "epoch": 1.3156919617188199, "grad_norm": 1.797025442123413, "learning_rate": 2.6244680107361695e-05, "loss": 0.8523, "step": 205940 }, { "epoch": 1.3157558488685586, "grad_norm": 0.9863460063934326, "learning_rate": 2.6240265023671096e-05, "loss": 0.754, "step": 205950 }, { "epoch": 1.3158197360182973, "grad_norm": 1.008402943611145, "learning_rate": 2.62358501792616e-05, "loss": 0.8783, "step": 205960 }, { "epoch": 1.315883623168036, "grad_norm": 3.882580518722534, "learning_rate": 2.623143557417763e-05, "loss": 0.753, "step": 205970 }, { "epoch": 1.3159475103177747, "grad_norm": 1.0112920999526978, "learning_rate": 2.622702120846369e-05, "loss": 0.8311, "step": 205980 }, { "epoch": 1.3160113974675134, "grad_norm": 1.3579504489898682, "learning_rate": 2.6222607082164196e-05, "loss": 1.0214, "step": 205990 }, { "epoch": 1.316075284617252, "grad_norm": 0.6648353934288025, "learning_rate": 2.6218193195323636e-05, "loss": 0.9964, "step": 206000 }, { "epoch": 1.3161391717669908, "grad_norm": 1.2353508472442627, "learning_rate": 2.6213779547986422e-05, "loss": 0.9256, "step": 206010 }, { "epoch": 1.3162030589167295, "grad_norm": 0.7448508143424988, "learning_rate": 2.6209366140197045e-05, "loss": 1.0117, "step": 206020 }, { "epoch": 1.3162669460664682, "grad_norm": 1.2980650663375854, "learning_rate": 2.620495297199992e-05, "loss": 0.8087, "step": 206030 }, { "epoch": 1.316330833216207, "grad_norm": 1.022688388824463, "learning_rate": 2.6200540043439516e-05, "loss": 1.1366, "step": 206040 }, { "epoch": 1.3163947203659456, "grad_norm": 1.5544421672821045, "learning_rate": 2.6196127354560252e-05, "loss": 0.7394, "step": 206050 }, { "epoch": 1.3164586075156843, "grad_norm": 1.0281933546066284, "learning_rate": 2.6191714905406596e-05, "loss": 0.8466, "step": 206060 }, { "epoch": 1.316522494665423, "grad_norm": 1.1553758382797241, "learning_rate": 2.6187302696022942e-05, "loss": 0.9573, "step": 206070 }, { "epoch": 1.3165863818151617, "grad_norm": 0.6869220733642578, "learning_rate": 2.6182890726453774e-05, "loss": 0.9941, "step": 206080 }, { "epoch": 1.3166502689649004, "grad_norm": 0.8922250866889954, "learning_rate": 2.6178478996743483e-05, "loss": 0.8216, "step": 206090 }, { "epoch": 1.3167141561146392, "grad_norm": 1.8326857089996338, "learning_rate": 2.6174067506936534e-05, "loss": 0.7848, "step": 206100 }, { "epoch": 1.3167780432643779, "grad_norm": 1.1187126636505127, "learning_rate": 2.6169656257077323e-05, "loss": 0.8663, "step": 206110 }, { "epoch": 1.3168419304141166, "grad_norm": 1.288608193397522, "learning_rate": 2.6165245247210284e-05, "loss": 0.9236, "step": 206120 }, { "epoch": 1.3169058175638553, "grad_norm": 0.970199704170227, "learning_rate": 2.616083447737987e-05, "loss": 0.9909, "step": 206130 }, { "epoch": 1.316969704713594, "grad_norm": 1.1030552387237549, "learning_rate": 2.6156423947630448e-05, "loss": 0.8332, "step": 206140 }, { "epoch": 1.3170335918633327, "grad_norm": 1.9698622226715088, "learning_rate": 2.615201365800649e-05, "loss": 0.8564, "step": 206150 }, { "epoch": 1.3170974790130714, "grad_norm": 0.5434449315071106, "learning_rate": 2.6147603608552374e-05, "loss": 0.834, "step": 206160 }, { "epoch": 1.3171613661628099, "grad_norm": 0.7695097327232361, "learning_rate": 2.614319379931254e-05, "loss": 0.984, "step": 206170 }, { "epoch": 1.3172252533125488, "grad_norm": 0.7618531584739685, "learning_rate": 2.613878423033136e-05, "loss": 0.8815, "step": 206180 }, { "epoch": 1.3172891404622873, "grad_norm": 1.5356720685958862, "learning_rate": 2.613437490165328e-05, "loss": 0.7811, "step": 206190 }, { "epoch": 1.3173530276120262, "grad_norm": 0.938105583190918, "learning_rate": 2.6129965813322676e-05, "loss": 0.8789, "step": 206200 }, { "epoch": 1.3174169147617647, "grad_norm": 0.8890140652656555, "learning_rate": 2.612555696538399e-05, "loss": 1.1062, "step": 206210 }, { "epoch": 1.3174808019115036, "grad_norm": 0.8505265116691589, "learning_rate": 2.6121148357881582e-05, "loss": 1.0629, "step": 206220 }, { "epoch": 1.3175446890612421, "grad_norm": 0.7777497172355652, "learning_rate": 2.6116739990859884e-05, "loss": 0.8503, "step": 206230 }, { "epoch": 1.317608576210981, "grad_norm": 1.8511273860931396, "learning_rate": 2.611233186436326e-05, "loss": 0.874, "step": 206240 }, { "epoch": 1.3176724633607195, "grad_norm": 0.8401346206665039, "learning_rate": 2.6107923978436134e-05, "loss": 0.6313, "step": 206250 }, { "epoch": 1.3177363505104585, "grad_norm": 0.7148954272270203, "learning_rate": 2.6103516333122868e-05, "loss": 1.1435, "step": 206260 }, { "epoch": 1.317800237660197, "grad_norm": 1.3933757543563843, "learning_rate": 2.6099108928467885e-05, "loss": 0.8873, "step": 206270 }, { "epoch": 1.3178641248099359, "grad_norm": 0.9285461902618408, "learning_rate": 2.6094701764515528e-05, "loss": 0.8671, "step": 206280 }, { "epoch": 1.3179280119596744, "grad_norm": 0.5819171667098999, "learning_rate": 2.6090294841310227e-05, "loss": 1.0112, "step": 206290 }, { "epoch": 1.317991899109413, "grad_norm": 0.757918119430542, "learning_rate": 2.6085888158896328e-05, "loss": 0.814, "step": 206300 }, { "epoch": 1.3180557862591518, "grad_norm": 0.880970299243927, "learning_rate": 2.6081481717318236e-05, "loss": 0.7497, "step": 206310 }, { "epoch": 1.3181196734088905, "grad_norm": 1.6692066192626953, "learning_rate": 2.607707551662032e-05, "loss": 0.9548, "step": 206320 }, { "epoch": 1.3181835605586292, "grad_norm": 0.8854753971099854, "learning_rate": 2.607266955684693e-05, "loss": 1.0597, "step": 206330 }, { "epoch": 1.3182474477083679, "grad_norm": 0.6867426037788391, "learning_rate": 2.6068263838042473e-05, "loss": 0.7143, "step": 206340 }, { "epoch": 1.3183113348581066, "grad_norm": 0.73616623878479, "learning_rate": 2.6063858360251293e-05, "loss": 0.752, "step": 206350 }, { "epoch": 1.3183752220078453, "grad_norm": 1.009392261505127, "learning_rate": 2.6059453123517775e-05, "loss": 0.662, "step": 206360 }, { "epoch": 1.318439109157584, "grad_norm": 0.6935415267944336, "learning_rate": 2.6055048127886263e-05, "loss": 1.0914, "step": 206370 }, { "epoch": 1.3185029963073227, "grad_norm": 0.9365649819374084, "learning_rate": 2.605064337340115e-05, "loss": 0.7753, "step": 206380 }, { "epoch": 1.3185668834570614, "grad_norm": 1.1650468111038208, "learning_rate": 2.6046238860106754e-05, "loss": 0.7423, "step": 206390 }, { "epoch": 1.3186307706068001, "grad_norm": 1.0778311491012573, "learning_rate": 2.6041834588047475e-05, "loss": 0.9013, "step": 206400 }, { "epoch": 1.3186946577565388, "grad_norm": 0.8691341876983643, "learning_rate": 2.603743055726764e-05, "loss": 0.6258, "step": 206410 }, { "epoch": 1.3187585449062775, "grad_norm": 0.8959952592849731, "learning_rate": 2.603302676781162e-05, "loss": 0.9495, "step": 206420 }, { "epoch": 1.3188224320560162, "grad_norm": 0.8454727530479431, "learning_rate": 2.6028623219723734e-05, "loss": 1.005, "step": 206430 }, { "epoch": 1.318886319205755, "grad_norm": 1.0210155248641968, "learning_rate": 2.6024219913048353e-05, "loss": 1.0072, "step": 206440 }, { "epoch": 1.3189502063554936, "grad_norm": 1.145677089691162, "learning_rate": 2.601981684782984e-05, "loss": 1.0031, "step": 206450 }, { "epoch": 1.3190140935052324, "grad_norm": 1.3724578619003296, "learning_rate": 2.6015414024112494e-05, "loss": 0.9964, "step": 206460 }, { "epoch": 1.319077980654971, "grad_norm": 1.4045820236206055, "learning_rate": 2.6011011441940703e-05, "loss": 0.9096, "step": 206470 }, { "epoch": 1.3191418678047098, "grad_norm": 1.1010231971740723, "learning_rate": 2.6006609101358758e-05, "loss": 0.731, "step": 206480 }, { "epoch": 1.3192057549544485, "grad_norm": 0.9111044406890869, "learning_rate": 2.6002207002411038e-05, "loss": 0.9891, "step": 206490 }, { "epoch": 1.3192696421041872, "grad_norm": 1.2783539295196533, "learning_rate": 2.5997805145141834e-05, "loss": 0.8415, "step": 206500 }, { "epoch": 1.3193335292539259, "grad_norm": 1.8971495628356934, "learning_rate": 2.5993403529595518e-05, "loss": 0.746, "step": 206510 }, { "epoch": 1.3193974164036646, "grad_norm": 1.0325102806091309, "learning_rate": 2.5989002155816376e-05, "loss": 0.716, "step": 206520 }, { "epoch": 1.3194613035534033, "grad_norm": 0.8873870372772217, "learning_rate": 2.5984601023848776e-05, "loss": 0.8466, "step": 206530 }, { "epoch": 1.319525190703142, "grad_norm": 0.7020831108093262, "learning_rate": 2.5980200133736998e-05, "loss": 1.012, "step": 206540 }, { "epoch": 1.3195890778528807, "grad_norm": 1.0816673040390015, "learning_rate": 2.5975799485525404e-05, "loss": 1.2432, "step": 206550 }, { "epoch": 1.3196529650026194, "grad_norm": 1.4802582263946533, "learning_rate": 2.5971399079258273e-05, "loss": 0.7478, "step": 206560 }, { "epoch": 1.3197168521523581, "grad_norm": 0.6974547505378723, "learning_rate": 2.5966998914979964e-05, "loss": 0.7853, "step": 206570 }, { "epoch": 1.3197807393020968, "grad_norm": 1.2791091203689575, "learning_rate": 2.5962598992734743e-05, "loss": 1.0, "step": 206580 }, { "epoch": 1.3198446264518355, "grad_norm": 1.0931358337402344, "learning_rate": 2.5958199312566967e-05, "loss": 0.7855, "step": 206590 }, { "epoch": 1.3199085136015742, "grad_norm": 0.5698302388191223, "learning_rate": 2.5953799874520907e-05, "loss": 0.6276, "step": 206600 }, { "epoch": 1.319972400751313, "grad_norm": 1.2574687004089355, "learning_rate": 2.59494006786409e-05, "loss": 1.0715, "step": 206610 }, { "epoch": 1.3200362879010517, "grad_norm": 0.8968929648399353, "learning_rate": 2.594500172497122e-05, "loss": 0.9868, "step": 206620 }, { "epoch": 1.3201001750507904, "grad_norm": 0.8700588345527649, "learning_rate": 2.59406030135562e-05, "loss": 0.9474, "step": 206630 }, { "epoch": 1.320164062200529, "grad_norm": 0.9864295125007629, "learning_rate": 2.5936204544440102e-05, "loss": 0.7433, "step": 206640 }, { "epoch": 1.3202279493502678, "grad_norm": 0.8823724389076233, "learning_rate": 2.593180631766724e-05, "loss": 0.5835, "step": 206650 }, { "epoch": 1.3202918365000063, "grad_norm": 0.9572489261627197, "learning_rate": 2.5927408333281932e-05, "loss": 0.9716, "step": 206660 }, { "epoch": 1.3203557236497452, "grad_norm": 1.1817903518676758, "learning_rate": 2.5923010591328435e-05, "loss": 1.1069, "step": 206670 }, { "epoch": 1.3204196107994837, "grad_norm": 0.7431257367134094, "learning_rate": 2.5918613091851062e-05, "loss": 0.7068, "step": 206680 }, { "epoch": 1.3204834979492226, "grad_norm": 1.2763636112213135, "learning_rate": 2.5914215834894072e-05, "loss": 0.9138, "step": 206690 }, { "epoch": 1.320547385098961, "grad_norm": 1.2803268432617188, "learning_rate": 2.5909818820501785e-05, "loss": 0.863, "step": 206700 }, { "epoch": 1.3206112722487, "grad_norm": 1.223968267440796, "learning_rate": 2.5905422048718443e-05, "loss": 0.9294, "step": 206710 }, { "epoch": 1.3206751593984385, "grad_norm": 1.168885588645935, "learning_rate": 2.5901025519588363e-05, "loss": 1.1139, "step": 206720 }, { "epoch": 1.3207390465481774, "grad_norm": 0.922627866268158, "learning_rate": 2.5896629233155788e-05, "loss": 0.8245, "step": 206730 }, { "epoch": 1.320802933697916, "grad_norm": 1.188184380531311, "learning_rate": 2.589223318946503e-05, "loss": 0.9801, "step": 206740 }, { "epoch": 1.3208668208476548, "grad_norm": 0.7618042230606079, "learning_rate": 2.588783738856032e-05, "loss": 0.855, "step": 206750 }, { "epoch": 1.3209307079973933, "grad_norm": 1.4691100120544434, "learning_rate": 2.5883441830485965e-05, "loss": 0.986, "step": 206760 }, { "epoch": 1.3209945951471322, "grad_norm": 0.8927893042564392, "learning_rate": 2.5879046515286198e-05, "loss": 0.8979, "step": 206770 }, { "epoch": 1.3210584822968707, "grad_norm": 0.6572344303131104, "learning_rate": 2.5874651443005317e-05, "loss": 0.7915, "step": 206780 }, { "epoch": 1.3211223694466094, "grad_norm": 1.3234648704528809, "learning_rate": 2.5870256613687573e-05, "loss": 1.2454, "step": 206790 }, { "epoch": 1.3211862565963481, "grad_norm": 1.0111384391784668, "learning_rate": 2.586586202737719e-05, "loss": 0.983, "step": 206800 }, { "epoch": 1.3212501437460868, "grad_norm": 0.7284995317459106, "learning_rate": 2.586146768411848e-05, "loss": 0.597, "step": 206810 }, { "epoch": 1.3213140308958256, "grad_norm": 0.5933718681335449, "learning_rate": 2.5857073583955654e-05, "loss": 0.9336, "step": 206820 }, { "epoch": 1.3213779180455643, "grad_norm": 1.2970045804977417, "learning_rate": 2.5852679726933006e-05, "loss": 0.9286, "step": 206830 }, { "epoch": 1.321441805195303, "grad_norm": 1.1350326538085938, "learning_rate": 2.5848286113094744e-05, "loss": 0.9883, "step": 206840 }, { "epoch": 1.3215056923450417, "grad_norm": 0.8951261043548584, "learning_rate": 2.5843892742485148e-05, "loss": 0.8113, "step": 206850 }, { "epoch": 1.3215695794947804, "grad_norm": 1.196889042854309, "learning_rate": 2.583949961514843e-05, "loss": 0.6894, "step": 206860 }, { "epoch": 1.321633466644519, "grad_norm": 1.293573260307312, "learning_rate": 2.5835106731128885e-05, "loss": 1.0643, "step": 206870 }, { "epoch": 1.3216973537942578, "grad_norm": 0.8331345915794373, "learning_rate": 2.5830714090470686e-05, "loss": 0.7408, "step": 206880 }, { "epoch": 1.3217612409439965, "grad_norm": 2.4497036933898926, "learning_rate": 2.5826321693218135e-05, "loss": 0.9284, "step": 206890 }, { "epoch": 1.3218251280937352, "grad_norm": 0.882736086845398, "learning_rate": 2.582192953941542e-05, "loss": 0.8527, "step": 206900 }, { "epoch": 1.321889015243474, "grad_norm": 0.72117680311203, "learning_rate": 2.5817537629106802e-05, "loss": 0.8667, "step": 206910 }, { "epoch": 1.3219529023932126, "grad_norm": 1.2809302806854248, "learning_rate": 2.5813145962336488e-05, "loss": 0.9314, "step": 206920 }, { "epoch": 1.3220167895429513, "grad_norm": 1.0508041381835938, "learning_rate": 2.580875453914874e-05, "loss": 0.8098, "step": 206930 }, { "epoch": 1.32208067669269, "grad_norm": 1.1330662965774536, "learning_rate": 2.5804363359587735e-05, "loss": 0.8803, "step": 206940 }, { "epoch": 1.3221445638424287, "grad_norm": 0.9646334052085876, "learning_rate": 2.579997242369775e-05, "loss": 0.8861, "step": 206950 }, { "epoch": 1.3222084509921674, "grad_norm": 0.8501851558685303, "learning_rate": 2.579558173152295e-05, "loss": 0.9793, "step": 206960 }, { "epoch": 1.3222723381419061, "grad_norm": 1.1417040824890137, "learning_rate": 2.5791191283107586e-05, "loss": 0.8623, "step": 206970 }, { "epoch": 1.3223362252916449, "grad_norm": 1.8951231241226196, "learning_rate": 2.5786801078495893e-05, "loss": 0.9489, "step": 206980 }, { "epoch": 1.3224001124413836, "grad_norm": 1.1195141077041626, "learning_rate": 2.5782411117732043e-05, "loss": 1.0253, "step": 206990 }, { "epoch": 1.3224639995911223, "grad_norm": 0.8514233827590942, "learning_rate": 2.5778021400860274e-05, "loss": 1.1297, "step": 207000 }, { "epoch": 1.322527886740861, "grad_norm": 0.8984963297843933, "learning_rate": 2.5773631927924768e-05, "loss": 0.8752, "step": 207010 }, { "epoch": 1.3225917738905997, "grad_norm": 0.8698433637619019, "learning_rate": 2.5769242698969774e-05, "loss": 0.6839, "step": 207020 }, { "epoch": 1.3226556610403384, "grad_norm": 1.0516668558120728, "learning_rate": 2.576485371403944e-05, "loss": 0.8271, "step": 207030 }, { "epoch": 1.322719548190077, "grad_norm": 0.9294512867927551, "learning_rate": 2.5760464973178023e-05, "loss": 1.1857, "step": 207040 }, { "epoch": 1.3227834353398158, "grad_norm": 0.8164847493171692, "learning_rate": 2.575607647642967e-05, "loss": 0.8921, "step": 207050 }, { "epoch": 1.3228473224895545, "grad_norm": 0.8850873112678528, "learning_rate": 2.575168822383862e-05, "loss": 0.9315, "step": 207060 }, { "epoch": 1.3229112096392932, "grad_norm": 0.9530342817306519, "learning_rate": 2.5747300215449027e-05, "loss": 0.8774, "step": 207070 }, { "epoch": 1.322975096789032, "grad_norm": 0.5504788756370544, "learning_rate": 2.5742912451305124e-05, "loss": 1.1385, "step": 207080 }, { "epoch": 1.3230389839387706, "grad_norm": 0.5386658310890198, "learning_rate": 2.573852493145106e-05, "loss": 0.9027, "step": 207090 }, { "epoch": 1.3231028710885093, "grad_norm": 0.6537347435951233, "learning_rate": 2.5734137655931055e-05, "loss": 1.0127, "step": 207100 }, { "epoch": 1.323166758238248, "grad_norm": 0.8527598977088928, "learning_rate": 2.572975062478926e-05, "loss": 0.8942, "step": 207110 }, { "epoch": 1.3232306453879867, "grad_norm": 0.6852341294288635, "learning_rate": 2.5725363838069887e-05, "loss": 0.9198, "step": 207120 }, { "epoch": 1.3232945325377254, "grad_norm": 0.8571737408638, "learning_rate": 2.572097729581709e-05, "loss": 0.8471, "step": 207130 }, { "epoch": 1.3233584196874641, "grad_norm": 0.8678430914878845, "learning_rate": 2.571659099807507e-05, "loss": 0.77, "step": 207140 }, { "epoch": 1.3234223068372026, "grad_norm": 0.5002248883247375, "learning_rate": 2.5712204944887962e-05, "loss": 0.666, "step": 207150 }, { "epoch": 1.3234861939869416, "grad_norm": 2.0342235565185547, "learning_rate": 2.570781913629999e-05, "loss": 0.5886, "step": 207160 }, { "epoch": 1.32355008113668, "grad_norm": 1.233255386352539, "learning_rate": 2.5703433572355272e-05, "loss": 0.8278, "step": 207170 }, { "epoch": 1.323613968286419, "grad_norm": 0.849678099155426, "learning_rate": 2.5699048253098002e-05, "loss": 0.7862, "step": 207180 }, { "epoch": 1.3236778554361575, "grad_norm": 1.1559913158416748, "learning_rate": 2.5694663178572352e-05, "loss": 0.847, "step": 207190 }, { "epoch": 1.3237417425858964, "grad_norm": 0.5825319290161133, "learning_rate": 2.5690278348822462e-05, "loss": 0.9429, "step": 207200 }, { "epoch": 1.3238056297356349, "grad_norm": 1.2116233110427856, "learning_rate": 2.568589376389251e-05, "loss": 0.8407, "step": 207210 }, { "epoch": 1.3238695168853738, "grad_norm": 0.8955128788948059, "learning_rate": 2.5681509423826626e-05, "loss": 0.8776, "step": 207220 }, { "epoch": 1.3239334040351123, "grad_norm": 1.1272040605545044, "learning_rate": 2.5677125328669e-05, "loss": 0.7028, "step": 207230 }, { "epoch": 1.3239972911848512, "grad_norm": 1.5354359149932861, "learning_rate": 2.5672741478463746e-05, "loss": 0.9998, "step": 207240 }, { "epoch": 1.3240611783345897, "grad_norm": 1.0237005949020386, "learning_rate": 2.5668357873255043e-05, "loss": 1.1591, "step": 207250 }, { "epoch": 1.3241250654843284, "grad_norm": 0.8075418472290039, "learning_rate": 2.566397451308702e-05, "loss": 0.7585, "step": 207260 }, { "epoch": 1.324188952634067, "grad_norm": 0.9698383808135986, "learning_rate": 2.5659591398003834e-05, "loss": 0.8286, "step": 207270 }, { "epoch": 1.3242528397838058, "grad_norm": 0.953010618686676, "learning_rate": 2.565520852804963e-05, "loss": 0.7961, "step": 207280 }, { "epoch": 1.3243167269335445, "grad_norm": 1.0039968490600586, "learning_rate": 2.5650825903268515e-05, "loss": 0.836, "step": 207290 }, { "epoch": 1.3243806140832832, "grad_norm": 0.7803800702095032, "learning_rate": 2.5646443523704666e-05, "loss": 1.1, "step": 207300 }, { "epoch": 1.324444501233022, "grad_norm": 0.7452454566955566, "learning_rate": 2.5642061389402177e-05, "loss": 0.8007, "step": 207310 }, { "epoch": 1.3245083883827606, "grad_norm": 1.357801914215088, "learning_rate": 2.5637679500405225e-05, "loss": 1.034, "step": 207320 }, { "epoch": 1.3245722755324993, "grad_norm": 0.8555988669395447, "learning_rate": 2.5633297856757898e-05, "loss": 0.7433, "step": 207330 }, { "epoch": 1.324636162682238, "grad_norm": 0.8428859114646912, "learning_rate": 2.562891645850436e-05, "loss": 0.8675, "step": 207340 }, { "epoch": 1.3247000498319768, "grad_norm": 1.2644280195236206, "learning_rate": 2.5624535305688703e-05, "loss": 0.9043, "step": 207350 }, { "epoch": 1.3247639369817155, "grad_norm": 0.636249303817749, "learning_rate": 2.562015439835508e-05, "loss": 1.1278, "step": 207360 }, { "epoch": 1.3248278241314542, "grad_norm": 2.0940535068511963, "learning_rate": 2.5615773736547564e-05, "loss": 1.1164, "step": 207370 }, { "epoch": 1.3248917112811929, "grad_norm": 0.7779254913330078, "learning_rate": 2.5611393320310335e-05, "loss": 1.0982, "step": 207380 }, { "epoch": 1.3249555984309316, "grad_norm": 1.049269676208496, "learning_rate": 2.5607013149687452e-05, "loss": 0.8899, "step": 207390 }, { "epoch": 1.3250194855806703, "grad_norm": 0.7360807061195374, "learning_rate": 2.5602633224723067e-05, "loss": 0.9655, "step": 207400 }, { "epoch": 1.325083372730409, "grad_norm": 1.3488494157791138, "learning_rate": 2.559825354546125e-05, "loss": 0.8571, "step": 207410 }, { "epoch": 1.3251472598801477, "grad_norm": 2.69454026222229, "learning_rate": 2.5593874111946152e-05, "loss": 0.9587, "step": 207420 }, { "epoch": 1.3252111470298864, "grad_norm": 1.0082372426986694, "learning_rate": 2.5589494924221836e-05, "loss": 0.7106, "step": 207430 }, { "epoch": 1.325275034179625, "grad_norm": 0.892281174659729, "learning_rate": 2.558511598233245e-05, "loss": 0.8623, "step": 207440 }, { "epoch": 1.3253389213293638, "grad_norm": 1.0475205183029175, "learning_rate": 2.5580737286322044e-05, "loss": 0.901, "step": 207450 }, { "epoch": 1.3254028084791025, "grad_norm": 0.6185365319252014, "learning_rate": 2.5576358836234754e-05, "loss": 0.8671, "step": 207460 }, { "epoch": 1.3254666956288412, "grad_norm": 1.0132888555526733, "learning_rate": 2.5571980632114644e-05, "loss": 0.9183, "step": 207470 }, { "epoch": 1.32553058277858, "grad_norm": 1.1423455476760864, "learning_rate": 2.5567602674005842e-05, "loss": 0.8692, "step": 207480 }, { "epoch": 1.3255944699283186, "grad_norm": 1.2508022785186768, "learning_rate": 2.55632249619524e-05, "loss": 0.9008, "step": 207490 }, { "epoch": 1.3256583570780573, "grad_norm": 1.0700236558914185, "learning_rate": 2.5558847495998416e-05, "loss": 0.9502, "step": 207500 }, { "epoch": 1.325722244227796, "grad_norm": 0.6072807908058167, "learning_rate": 2.5554470276188013e-05, "loss": 0.8926, "step": 207510 }, { "epoch": 1.3257861313775348, "grad_norm": 1.2106003761291504, "learning_rate": 2.5550093302565216e-05, "loss": 0.7399, "step": 207520 }, { "epoch": 1.3258500185272735, "grad_norm": 0.6438818573951721, "learning_rate": 2.5545716575174154e-05, "loss": 0.8666, "step": 207530 }, { "epoch": 1.3259139056770122, "grad_norm": 0.6634151935577393, "learning_rate": 2.5541340094058863e-05, "loss": 0.9733, "step": 207540 }, { "epoch": 1.3259777928267509, "grad_norm": 0.9782295227050781, "learning_rate": 2.553696385926345e-05, "loss": 0.8119, "step": 207550 }, { "epoch": 1.3260416799764896, "grad_norm": 1.1243504285812378, "learning_rate": 2.5532587870831965e-05, "loss": 0.8401, "step": 207560 }, { "epoch": 1.3261055671262283, "grad_norm": 1.0808581113815308, "learning_rate": 2.5528212128808504e-05, "loss": 0.9154, "step": 207570 }, { "epoch": 1.326169454275967, "grad_norm": 1.1705023050308228, "learning_rate": 2.5523836633237098e-05, "loss": 0.9226, "step": 207580 }, { "epoch": 1.3262333414257057, "grad_norm": 0.9224709272384644, "learning_rate": 2.5519461384161848e-05, "loss": 0.7217, "step": 207590 }, { "epoch": 1.3262972285754444, "grad_norm": 0.6616361141204834, "learning_rate": 2.5515086381626785e-05, "loss": 0.928, "step": 207600 }, { "epoch": 1.3263611157251831, "grad_norm": 1.029366374015808, "learning_rate": 2.5510711625675997e-05, "loss": 0.9758, "step": 207610 }, { "epoch": 1.3264250028749218, "grad_norm": 1.4462729692459106, "learning_rate": 2.550633711635352e-05, "loss": 0.9038, "step": 207620 }, { "epoch": 1.3264888900246605, "grad_norm": 1.2146345376968384, "learning_rate": 2.550196285370343e-05, "loss": 0.8447, "step": 207630 }, { "epoch": 1.326552777174399, "grad_norm": 0.9503288865089417, "learning_rate": 2.549758883776975e-05, "loss": 0.7413, "step": 207640 }, { "epoch": 1.326616664324138, "grad_norm": 0.978327751159668, "learning_rate": 2.549321506859657e-05, "loss": 0.8494, "step": 207650 }, { "epoch": 1.3266805514738764, "grad_norm": 0.7345404028892517, "learning_rate": 2.5488841546227893e-05, "loss": 0.7034, "step": 207660 }, { "epoch": 1.3267444386236154, "grad_norm": 0.9454354643821716, "learning_rate": 2.5484468270707812e-05, "loss": 0.7887, "step": 207670 }, { "epoch": 1.3268083257733538, "grad_norm": 0.9941151142120361, "learning_rate": 2.5480095242080322e-05, "loss": 0.792, "step": 207680 }, { "epoch": 1.3268722129230928, "grad_norm": 1.8501225709915161, "learning_rate": 2.54757224603895e-05, "loss": 0.7835, "step": 207690 }, { "epoch": 1.3269361000728312, "grad_norm": 1.1302766799926758, "learning_rate": 2.5471349925679355e-05, "loss": 0.953, "step": 207700 }, { "epoch": 1.3269999872225702, "grad_norm": 1.4207725524902344, "learning_rate": 2.546697763799394e-05, "loss": 1.0259, "step": 207710 }, { "epoch": 1.3270638743723087, "grad_norm": 0.6076548099517822, "learning_rate": 2.5462605597377297e-05, "loss": 0.8364, "step": 207720 }, { "epoch": 1.3271277615220476, "grad_norm": 0.9731430411338806, "learning_rate": 2.5458233803873427e-05, "loss": 0.8385, "step": 207730 }, { "epoch": 1.327191648671786, "grad_norm": 1.4916883707046509, "learning_rate": 2.5453862257526395e-05, "loss": 0.8845, "step": 207740 }, { "epoch": 1.3272555358215248, "grad_norm": 0.616757869720459, "learning_rate": 2.5449490958380185e-05, "loss": 0.7308, "step": 207750 }, { "epoch": 1.3273194229712635, "grad_norm": 1.0742515325546265, "learning_rate": 2.5445119906478855e-05, "loss": 1.1297, "step": 207760 }, { "epoch": 1.3273833101210022, "grad_norm": 1.1675541400909424, "learning_rate": 2.544074910186641e-05, "loss": 0.8094, "step": 207770 }, { "epoch": 1.327447197270741, "grad_norm": 1.1597243547439575, "learning_rate": 2.543637854458688e-05, "loss": 0.8895, "step": 207780 }, { "epoch": 1.3275110844204796, "grad_norm": 1.3750035762786865, "learning_rate": 2.5432008234684236e-05, "loss": 0.9638, "step": 207790 }, { "epoch": 1.3275749715702183, "grad_norm": 0.6728596687316895, "learning_rate": 2.542763817220255e-05, "loss": 0.7371, "step": 207800 }, { "epoch": 1.327638858719957, "grad_norm": 0.9936996698379517, "learning_rate": 2.5423268357185788e-05, "loss": 1.0607, "step": 207810 }, { "epoch": 1.3277027458696957, "grad_norm": 1.2502750158309937, "learning_rate": 2.541889878967797e-05, "loss": 0.758, "step": 207820 }, { "epoch": 1.3277666330194344, "grad_norm": 2.021584987640381, "learning_rate": 2.5414529469723132e-05, "loss": 0.7352, "step": 207830 }, { "epoch": 1.3278305201691731, "grad_norm": 1.755197525024414, "learning_rate": 2.5410160397365222e-05, "loss": 0.8273, "step": 207840 }, { "epoch": 1.3278944073189118, "grad_norm": 1.087971568107605, "learning_rate": 2.54057915726483e-05, "loss": 0.7056, "step": 207850 }, { "epoch": 1.3279582944686505, "grad_norm": 0.8218057155609131, "learning_rate": 2.5401422995616313e-05, "loss": 0.9178, "step": 207860 }, { "epoch": 1.3280221816183893, "grad_norm": 0.8078576922416687, "learning_rate": 2.5397054666313293e-05, "loss": 0.8177, "step": 207870 }, { "epoch": 1.328086068768128, "grad_norm": 1.413970947265625, "learning_rate": 2.53926865847832e-05, "loss": 0.9126, "step": 207880 }, { "epoch": 1.3281499559178667, "grad_norm": 0.8575447797775269, "learning_rate": 2.5388318751070062e-05, "loss": 0.9419, "step": 207890 }, { "epoch": 1.3282138430676054, "grad_norm": 0.9678406715393066, "learning_rate": 2.5383951165217824e-05, "loss": 0.5925, "step": 207900 }, { "epoch": 1.328277730217344, "grad_norm": 0.5872654914855957, "learning_rate": 2.5379583827270513e-05, "loss": 0.6653, "step": 207910 }, { "epoch": 1.3283416173670828, "grad_norm": 0.8543681502342224, "learning_rate": 2.5375216737272078e-05, "loss": 0.9675, "step": 207920 }, { "epoch": 1.3284055045168215, "grad_norm": 0.8474524021148682, "learning_rate": 2.5370849895266525e-05, "loss": 0.9284, "step": 207930 }, { "epoch": 1.3284693916665602, "grad_norm": 1.456979751586914, "learning_rate": 2.5366483301297804e-05, "loss": 1.139, "step": 207940 }, { "epoch": 1.328533278816299, "grad_norm": 0.748672366142273, "learning_rate": 2.536211695540993e-05, "loss": 0.8772, "step": 207950 }, { "epoch": 1.3285971659660376, "grad_norm": 1.5592775344848633, "learning_rate": 2.5357750857646832e-05, "loss": 1.3006, "step": 207960 }, { "epoch": 1.3286610531157763, "grad_norm": 1.2369399070739746, "learning_rate": 2.535338500805252e-05, "loss": 0.7419, "step": 207970 }, { "epoch": 1.328724940265515, "grad_norm": 1.2126493453979492, "learning_rate": 2.5349019406670932e-05, "loss": 0.9519, "step": 207980 }, { "epoch": 1.3287888274152537, "grad_norm": 1.2261261940002441, "learning_rate": 2.5344654053546057e-05, "loss": 0.6931, "step": 207990 }, { "epoch": 1.3288527145649924, "grad_norm": 0.8413811326026917, "learning_rate": 2.5340288948721823e-05, "loss": 0.9658, "step": 208000 }, { "epoch": 1.3289166017147311, "grad_norm": 1.077040433883667, "learning_rate": 2.5335924092242235e-05, "loss": 0.8505, "step": 208010 }, { "epoch": 1.3289804888644698, "grad_norm": 0.8440773487091064, "learning_rate": 2.5331559484151213e-05, "loss": 0.8107, "step": 208020 }, { "epoch": 1.3290443760142086, "grad_norm": 0.4764833450317383, "learning_rate": 2.5327195124492725e-05, "loss": 0.8189, "step": 208030 }, { "epoch": 1.3291082631639473, "grad_norm": 1.1367080211639404, "learning_rate": 2.5322831013310742e-05, "loss": 0.9064, "step": 208040 }, { "epoch": 1.329172150313686, "grad_norm": 1.0161020755767822, "learning_rate": 2.5318467150649195e-05, "loss": 0.9505, "step": 208050 }, { "epoch": 1.3292360374634247, "grad_norm": 1.983578085899353, "learning_rate": 2.531410353655204e-05, "loss": 0.8435, "step": 208060 }, { "epoch": 1.3292999246131634, "grad_norm": 1.1380125284194946, "learning_rate": 2.5309740171063205e-05, "loss": 0.9795, "step": 208070 }, { "epoch": 1.329363811762902, "grad_norm": 1.1479755640029907, "learning_rate": 2.5305377054226663e-05, "loss": 1.4879, "step": 208080 }, { "epoch": 1.3294276989126408, "grad_norm": 1.1302154064178467, "learning_rate": 2.5301014186086324e-05, "loss": 0.8148, "step": 208090 }, { "epoch": 1.3294915860623795, "grad_norm": 0.8572195768356323, "learning_rate": 2.5296651566686157e-05, "loss": 0.9606, "step": 208100 }, { "epoch": 1.329555473212118, "grad_norm": 0.7420187592506409, "learning_rate": 2.5292289196070063e-05, "loss": 0.8116, "step": 208110 }, { "epoch": 1.329619360361857, "grad_norm": 0.8453270196914673, "learning_rate": 2.5287927074282007e-05, "loss": 0.6985, "step": 208120 }, { "epoch": 1.3296832475115954, "grad_norm": 1.6888545751571655, "learning_rate": 2.528356520136589e-05, "loss": 1.1483, "step": 208130 }, { "epoch": 1.3297471346613343, "grad_norm": 0.7026654481887817, "learning_rate": 2.527920357736567e-05, "loss": 0.8764, "step": 208140 }, { "epoch": 1.3298110218110728, "grad_norm": 0.9377880096435547, "learning_rate": 2.527484220232524e-05, "loss": 0.8799, "step": 208150 }, { "epoch": 1.3298749089608117, "grad_norm": 1.017162561416626, "learning_rate": 2.5270481076288554e-05, "loss": 0.8685, "step": 208160 }, { "epoch": 1.3299387961105502, "grad_norm": 1.0247211456298828, "learning_rate": 2.5266120199299504e-05, "loss": 0.7848, "step": 208170 }, { "epoch": 1.3300026832602891, "grad_norm": 0.6646064519882202, "learning_rate": 2.5261759571402033e-05, "loss": 0.9928, "step": 208180 }, { "epoch": 1.3300665704100276, "grad_norm": 1.235183596611023, "learning_rate": 2.525739919264003e-05, "loss": 0.8701, "step": 208190 }, { "epoch": 1.3301304575597666, "grad_norm": 0.9530540108680725, "learning_rate": 2.5253039063057443e-05, "loss": 0.8677, "step": 208200 }, { "epoch": 1.330194344709505, "grad_norm": 1.1182188987731934, "learning_rate": 2.5248679182698143e-05, "loss": 0.9195, "step": 208210 }, { "epoch": 1.330258231859244, "grad_norm": 0.6461467146873474, "learning_rate": 2.524431955160607e-05, "loss": 0.9312, "step": 208220 }, { "epoch": 1.3303221190089825, "grad_norm": 0.6050586700439453, "learning_rate": 2.5239960169825105e-05, "loss": 0.8003, "step": 208230 }, { "epoch": 1.3303860061587212, "grad_norm": 1.3474839925765991, "learning_rate": 2.523560103739916e-05, "loss": 0.9264, "step": 208240 }, { "epoch": 1.3304498933084599, "grad_norm": 0.6612098813056946, "learning_rate": 2.523124215437217e-05, "loss": 0.7328, "step": 208250 }, { "epoch": 1.3305137804581986, "grad_norm": 0.7815212607383728, "learning_rate": 2.522688352078796e-05, "loss": 0.7472, "step": 208260 }, { "epoch": 1.3305776676079373, "grad_norm": 1.1188582181930542, "learning_rate": 2.522252513669049e-05, "loss": 0.9148, "step": 208270 }, { "epoch": 1.330641554757676, "grad_norm": 0.8124456405639648, "learning_rate": 2.5218167002123605e-05, "loss": 0.7962, "step": 208280 }, { "epoch": 1.3307054419074147, "grad_norm": 1.276268720626831, "learning_rate": 2.5213809117131237e-05, "loss": 0.7497, "step": 208290 }, { "epoch": 1.3307693290571534, "grad_norm": 0.8970780968666077, "learning_rate": 2.5209451481757242e-05, "loss": 0.9581, "step": 208300 }, { "epoch": 1.330833216206892, "grad_norm": 2.7623441219329834, "learning_rate": 2.5205094096045524e-05, "loss": 0.9534, "step": 208310 }, { "epoch": 1.3308971033566308, "grad_norm": 1.1253652572631836, "learning_rate": 2.520073696003995e-05, "loss": 1.0629, "step": 208320 }, { "epoch": 1.3309609905063695, "grad_norm": 0.8003132939338684, "learning_rate": 2.519638007378442e-05, "loss": 0.7378, "step": 208330 }, { "epoch": 1.3310248776561082, "grad_norm": 1.132372498512268, "learning_rate": 2.5192023437322787e-05, "loss": 0.7477, "step": 208340 }, { "epoch": 1.331088764805847, "grad_norm": 1.6704978942871094, "learning_rate": 2.5187667050698936e-05, "loss": 0.8368, "step": 208350 }, { "epoch": 1.3311526519555856, "grad_norm": 1.237809419631958, "learning_rate": 2.518331091395677e-05, "loss": 0.8616, "step": 208360 }, { "epoch": 1.3312165391053243, "grad_norm": 1.075128436088562, "learning_rate": 2.5178955027140112e-05, "loss": 0.8805, "step": 208370 }, { "epoch": 1.331280426255063, "grad_norm": 1.0682311058044434, "learning_rate": 2.5174599390292865e-05, "loss": 1.074, "step": 208380 }, { "epoch": 1.3313443134048017, "grad_norm": 0.772784948348999, "learning_rate": 2.5170244003458864e-05, "loss": 0.8137, "step": 208390 }, { "epoch": 1.3314082005545405, "grad_norm": 0.9435346126556396, "learning_rate": 2.5165888866682004e-05, "loss": 1.0276, "step": 208400 }, { "epoch": 1.3314720877042792, "grad_norm": 1.0530027151107788, "learning_rate": 2.51615339800061e-05, "loss": 0.8332, "step": 208410 }, { "epoch": 1.3315359748540179, "grad_norm": 0.9279080629348755, "learning_rate": 2.5157179343475068e-05, "loss": 0.6883, "step": 208420 }, { "epoch": 1.3315998620037566, "grad_norm": 0.7981129884719849, "learning_rate": 2.5152824957132715e-05, "loss": 0.7635, "step": 208430 }, { "epoch": 1.3316637491534953, "grad_norm": 0.923291027545929, "learning_rate": 2.514847082102292e-05, "loss": 1.2482, "step": 208440 }, { "epoch": 1.331727636303234, "grad_norm": 1.3127217292785645, "learning_rate": 2.514411693518951e-05, "loss": 0.8983, "step": 208450 }, { "epoch": 1.3317915234529727, "grad_norm": 0.8736202716827393, "learning_rate": 2.5139763299676362e-05, "loss": 0.8583, "step": 208460 }, { "epoch": 1.3318554106027114, "grad_norm": 0.777912437915802, "learning_rate": 2.5135409914527285e-05, "loss": 0.8301, "step": 208470 }, { "epoch": 1.33191929775245, "grad_norm": 1.0512322187423706, "learning_rate": 2.5131056779786165e-05, "loss": 1.0007, "step": 208480 }, { "epoch": 1.3319831849021888, "grad_norm": 0.9216824769973755, "learning_rate": 2.5126703895496794e-05, "loss": 0.7905, "step": 208490 }, { "epoch": 1.3320470720519275, "grad_norm": 1.1229199171066284, "learning_rate": 2.5122351261703058e-05, "loss": 0.8234, "step": 208500 }, { "epoch": 1.3321109592016662, "grad_norm": 0.9037683606147766, "learning_rate": 2.5117998878448746e-05, "loss": 0.955, "step": 208510 }, { "epoch": 1.332174846351405, "grad_norm": 0.6385436058044434, "learning_rate": 2.5113646745777726e-05, "loss": 1.1041, "step": 208520 }, { "epoch": 1.3322387335011436, "grad_norm": 0.9948113560676575, "learning_rate": 2.5109294863733795e-05, "loss": 0.8906, "step": 208530 }, { "epoch": 1.3323026206508823, "grad_norm": 0.6039278507232666, "learning_rate": 2.510494323236082e-05, "loss": 0.8755, "step": 208540 }, { "epoch": 1.332366507800621, "grad_norm": 1.256967306137085, "learning_rate": 2.5100591851702583e-05, "loss": 0.8707, "step": 208550 }, { "epoch": 1.3324303949503598, "grad_norm": 0.8238212466239929, "learning_rate": 2.5096240721802926e-05, "loss": 0.9786, "step": 208560 }, { "epoch": 1.3324942821000985, "grad_norm": 1.652024507522583, "learning_rate": 2.5091889842705696e-05, "loss": 0.9173, "step": 208570 }, { "epoch": 1.3325581692498372, "grad_norm": 1.1673933267593384, "learning_rate": 2.5087539214454658e-05, "loss": 0.8108, "step": 208580 }, { "epoch": 1.3326220563995759, "grad_norm": 0.9864590167999268, "learning_rate": 2.5083188837093674e-05, "loss": 0.8847, "step": 208590 }, { "epoch": 1.3326859435493144, "grad_norm": 1.695041298866272, "learning_rate": 2.507883871066651e-05, "loss": 0.7886, "step": 208600 }, { "epoch": 1.3327498306990533, "grad_norm": 0.8688923120498657, "learning_rate": 2.5074488835217026e-05, "loss": 1.2028, "step": 208610 }, { "epoch": 1.3328137178487918, "grad_norm": 2.205828905105591, "learning_rate": 2.507013921078898e-05, "loss": 0.7474, "step": 208620 }, { "epoch": 1.3328776049985307, "grad_norm": 0.9385578632354736, "learning_rate": 2.5065789837426225e-05, "loss": 0.7994, "step": 208630 }, { "epoch": 1.3329414921482692, "grad_norm": 1.1013610363006592, "learning_rate": 2.5061440715172513e-05, "loss": 0.9858, "step": 208640 }, { "epoch": 1.333005379298008, "grad_norm": 0.8305615186691284, "learning_rate": 2.5057091844071683e-05, "loss": 0.7782, "step": 208650 }, { "epoch": 1.3330692664477466, "grad_norm": 0.9627121686935425, "learning_rate": 2.505274322416751e-05, "loss": 0.8877, "step": 208660 }, { "epoch": 1.3331331535974855, "grad_norm": 1.865303635597229, "learning_rate": 2.5048394855503798e-05, "loss": 0.8416, "step": 208670 }, { "epoch": 1.333197040747224, "grad_norm": 1.4973645210266113, "learning_rate": 2.5044046738124326e-05, "loss": 0.9163, "step": 208680 }, { "epoch": 1.333260927896963, "grad_norm": 0.8996737003326416, "learning_rate": 2.5039698872072913e-05, "loss": 0.8238, "step": 208690 }, { "epoch": 1.3333248150467014, "grad_norm": 1.8005520105361938, "learning_rate": 2.503535125739331e-05, "loss": 0.653, "step": 208700 }, { "epoch": 1.3333887021964403, "grad_norm": 1.0006129741668701, "learning_rate": 2.503100389412933e-05, "loss": 0.8249, "step": 208710 }, { "epoch": 1.3334525893461788, "grad_norm": 1.0600533485412598, "learning_rate": 2.5026656782324724e-05, "loss": 1.1548, "step": 208720 }, { "epoch": 1.3335164764959175, "grad_norm": 1.0672171115875244, "learning_rate": 2.5022309922023312e-05, "loss": 1.0526, "step": 208730 }, { "epoch": 1.3335803636456562, "grad_norm": 0.8205702900886536, "learning_rate": 2.5017963313268843e-05, "loss": 0.7387, "step": 208740 }, { "epoch": 1.333644250795395, "grad_norm": 0.9449298977851868, "learning_rate": 2.501361695610508e-05, "loss": 1.11, "step": 208750 }, { "epoch": 1.3337081379451337, "grad_norm": 0.9433137774467468, "learning_rate": 2.5009270850575828e-05, "loss": 0.8742, "step": 208760 }, { "epoch": 1.3337720250948724, "grad_norm": 1.927285075187683, "learning_rate": 2.5004924996724822e-05, "loss": 0.7368, "step": 208770 }, { "epoch": 1.333835912244611, "grad_norm": 1.1574492454528809, "learning_rate": 2.5000579394595864e-05, "loss": 1.0002, "step": 208780 }, { "epoch": 1.3338997993943498, "grad_norm": 0.8089993596076965, "learning_rate": 2.499623404423268e-05, "loss": 0.9104, "step": 208790 }, { "epoch": 1.3339636865440885, "grad_norm": 1.1149111986160278, "learning_rate": 2.4991888945679066e-05, "loss": 0.926, "step": 208800 }, { "epoch": 1.3340275736938272, "grad_norm": 1.1031670570373535, "learning_rate": 2.498754409897875e-05, "loss": 0.7789, "step": 208810 }, { "epoch": 1.334091460843566, "grad_norm": 0.6810122728347778, "learning_rate": 2.498319950417552e-05, "loss": 0.9843, "step": 208820 }, { "epoch": 1.3341553479933046, "grad_norm": 1.146023154258728, "learning_rate": 2.4978855161313097e-05, "loss": 0.8365, "step": 208830 }, { "epoch": 1.3342192351430433, "grad_norm": 1.486267328262329, "learning_rate": 2.4974511070435268e-05, "loss": 0.8662, "step": 208840 }, { "epoch": 1.334283122292782, "grad_norm": 0.8784306049346924, "learning_rate": 2.497016723158574e-05, "loss": 0.8443, "step": 208850 }, { "epoch": 1.3343470094425207, "grad_norm": 1.1221381425857544, "learning_rate": 2.4965823644808307e-05, "loss": 0.9196, "step": 208860 }, { "epoch": 1.3344108965922594, "grad_norm": 1.6030218601226807, "learning_rate": 2.4961480310146663e-05, "loss": 0.8168, "step": 208870 }, { "epoch": 1.3344747837419981, "grad_norm": 1.1350743770599365, "learning_rate": 2.4957137227644577e-05, "loss": 0.8172, "step": 208880 }, { "epoch": 1.3345386708917368, "grad_norm": 0.664034903049469, "learning_rate": 2.49527943973458e-05, "loss": 0.72, "step": 208890 }, { "epoch": 1.3346025580414755, "grad_norm": 1.3050806522369385, "learning_rate": 2.4948451819294034e-05, "loss": 0.9105, "step": 208900 }, { "epoch": 1.3346664451912142, "grad_norm": 0.4084964096546173, "learning_rate": 2.4944109493533052e-05, "loss": 0.764, "step": 208910 }, { "epoch": 1.334730332340953, "grad_norm": 0.9133265614509583, "learning_rate": 2.4939767420106547e-05, "loss": 0.7507, "step": 208920 }, { "epoch": 1.3347942194906917, "grad_norm": 0.8358080983161926, "learning_rate": 2.4935425599058275e-05, "loss": 0.8011, "step": 208930 }, { "epoch": 1.3348581066404304, "grad_norm": 0.9465192556381226, "learning_rate": 2.493108403043194e-05, "loss": 1.0144, "step": 208940 }, { "epoch": 1.334921993790169, "grad_norm": 1.0894412994384766, "learning_rate": 2.4926742714271295e-05, "loss": 0.9632, "step": 208950 }, { "epoch": 1.3349858809399078, "grad_norm": 0.7485235929489136, "learning_rate": 2.492240165062002e-05, "loss": 0.6174, "step": 208960 }, { "epoch": 1.3350497680896465, "grad_norm": 1.1640737056732178, "learning_rate": 2.4918060839521874e-05, "loss": 1.0708, "step": 208970 }, { "epoch": 1.3351136552393852, "grad_norm": 0.9745331406593323, "learning_rate": 2.4913720281020537e-05, "loss": 0.9084, "step": 208980 }, { "epoch": 1.335177542389124, "grad_norm": 0.9204380512237549, "learning_rate": 2.4909379975159764e-05, "loss": 0.9714, "step": 208990 }, { "epoch": 1.3352414295388626, "grad_norm": 1.0989751815795898, "learning_rate": 2.4905039921983213e-05, "loss": 0.7627, "step": 209000 }, { "epoch": 1.3353053166886013, "grad_norm": 1.1908149719238281, "learning_rate": 2.4900700121534642e-05, "loss": 0.8905, "step": 209010 }, { "epoch": 1.33536920383834, "grad_norm": 1.3680340051651, "learning_rate": 2.489636057385772e-05, "loss": 0.8965, "step": 209020 }, { "epoch": 1.3354330909880787, "grad_norm": 0.7258527874946594, "learning_rate": 2.4892021278996182e-05, "loss": 0.8439, "step": 209030 }, { "epoch": 1.3354969781378174, "grad_norm": 1.1028822660446167, "learning_rate": 2.4887682236993686e-05, "loss": 0.8114, "step": 209040 }, { "epoch": 1.3355608652875561, "grad_norm": 0.7421342730522156, "learning_rate": 2.488334344789398e-05, "loss": 0.7569, "step": 209050 }, { "epoch": 1.3356247524372948, "grad_norm": 0.9636204242706299, "learning_rate": 2.4879004911740716e-05, "loss": 0.8479, "step": 209060 }, { "epoch": 1.3356886395870335, "grad_norm": 1.148268222808838, "learning_rate": 2.487466662857762e-05, "loss": 0.9857, "step": 209070 }, { "epoch": 1.3357525267367722, "grad_norm": 0.6279143691062927, "learning_rate": 2.4870328598448352e-05, "loss": 0.6136, "step": 209080 }, { "epoch": 1.3358164138865107, "grad_norm": 0.8338180780410767, "learning_rate": 2.486599082139662e-05, "loss": 0.9912, "step": 209090 }, { "epoch": 1.3358803010362497, "grad_norm": 0.9658007621765137, "learning_rate": 2.4861653297466114e-05, "loss": 1.1272, "step": 209100 }, { "epoch": 1.3359441881859881, "grad_norm": 1.018320083618164, "learning_rate": 2.4857316026700488e-05, "loss": 0.6616, "step": 209110 }, { "epoch": 1.336008075335727, "grad_norm": 1.224833607673645, "learning_rate": 2.4852979009143467e-05, "loss": 0.8024, "step": 209120 }, { "epoch": 1.3360719624854656, "grad_norm": 0.8727512955665588, "learning_rate": 2.484864224483868e-05, "loss": 0.7102, "step": 209130 }, { "epoch": 1.3361358496352045, "grad_norm": 0.7761167883872986, "learning_rate": 2.484430573382985e-05, "loss": 1.2143, "step": 209140 }, { "epoch": 1.336199736784943, "grad_norm": 1.4156831502914429, "learning_rate": 2.4839969476160596e-05, "loss": 0.6088, "step": 209150 }, { "epoch": 1.336263623934682, "grad_norm": 0.7810295820236206, "learning_rate": 2.4835633471874635e-05, "loss": 0.787, "step": 209160 }, { "epoch": 1.3363275110844204, "grad_norm": 0.8101314306259155, "learning_rate": 2.48312977210156e-05, "loss": 1.043, "step": 209170 }, { "epoch": 1.3363913982341593, "grad_norm": 0.8262963891029358, "learning_rate": 2.4826962223627192e-05, "loss": 0.9108, "step": 209180 }, { "epoch": 1.3364552853838978, "grad_norm": 1.052907109260559, "learning_rate": 2.4822626979753028e-05, "loss": 0.891, "step": 209190 }, { "epoch": 1.3365191725336365, "grad_norm": 0.8523666262626648, "learning_rate": 2.4818291989436815e-05, "loss": 0.6906, "step": 209200 }, { "epoch": 1.3365830596833752, "grad_norm": 1.5421022176742554, "learning_rate": 2.4813957252722165e-05, "loss": 0.85, "step": 209210 }, { "epoch": 1.336646946833114, "grad_norm": 1.0445187091827393, "learning_rate": 2.4809622769652775e-05, "loss": 0.8081, "step": 209220 }, { "epoch": 1.3367108339828526, "grad_norm": 1.041160225868225, "learning_rate": 2.4805288540272276e-05, "loss": 1.0529, "step": 209230 }, { "epoch": 1.3367747211325913, "grad_norm": 1.4466737508773804, "learning_rate": 2.480138795076988e-05, "loss": 0.8208, "step": 209240 }, { "epoch": 1.33683860828233, "grad_norm": 0.7779701948165894, "learning_rate": 2.4797054203518528e-05, "loss": 0.7357, "step": 209250 }, { "epoch": 1.3369024954320687, "grad_norm": 0.940628707408905, "learning_rate": 2.4792720710082623e-05, "loss": 1.0128, "step": 209260 }, { "epoch": 1.3369663825818074, "grad_norm": 1.1309727430343628, "learning_rate": 2.478882078303863e-05, "loss": 1.0985, "step": 209270 }, { "epoch": 1.3370302697315462, "grad_norm": 1.0964093208312988, "learning_rate": 2.4784487771972347e-05, "loss": 0.6566, "step": 209280 }, { "epoch": 1.3370941568812849, "grad_norm": 1.1902695894241333, "learning_rate": 2.4780155014848105e-05, "loss": 0.9892, "step": 209290 }, { "epoch": 1.3371580440310236, "grad_norm": 1.3145884275436401, "learning_rate": 2.47758225117095e-05, "loss": 0.8356, "step": 209300 }, { "epoch": 1.3372219311807623, "grad_norm": 0.8866621851921082, "learning_rate": 2.4771490262600205e-05, "loss": 0.9547, "step": 209310 }, { "epoch": 1.337285818330501, "grad_norm": 1.3845595121383667, "learning_rate": 2.476715826756381e-05, "loss": 0.9326, "step": 209320 }, { "epoch": 1.3373497054802397, "grad_norm": 1.0303796529769897, "learning_rate": 2.4762826526643962e-05, "loss": 0.9338, "step": 209330 }, { "epoch": 1.3374135926299784, "grad_norm": 0.978714644908905, "learning_rate": 2.4758495039884305e-05, "loss": 0.8114, "step": 209340 }, { "epoch": 1.337477479779717, "grad_norm": 0.685970664024353, "learning_rate": 2.4754163807328416e-05, "loss": 0.9129, "step": 209350 }, { "epoch": 1.3375413669294558, "grad_norm": 0.3362213969230652, "learning_rate": 2.474983282901996e-05, "loss": 0.79, "step": 209360 }, { "epoch": 1.3376052540791945, "grad_norm": 0.7439454793930054, "learning_rate": 2.474550210500251e-05, "loss": 1.0737, "step": 209370 }, { "epoch": 1.3376691412289332, "grad_norm": 1.3877562284469604, "learning_rate": 2.4741171635319722e-05, "loss": 0.6211, "step": 209380 }, { "epoch": 1.337733028378672, "grad_norm": 0.8978985548019409, "learning_rate": 2.4736841420015174e-05, "loss": 0.9921, "step": 209390 }, { "epoch": 1.3377969155284106, "grad_norm": 0.743375301361084, "learning_rate": 2.4732511459132502e-05, "loss": 0.7833, "step": 209400 }, { "epoch": 1.3378608026781493, "grad_norm": 1.1048918962478638, "learning_rate": 2.472818175271528e-05, "loss": 0.8042, "step": 209410 }, { "epoch": 1.337924689827888, "grad_norm": 0.9322056770324707, "learning_rate": 2.472385230080715e-05, "loss": 0.9332, "step": 209420 }, { "epoch": 1.3379885769776267, "grad_norm": 1.3366330862045288, "learning_rate": 2.4719523103451673e-05, "loss": 1.0764, "step": 209430 }, { "epoch": 1.3380524641273654, "grad_norm": 0.6746212244033813, "learning_rate": 2.4715194160692494e-05, "loss": 0.6417, "step": 209440 }, { "epoch": 1.3381163512771042, "grad_norm": 1.4820467233657837, "learning_rate": 2.4710865472573164e-05, "loss": 0.866, "step": 209450 }, { "epoch": 1.3381802384268429, "grad_norm": 0.6888427138328552, "learning_rate": 2.4706537039137316e-05, "loss": 1.2135, "step": 209460 }, { "epoch": 1.3382441255765816, "grad_norm": 0.9417890310287476, "learning_rate": 2.47022088604285e-05, "loss": 1.0028, "step": 209470 }, { "epoch": 1.3383080127263203, "grad_norm": 1.1863046884536743, "learning_rate": 2.4697880936490348e-05, "loss": 0.746, "step": 209480 }, { "epoch": 1.338371899876059, "grad_norm": 0.9177879095077515, "learning_rate": 2.4693553267366405e-05, "loss": 0.805, "step": 209490 }, { "epoch": 1.3384357870257977, "grad_norm": 0.8089203238487244, "learning_rate": 2.468922585310029e-05, "loss": 0.7571, "step": 209500 }, { "epoch": 1.3384996741755364, "grad_norm": 1.0447206497192383, "learning_rate": 2.468489869373555e-05, "loss": 0.9604, "step": 209510 }, { "epoch": 1.338563561325275, "grad_norm": 0.8918728828430176, "learning_rate": 2.4680571789315803e-05, "loss": 0.878, "step": 209520 }, { "epoch": 1.3386274484750138, "grad_norm": 1.284941554069519, "learning_rate": 2.46762451398846e-05, "loss": 0.8817, "step": 209530 }, { "epoch": 1.3386913356247525, "grad_norm": 0.9816265106201172, "learning_rate": 2.46719187454855e-05, "loss": 0.8537, "step": 209540 }, { "epoch": 1.3387552227744912, "grad_norm": 1.0117629766464233, "learning_rate": 2.4667592606162106e-05, "loss": 0.9611, "step": 209550 }, { "epoch": 1.33881910992423, "grad_norm": 1.9839173555374146, "learning_rate": 2.466326672195795e-05, "loss": 0.6887, "step": 209560 }, { "epoch": 1.3388829970739686, "grad_norm": 0.913004994392395, "learning_rate": 2.4658941092916642e-05, "loss": 0.8255, "step": 209570 }, { "epoch": 1.3389468842237071, "grad_norm": 1.4669578075408936, "learning_rate": 2.4654615719081696e-05, "loss": 0.8509, "step": 209580 }, { "epoch": 1.339010771373446, "grad_norm": 0.9293825030326843, "learning_rate": 2.4650290600496716e-05, "loss": 0.8569, "step": 209590 }, { "epoch": 1.3390746585231845, "grad_norm": 1.2991247177124023, "learning_rate": 2.4645965737205224e-05, "loss": 0.8354, "step": 209600 }, { "epoch": 1.3391385456729235, "grad_norm": 1.1067110300064087, "learning_rate": 2.4641641129250807e-05, "loss": 0.9723, "step": 209610 }, { "epoch": 1.339202432822662, "grad_norm": 1.071905255317688, "learning_rate": 2.4637316776676987e-05, "loss": 0.7582, "step": 209620 }, { "epoch": 1.3392663199724009, "grad_norm": 0.8266701102256775, "learning_rate": 2.463299267952735e-05, "loss": 0.965, "step": 209630 }, { "epoch": 1.3393302071221393, "grad_norm": 0.879081130027771, "learning_rate": 2.4628668837845397e-05, "loss": 0.7896, "step": 209640 }, { "epoch": 1.3393940942718783, "grad_norm": 0.9813835620880127, "learning_rate": 2.4624345251674702e-05, "loss": 1.0807, "step": 209650 }, { "epoch": 1.3394579814216168, "grad_norm": 1.115027904510498, "learning_rate": 2.462002192105882e-05, "loss": 0.8159, "step": 209660 }, { "epoch": 1.3395218685713557, "grad_norm": 1.6251577138900757, "learning_rate": 2.4615698846041258e-05, "loss": 0.7014, "step": 209670 }, { "epoch": 1.3395857557210942, "grad_norm": 0.8698475360870361, "learning_rate": 2.4611376026665584e-05, "loss": 0.942, "step": 209680 }, { "epoch": 1.3396496428708329, "grad_norm": 1.2194809913635254, "learning_rate": 2.46070534629753e-05, "loss": 0.6803, "step": 209690 }, { "epoch": 1.3397135300205716, "grad_norm": 1.7993695735931396, "learning_rate": 2.4602731155013974e-05, "loss": 0.9341, "step": 209700 }, { "epoch": 1.3397774171703103, "grad_norm": 0.7888948917388916, "learning_rate": 2.4598409102825103e-05, "loss": 0.9482, "step": 209710 }, { "epoch": 1.339841304320049, "grad_norm": 1.112903356552124, "learning_rate": 2.4594087306452244e-05, "loss": 0.7849, "step": 209720 }, { "epoch": 1.3399051914697877, "grad_norm": 1.238926649093628, "learning_rate": 2.458976576593888e-05, "loss": 1.0857, "step": 209730 }, { "epoch": 1.3399690786195264, "grad_norm": 1.2833373546600342, "learning_rate": 2.4585444481328584e-05, "loss": 0.9552, "step": 209740 }, { "epoch": 1.3400329657692651, "grad_norm": 0.7105987668037415, "learning_rate": 2.458112345266483e-05, "loss": 0.7341, "step": 209750 }, { "epoch": 1.3400968529190038, "grad_norm": 0.7378994226455688, "learning_rate": 2.4576802679991173e-05, "loss": 0.9365, "step": 209760 }, { "epoch": 1.3401607400687425, "grad_norm": 0.8128080368041992, "learning_rate": 2.4572482163351086e-05, "loss": 0.9999, "step": 209770 }, { "epoch": 1.3402246272184812, "grad_norm": 1.1463675498962402, "learning_rate": 2.4568161902788118e-05, "loss": 0.8544, "step": 209780 }, { "epoch": 1.34028851436822, "grad_norm": 0.8119245171546936, "learning_rate": 2.4563841898345745e-05, "loss": 0.8122, "step": 209790 }, { "epoch": 1.3403524015179586, "grad_norm": 0.8801420331001282, "learning_rate": 2.4559522150067504e-05, "loss": 0.8097, "step": 209800 }, { "epoch": 1.3404162886676974, "grad_norm": 0.726452648639679, "learning_rate": 2.4555202657996875e-05, "loss": 0.8997, "step": 209810 }, { "epoch": 1.340480175817436, "grad_norm": 1.294712781906128, "learning_rate": 2.4550883422177378e-05, "loss": 0.964, "step": 209820 }, { "epoch": 1.3405440629671748, "grad_norm": 1.0049042701721191, "learning_rate": 2.4546564442652487e-05, "loss": 0.6612, "step": 209830 }, { "epoch": 1.3406079501169135, "grad_norm": 0.6928468942642212, "learning_rate": 2.4542245719465706e-05, "loss": 0.8433, "step": 209840 }, { "epoch": 1.3406718372666522, "grad_norm": 0.8258686065673828, "learning_rate": 2.4537927252660565e-05, "loss": 1.0434, "step": 209850 }, { "epoch": 1.3407357244163909, "grad_norm": 1.3018081188201904, "learning_rate": 2.4533609042280496e-05, "loss": 0.9601, "step": 209860 }, { "epoch": 1.3407996115661296, "grad_norm": 1.6450886726379395, "learning_rate": 2.4529291088369038e-05, "loss": 0.9166, "step": 209870 }, { "epoch": 1.3408634987158683, "grad_norm": 1.0318559408187866, "learning_rate": 2.4524973390969635e-05, "loss": 1.085, "step": 209880 }, { "epoch": 1.340927385865607, "grad_norm": 1.9742481708526611, "learning_rate": 2.452065595012581e-05, "loss": 0.9039, "step": 209890 }, { "epoch": 1.3409912730153457, "grad_norm": 0.6447263956069946, "learning_rate": 2.4516338765880996e-05, "loss": 0.6018, "step": 209900 }, { "epoch": 1.3410551601650844, "grad_norm": 1.2177265882492065, "learning_rate": 2.451202183827872e-05, "loss": 0.8312, "step": 209910 }, { "epoch": 1.3411190473148231, "grad_norm": 0.8734244704246521, "learning_rate": 2.450770516736242e-05, "loss": 0.9552, "step": 209920 }, { "epoch": 1.3411829344645618, "grad_norm": 1.1779773235321045, "learning_rate": 2.45033887531756e-05, "loss": 0.8273, "step": 209930 }, { "epoch": 1.3412468216143005, "grad_norm": 0.9692370891571045, "learning_rate": 2.449907259576169e-05, "loss": 0.9669, "step": 209940 }, { "epoch": 1.3413107087640392, "grad_norm": 0.7844962477684021, "learning_rate": 2.44947566951642e-05, "loss": 0.7729, "step": 209950 }, { "epoch": 1.341374595913778, "grad_norm": 0.7607170939445496, "learning_rate": 2.449044105142656e-05, "loss": 0.6972, "step": 209960 }, { "epoch": 1.3414384830635167, "grad_norm": 1.2908620834350586, "learning_rate": 2.4486125664592263e-05, "loss": 1.1027, "step": 209970 }, { "epoch": 1.3415023702132554, "grad_norm": 1.3262430429458618, "learning_rate": 2.4481810534704734e-05, "loss": 0.7157, "step": 209980 }, { "epoch": 1.341566257362994, "grad_norm": 0.7177751660346985, "learning_rate": 2.4477495661807476e-05, "loss": 0.7883, "step": 209990 }, { "epoch": 1.3416301445127328, "grad_norm": 0.7598410248756409, "learning_rate": 2.447318104594391e-05, "loss": 0.798, "step": 210000 }, { "epoch": 1.3416940316624715, "grad_norm": 0.940240740776062, "learning_rate": 2.4468866687157477e-05, "loss": 1.0023, "step": 210010 }, { "epoch": 1.3417579188122102, "grad_norm": 0.5641880631446838, "learning_rate": 2.4464552585491662e-05, "loss": 0.7427, "step": 210020 }, { "epoch": 1.3418218059619489, "grad_norm": 1.3816797733306885, "learning_rate": 2.446023874098987e-05, "loss": 0.8589, "step": 210030 }, { "epoch": 1.3418856931116876, "grad_norm": 0.8124895691871643, "learning_rate": 2.4455925153695598e-05, "loss": 0.9625, "step": 210040 }, { "epoch": 1.341949580261426, "grad_norm": 1.486721158027649, "learning_rate": 2.445161182365223e-05, "loss": 0.8313, "step": 210050 }, { "epoch": 1.342013467411165, "grad_norm": 0.8006391525268555, "learning_rate": 2.444729875090326e-05, "loss": 0.9999, "step": 210060 }, { "epoch": 1.3420773545609035, "grad_norm": 0.6810546517372131, "learning_rate": 2.4442985935492073e-05, "loss": 1.0687, "step": 210070 }, { "epoch": 1.3421412417106424, "grad_norm": 0.5805924534797668, "learning_rate": 2.443867337746215e-05, "loss": 0.958, "step": 210080 }, { "epoch": 1.342205128860381, "grad_norm": 1.3511457443237305, "learning_rate": 2.443436107685688e-05, "loss": 0.795, "step": 210090 }, { "epoch": 1.3422690160101198, "grad_norm": 0.9503484964370728, "learning_rate": 2.4430049033719733e-05, "loss": 0.8793, "step": 210100 }, { "epoch": 1.3423329031598583, "grad_norm": 1.1696393489837646, "learning_rate": 2.4425737248094094e-05, "loss": 1.1012, "step": 210110 }, { "epoch": 1.3423967903095972, "grad_norm": 0.622090220451355, "learning_rate": 2.442142572002342e-05, "loss": 0.9457, "step": 210120 }, { "epoch": 1.3424606774593357, "grad_norm": 0.7924548983573914, "learning_rate": 2.4417114449551104e-05, "loss": 0.9871, "step": 210130 }, { "epoch": 1.3425245646090747, "grad_norm": 1.0553364753723145, "learning_rate": 2.4412803436720595e-05, "loss": 0.7615, "step": 210140 }, { "epoch": 1.3425884517588131, "grad_norm": 1.3900631666183472, "learning_rate": 2.4408492681575273e-05, "loss": 0.8291, "step": 210150 }, { "epoch": 1.342652338908552, "grad_norm": 1.1718361377716064, "learning_rate": 2.4404182184158563e-05, "loss": 0.808, "step": 210160 }, { "epoch": 1.3427162260582906, "grad_norm": 0.7470996975898743, "learning_rate": 2.4399871944513907e-05, "loss": 0.7203, "step": 210170 }, { "epoch": 1.3427801132080293, "grad_norm": 0.9686959981918335, "learning_rate": 2.439556196268467e-05, "loss": 1.1038, "step": 210180 }, { "epoch": 1.342844000357768, "grad_norm": 1.1691725254058838, "learning_rate": 2.439125223871429e-05, "loss": 1.2793, "step": 210190 }, { "epoch": 1.3429078875075067, "grad_norm": 0.729895293712616, "learning_rate": 2.4386942772646138e-05, "loss": 0.9453, "step": 210200 }, { "epoch": 1.3429717746572454, "grad_norm": 1.0728123188018799, "learning_rate": 2.438263356452365e-05, "loss": 1.0757, "step": 210210 }, { "epoch": 1.343035661806984, "grad_norm": 1.7532767057418823, "learning_rate": 2.4378324614390187e-05, "loss": 0.9886, "step": 210220 }, { "epoch": 1.3430995489567228, "grad_norm": 0.6979058384895325, "learning_rate": 2.4374015922289177e-05, "loss": 0.8571, "step": 210230 }, { "epoch": 1.3431634361064615, "grad_norm": 1.2367197275161743, "learning_rate": 2.4369707488263978e-05, "loss": 0.8007, "step": 210240 }, { "epoch": 1.3432273232562002, "grad_norm": 1.1389049291610718, "learning_rate": 2.436539931235802e-05, "loss": 0.9297, "step": 210250 }, { "epoch": 1.343291210405939, "grad_norm": 0.5948600172996521, "learning_rate": 2.4361091394614644e-05, "loss": 0.7676, "step": 210260 }, { "epoch": 1.3433550975556776, "grad_norm": 0.8651537299156189, "learning_rate": 2.4356783735077276e-05, "loss": 1.1049, "step": 210270 }, { "epoch": 1.3434189847054163, "grad_norm": 1.134677529335022, "learning_rate": 2.4352476333789266e-05, "loss": 0.8082, "step": 210280 }, { "epoch": 1.343482871855155, "grad_norm": 0.868580162525177, "learning_rate": 2.434816919079403e-05, "loss": 0.9725, "step": 210290 }, { "epoch": 1.3435467590048937, "grad_norm": 0.828855037689209, "learning_rate": 2.4343862306134897e-05, "loss": 0.8408, "step": 210300 }, { "epoch": 1.3436106461546324, "grad_norm": 0.8323947787284851, "learning_rate": 2.4339555679855284e-05, "loss": 0.9766, "step": 210310 }, { "epoch": 1.3436745333043711, "grad_norm": 1.219912052154541, "learning_rate": 2.4335249311998533e-05, "loss": 1.0028, "step": 210320 }, { "epoch": 1.3437384204541098, "grad_norm": 0.6117528080940247, "learning_rate": 2.4330943202608037e-05, "loss": 0.7249, "step": 210330 }, { "epoch": 1.3438023076038486, "grad_norm": 0.8942903876304626, "learning_rate": 2.4326637351727134e-05, "loss": 0.8577, "step": 210340 }, { "epoch": 1.3438661947535873, "grad_norm": 0.9196701049804688, "learning_rate": 2.432233175939922e-05, "loss": 0.9215, "step": 210350 }, { "epoch": 1.343930081903326, "grad_norm": 0.6849532723426819, "learning_rate": 2.4318026425667623e-05, "loss": 0.7433, "step": 210360 }, { "epoch": 1.3439939690530647, "grad_norm": 0.8016952872276306, "learning_rate": 2.4313721350575713e-05, "loss": 0.8727, "step": 210370 }, { "epoch": 1.3440578562028034, "grad_norm": 0.9626837372779846, "learning_rate": 2.430941653416688e-05, "loss": 0.8245, "step": 210380 }, { "epoch": 1.344121743352542, "grad_norm": 0.9934982061386108, "learning_rate": 2.430511197648442e-05, "loss": 1.0403, "step": 210390 }, { "epoch": 1.3441856305022808, "grad_norm": 0.8192433714866638, "learning_rate": 2.4300807677571736e-05, "loss": 1.1374, "step": 210400 }, { "epoch": 1.3442495176520195, "grad_norm": 2.489893913269043, "learning_rate": 2.429650363747213e-05, "loss": 0.7958, "step": 210410 }, { "epoch": 1.3443134048017582, "grad_norm": 0.5742020010948181, "learning_rate": 2.4292199856228986e-05, "loss": 0.7759, "step": 210420 }, { "epoch": 1.344377291951497, "grad_norm": 0.7302964925765991, "learning_rate": 2.4287896333885613e-05, "loss": 0.7775, "step": 210430 }, { "epoch": 1.3444411791012356, "grad_norm": 1.0013583898544312, "learning_rate": 2.428359307048539e-05, "loss": 0.7784, "step": 210440 }, { "epoch": 1.3445050662509743, "grad_norm": 1.9199076890945435, "learning_rate": 2.4279290066071608e-05, "loss": 0.913, "step": 210450 }, { "epoch": 1.344568953400713, "grad_norm": 0.9353756904602051, "learning_rate": 2.4274987320687648e-05, "loss": 0.7027, "step": 210460 }, { "epoch": 1.3446328405504517, "grad_norm": 0.9756304621696472, "learning_rate": 2.42706848343768e-05, "loss": 0.9233, "step": 210470 }, { "epoch": 1.3446967277001904, "grad_norm": 1.185735821723938, "learning_rate": 2.4266382607182435e-05, "loss": 0.8367, "step": 210480 }, { "epoch": 1.3447606148499291, "grad_norm": 0.7765511870384216, "learning_rate": 2.4262080639147865e-05, "loss": 0.6595, "step": 210490 }, { "epoch": 1.3448245019996679, "grad_norm": 1.5717637538909912, "learning_rate": 2.4257778930316384e-05, "loss": 0.703, "step": 210500 }, { "epoch": 1.3448883891494066, "grad_norm": 1.0303682088851929, "learning_rate": 2.4253477480731362e-05, "loss": 0.8976, "step": 210510 }, { "epoch": 1.3449522762991453, "grad_norm": 0.7455207109451294, "learning_rate": 2.4249176290436077e-05, "loss": 0.8332, "step": 210520 }, { "epoch": 1.345016163448884, "grad_norm": 0.8036611080169678, "learning_rate": 2.424487535947388e-05, "loss": 0.7954, "step": 210530 }, { "epoch": 1.3450800505986225, "grad_norm": 0.8801621198654175, "learning_rate": 2.4240574687888052e-05, "loss": 0.9002, "step": 210540 }, { "epoch": 1.3451439377483614, "grad_norm": 1.1013474464416504, "learning_rate": 2.4236274275721943e-05, "loss": 1.0208, "step": 210550 }, { "epoch": 1.3452078248980999, "grad_norm": 0.9770920276641846, "learning_rate": 2.423197412301882e-05, "loss": 0.8401, "step": 210560 }, { "epoch": 1.3452717120478388, "grad_norm": 0.9802312254905701, "learning_rate": 2.4227674229822028e-05, "loss": 0.6977, "step": 210570 }, { "epoch": 1.3453355991975773, "grad_norm": 1.036690354347229, "learning_rate": 2.4223374596174838e-05, "loss": 0.7917, "step": 210580 }, { "epoch": 1.3453994863473162, "grad_norm": 0.9629283547401428, "learning_rate": 2.421907522212058e-05, "loss": 0.7148, "step": 210590 }, { "epoch": 1.3454633734970547, "grad_norm": 0.5816007256507874, "learning_rate": 2.4214776107702518e-05, "loss": 0.9514, "step": 210600 }, { "epoch": 1.3455272606467936, "grad_norm": 1.4846789836883545, "learning_rate": 2.4210477252963993e-05, "loss": 0.8683, "step": 210610 }, { "epoch": 1.345591147796532, "grad_norm": 1.0084872245788574, "learning_rate": 2.4206178657948246e-05, "loss": 0.6668, "step": 210620 }, { "epoch": 1.345655034946271, "grad_norm": 0.884223997592926, "learning_rate": 2.4201880322698622e-05, "loss": 1.055, "step": 210630 }, { "epoch": 1.3457189220960095, "grad_norm": 0.749989926815033, "learning_rate": 2.419758224725836e-05, "loss": 0.7163, "step": 210640 }, { "epoch": 1.3457828092457484, "grad_norm": 0.8538677096366882, "learning_rate": 2.4193284431670786e-05, "loss": 0.8633, "step": 210650 }, { "epoch": 1.345846696395487, "grad_norm": 1.240678310394287, "learning_rate": 2.4188986875979146e-05, "loss": 0.7806, "step": 210660 }, { "epoch": 1.3459105835452256, "grad_norm": 0.6129598021507263, "learning_rate": 2.4184689580226756e-05, "loss": 0.6788, "step": 210670 }, { "epoch": 1.3459744706949643, "grad_norm": 1.047533631324768, "learning_rate": 2.4180392544456852e-05, "loss": 0.5952, "step": 210680 }, { "epoch": 1.346038357844703, "grad_norm": 0.9103485941886902, "learning_rate": 2.4176095768712737e-05, "loss": 0.7303, "step": 210690 }, { "epoch": 1.3461022449944418, "grad_norm": 0.8905709981918335, "learning_rate": 2.4171799253037698e-05, "loss": 0.7106, "step": 210700 }, { "epoch": 1.3461661321441805, "grad_norm": 1.0654971599578857, "learning_rate": 2.416750299747496e-05, "loss": 0.9144, "step": 210710 }, { "epoch": 1.3462300192939192, "grad_norm": 0.9546240568161011, "learning_rate": 2.416320700206784e-05, "loss": 0.7629, "step": 210720 }, { "epoch": 1.3462939064436579, "grad_norm": 0.566611111164093, "learning_rate": 2.4158911266859556e-05, "loss": 0.8748, "step": 210730 }, { "epoch": 1.3463577935933966, "grad_norm": 0.8656834363937378, "learning_rate": 2.4154615791893415e-05, "loss": 0.7819, "step": 210740 }, { "epoch": 1.3464216807431353, "grad_norm": 0.9609455466270447, "learning_rate": 2.4150320577212628e-05, "loss": 1.2611, "step": 210750 }, { "epoch": 1.346485567892874, "grad_norm": 1.36478853225708, "learning_rate": 2.4146025622860498e-05, "loss": 0.7677, "step": 210760 }, { "epoch": 1.3465494550426127, "grad_norm": 1.402571439743042, "learning_rate": 2.4141730928880235e-05, "loss": 0.6639, "step": 210770 }, { "epoch": 1.3466133421923514, "grad_norm": 0.8779550194740295, "learning_rate": 2.413743649531513e-05, "loss": 0.778, "step": 210780 }, { "epoch": 1.34667722934209, "grad_norm": 0.9404392242431641, "learning_rate": 2.41331423222084e-05, "loss": 0.8239, "step": 210790 }, { "epoch": 1.3467411164918288, "grad_norm": 0.8661839365959167, "learning_rate": 2.412884840960332e-05, "loss": 1.1463, "step": 210800 }, { "epoch": 1.3468050036415675, "grad_norm": 0.6455258727073669, "learning_rate": 2.41245547575431e-05, "loss": 0.6678, "step": 210810 }, { "epoch": 1.3468688907913062, "grad_norm": 0.808452308177948, "learning_rate": 2.4120261366071018e-05, "loss": 0.591, "step": 210820 }, { "epoch": 1.346932777941045, "grad_norm": 0.7696453928947449, "learning_rate": 2.4115968235230275e-05, "loss": 0.788, "step": 210830 }, { "epoch": 1.3469966650907836, "grad_norm": 0.9432682991027832, "learning_rate": 2.4111675365064146e-05, "loss": 0.8129, "step": 210840 }, { "epoch": 1.3470605522405223, "grad_norm": 1.2885863780975342, "learning_rate": 2.4107382755615822e-05, "loss": 0.9396, "step": 210850 }, { "epoch": 1.347124439390261, "grad_norm": 0.49041441082954407, "learning_rate": 2.410309040692857e-05, "loss": 0.8879, "step": 210860 }, { "epoch": 1.3471883265399998, "grad_norm": 1.4990586042404175, "learning_rate": 2.4098798319045583e-05, "loss": 0.8044, "step": 210870 }, { "epoch": 1.3472522136897385, "grad_norm": 0.9001103043556213, "learning_rate": 2.4094506492010127e-05, "loss": 0.8259, "step": 210880 }, { "epoch": 1.3473161008394772, "grad_norm": 0.582728385925293, "learning_rate": 2.409021492586538e-05, "loss": 0.8523, "step": 210890 }, { "epoch": 1.3473799879892159, "grad_norm": 1.2756515741348267, "learning_rate": 2.408592362065459e-05, "loss": 0.9167, "step": 210900 }, { "epoch": 1.3474438751389546, "grad_norm": 0.7740147709846497, "learning_rate": 2.4081632576420975e-05, "loss": 0.7101, "step": 210910 }, { "epoch": 1.3475077622886933, "grad_norm": 1.3967020511627197, "learning_rate": 2.4077341793207737e-05, "loss": 1.3073, "step": 210920 }, { "epoch": 1.347571649438432, "grad_norm": 0.6911699771881104, "learning_rate": 2.4073051271058106e-05, "loss": 0.8531, "step": 210930 }, { "epoch": 1.3476355365881707, "grad_norm": 0.7950164675712585, "learning_rate": 2.4068761010015256e-05, "loss": 0.8932, "step": 210940 }, { "epoch": 1.3476994237379094, "grad_norm": 0.8173822164535522, "learning_rate": 2.406447101012244e-05, "loss": 1.1364, "step": 210950 }, { "epoch": 1.3477633108876481, "grad_norm": 1.062247633934021, "learning_rate": 2.4060181271422823e-05, "loss": 0.8704, "step": 210960 }, { "epoch": 1.3478271980373868, "grad_norm": 0.691670298576355, "learning_rate": 2.405589179395965e-05, "loss": 1.0231, "step": 210970 }, { "epoch": 1.3478910851871255, "grad_norm": 1.0676474571228027, "learning_rate": 2.405160257777606e-05, "loss": 0.8037, "step": 210980 }, { "epoch": 1.3479549723368642, "grad_norm": 0.8413828015327454, "learning_rate": 2.4047313622915295e-05, "loss": 0.9708, "step": 210990 }, { "epoch": 1.348018859486603, "grad_norm": 1.0489099025726318, "learning_rate": 2.404302492942052e-05, "loss": 0.9631, "step": 211000 }, { "epoch": 1.3480827466363416, "grad_norm": 0.7359494566917419, "learning_rate": 2.403873649733494e-05, "loss": 0.7335, "step": 211010 }, { "epoch": 1.3481466337860804, "grad_norm": 1.3242499828338623, "learning_rate": 2.4034448326701763e-05, "loss": 0.9638, "step": 211020 }, { "epoch": 1.3482105209358188, "grad_norm": 0.7859591245651245, "learning_rate": 2.403016041756413e-05, "loss": 1.0956, "step": 211030 }, { "epoch": 1.3482744080855578, "grad_norm": 0.9521056413650513, "learning_rate": 2.4025872769965275e-05, "loss": 0.795, "step": 211040 }, { "epoch": 1.3483382952352962, "grad_norm": 0.9843899011611938, "learning_rate": 2.4021585383948325e-05, "loss": 0.9966, "step": 211050 }, { "epoch": 1.3484021823850352, "grad_norm": 2.194084644317627, "learning_rate": 2.401729825955651e-05, "loss": 0.9103, "step": 211060 }, { "epoch": 1.3484660695347737, "grad_norm": 0.6446147561073303, "learning_rate": 2.4013011396832956e-05, "loss": 0.9175, "step": 211070 }, { "epoch": 1.3485299566845126, "grad_norm": 1.1317280530929565, "learning_rate": 2.400872479582088e-05, "loss": 1.0431, "step": 211080 }, { "epoch": 1.348593843834251, "grad_norm": 0.8024947047233582, "learning_rate": 2.4004438456563417e-05, "loss": 0.7175, "step": 211090 }, { "epoch": 1.34865773098399, "grad_norm": 0.7585810422897339, "learning_rate": 2.4000152379103764e-05, "loss": 0.8535, "step": 211100 }, { "epoch": 1.3487216181337285, "grad_norm": 0.8179787397384644, "learning_rate": 2.399586656348505e-05, "loss": 0.7568, "step": 211110 }, { "epoch": 1.3487855052834674, "grad_norm": 0.7237686514854431, "learning_rate": 2.3991581009750475e-05, "loss": 0.8521, "step": 211120 }, { "epoch": 1.348849392433206, "grad_norm": 0.956449568271637, "learning_rate": 2.398729571794316e-05, "loss": 0.962, "step": 211130 }, { "epoch": 1.3489132795829448, "grad_norm": 0.7600199580192566, "learning_rate": 2.39830106881063e-05, "loss": 0.9539, "step": 211140 }, { "epoch": 1.3489771667326833, "grad_norm": 0.7863625288009644, "learning_rate": 2.397872592028301e-05, "loss": 0.7805, "step": 211150 }, { "epoch": 1.349041053882422, "grad_norm": 0.7679553627967834, "learning_rate": 2.3974441414516485e-05, "loss": 0.9939, "step": 211160 }, { "epoch": 1.3491049410321607, "grad_norm": 1.3117929697036743, "learning_rate": 2.3970157170849827e-05, "loss": 0.8474, "step": 211170 }, { "epoch": 1.3491688281818994, "grad_norm": 1.0582443475723267, "learning_rate": 2.3965873189326226e-05, "loss": 0.9592, "step": 211180 }, { "epoch": 1.3492327153316381, "grad_norm": 0.9994162917137146, "learning_rate": 2.3961589469988788e-05, "loss": 0.8485, "step": 211190 }, { "epoch": 1.3492966024813768, "grad_norm": 0.60932457447052, "learning_rate": 2.3957306012880686e-05, "loss": 0.7063, "step": 211200 }, { "epoch": 1.3493604896311155, "grad_norm": 0.7182226181030273, "learning_rate": 2.3953022818045023e-05, "loss": 0.847, "step": 211210 }, { "epoch": 1.3494243767808543, "grad_norm": 1.0658320188522339, "learning_rate": 2.3948739885524957e-05, "loss": 0.9415, "step": 211220 }, { "epoch": 1.349488263930593, "grad_norm": 0.8645941019058228, "learning_rate": 2.3944457215363636e-05, "loss": 0.6805, "step": 211230 }, { "epoch": 1.3495521510803317, "grad_norm": 1.8405535221099854, "learning_rate": 2.3940174807604154e-05, "loss": 0.9702, "step": 211240 }, { "epoch": 1.3496160382300704, "grad_norm": 0.7858190536499023, "learning_rate": 2.3935892662289678e-05, "loss": 0.9197, "step": 211250 }, { "epoch": 1.349679925379809, "grad_norm": 2.9478893280029297, "learning_rate": 2.3931610779463286e-05, "loss": 0.9379, "step": 211260 }, { "epoch": 1.3497438125295478, "grad_norm": 0.8578464984893799, "learning_rate": 2.392732915916815e-05, "loss": 0.7668, "step": 211270 }, { "epoch": 1.3498076996792865, "grad_norm": 0.8595908880233765, "learning_rate": 2.3923047801447345e-05, "loss": 0.7818, "step": 211280 }, { "epoch": 1.3498715868290252, "grad_norm": 1.156435489654541, "learning_rate": 2.3918766706344026e-05, "loss": 1.0211, "step": 211290 }, { "epoch": 1.349935473978764, "grad_norm": 1.146933674812317, "learning_rate": 2.391448587390127e-05, "loss": 1.0301, "step": 211300 }, { "epoch": 1.3499993611285026, "grad_norm": 0.7327816486358643, "learning_rate": 2.391020530416223e-05, "loss": 0.7104, "step": 211310 }, { "epoch": 1.3500632482782413, "grad_norm": 1.090005874633789, "learning_rate": 2.3905924997169978e-05, "loss": 0.887, "step": 211320 }, { "epoch": 1.35012713542798, "grad_norm": 0.7504577040672302, "learning_rate": 2.3901644952967643e-05, "loss": 0.6344, "step": 211330 }, { "epoch": 1.3501910225777187, "grad_norm": 0.6553131341934204, "learning_rate": 2.389736517159831e-05, "loss": 0.7824, "step": 211340 }, { "epoch": 1.3502549097274574, "grad_norm": 0.6806291937828064, "learning_rate": 2.3893085653105112e-05, "loss": 0.7066, "step": 211350 }, { "epoch": 1.3503187968771961, "grad_norm": 0.8419098258018494, "learning_rate": 2.388880639753111e-05, "loss": 0.8939, "step": 211360 }, { "epoch": 1.3503826840269348, "grad_norm": 1.017172932624817, "learning_rate": 2.3884527404919434e-05, "loss": 1.1268, "step": 211370 }, { "epoch": 1.3504465711766735, "grad_norm": 0.920089066028595, "learning_rate": 2.3880248675313138e-05, "loss": 1.1174, "step": 211380 }, { "epoch": 1.3505104583264123, "grad_norm": 0.7588639259338379, "learning_rate": 2.3875970208755354e-05, "loss": 0.9587, "step": 211390 }, { "epoch": 1.350574345476151, "grad_norm": 0.907305121421814, "learning_rate": 2.3871692005289137e-05, "loss": 0.9076, "step": 211400 }, { "epoch": 1.3506382326258897, "grad_norm": 0.9567956924438477, "learning_rate": 2.38674140649576e-05, "loss": 1.0364, "step": 211410 }, { "epoch": 1.3507021197756284, "grad_norm": 0.8349950313568115, "learning_rate": 2.3863136387803793e-05, "loss": 0.8609, "step": 211420 }, { "epoch": 1.350766006925367, "grad_norm": 0.9572453498840332, "learning_rate": 2.3858858973870813e-05, "loss": 1.0533, "step": 211430 }, { "epoch": 1.3508298940751058, "grad_norm": 1.308901309967041, "learning_rate": 2.385458182320176e-05, "loss": 0.8949, "step": 211440 }, { "epoch": 1.3508937812248445, "grad_norm": 0.664740264415741, "learning_rate": 2.3850304935839668e-05, "loss": 0.951, "step": 211450 }, { "epoch": 1.3509576683745832, "grad_norm": 0.962526261806488, "learning_rate": 2.3846028311827666e-05, "loss": 0.6875, "step": 211460 }, { "epoch": 1.351021555524322, "grad_norm": 0.8609249591827393, "learning_rate": 2.3841751951208745e-05, "loss": 0.9577, "step": 211470 }, { "epoch": 1.3510854426740606, "grad_norm": 0.6966132521629333, "learning_rate": 2.3837475854026033e-05, "loss": 0.7883, "step": 211480 }, { "epoch": 1.3511493298237993, "grad_norm": 1.8291178941726685, "learning_rate": 2.3833200020322554e-05, "loss": 1.0099, "step": 211490 }, { "epoch": 1.351213216973538, "grad_norm": 0.8845551609992981, "learning_rate": 2.3828924450141403e-05, "loss": 0.7502, "step": 211500 }, { "epoch": 1.3512771041232767, "grad_norm": 1.426044225692749, "learning_rate": 2.3824649143525613e-05, "loss": 0.6654, "step": 211510 }, { "epoch": 1.3513409912730152, "grad_norm": 0.8587886691093445, "learning_rate": 2.3820374100518263e-05, "loss": 0.9827, "step": 211520 }, { "epoch": 1.3514048784227541, "grad_norm": 0.9691550135612488, "learning_rate": 2.381609932116238e-05, "loss": 0.9936, "step": 211530 }, { "epoch": 1.3514687655724926, "grad_norm": 1.3499754667282104, "learning_rate": 2.3811824805501025e-05, "loss": 0.834, "step": 211540 }, { "epoch": 1.3515326527222316, "grad_norm": 0.6362777948379517, "learning_rate": 2.3807550553577274e-05, "loss": 0.7756, "step": 211550 }, { "epoch": 1.35159653987197, "grad_norm": 0.6540228724479675, "learning_rate": 2.3803276565434124e-05, "loss": 1.3985, "step": 211560 }, { "epoch": 1.351660427021709, "grad_norm": 1.1397900581359863, "learning_rate": 2.3799002841114666e-05, "loss": 0.9984, "step": 211570 }, { "epoch": 1.3517243141714474, "grad_norm": 0.7781268358230591, "learning_rate": 2.3794729380661896e-05, "loss": 0.8131, "step": 211580 }, { "epoch": 1.3517882013211864, "grad_norm": 1.5782052278518677, "learning_rate": 2.3790883491896017e-05, "loss": 0.8044, "step": 211590 }, { "epoch": 1.3518520884709249, "grad_norm": 0.9964745044708252, "learning_rate": 2.3786610532908554e-05, "loss": 0.7436, "step": 211600 }, { "epoch": 1.3519159756206638, "grad_norm": 1.558613657951355, "learning_rate": 2.3782337837912644e-05, "loss": 0.9581, "step": 211610 }, { "epoch": 1.3519798627704023, "grad_norm": 0.7820762991905212, "learning_rate": 2.377806540695124e-05, "loss": 0.9374, "step": 211620 }, { "epoch": 1.352043749920141, "grad_norm": 1.2613965272903442, "learning_rate": 2.3773793240067416e-05, "loss": 0.9243, "step": 211630 }, { "epoch": 1.3521076370698797, "grad_norm": 1.1297177076339722, "learning_rate": 2.3769521337304163e-05, "loss": 0.9963, "step": 211640 }, { "epoch": 1.3521715242196184, "grad_norm": 0.7416729927062988, "learning_rate": 2.376524969870454e-05, "loss": 0.8046, "step": 211650 }, { "epoch": 1.352235411369357, "grad_norm": 0.9396662712097168, "learning_rate": 2.3760978324311533e-05, "loss": 0.7937, "step": 211660 }, { "epoch": 1.3522992985190958, "grad_norm": 1.451461672782898, "learning_rate": 2.375670721416818e-05, "loss": 0.806, "step": 211670 }, { "epoch": 1.3523631856688345, "grad_norm": 1.6874157190322876, "learning_rate": 2.37524363683175e-05, "loss": 0.7712, "step": 211680 }, { "epoch": 1.3524270728185732, "grad_norm": 1.7415262460708618, "learning_rate": 2.3748165786802472e-05, "loss": 0.7358, "step": 211690 }, { "epoch": 1.352490959968312, "grad_norm": 0.7011155486106873, "learning_rate": 2.374389546966615e-05, "loss": 0.8004, "step": 211700 }, { "epoch": 1.3525548471180506, "grad_norm": 1.013753056526184, "learning_rate": 2.37396254169515e-05, "loss": 0.6766, "step": 211710 }, { "epoch": 1.3526187342677893, "grad_norm": 1.1585613489151, "learning_rate": 2.373535562870155e-05, "loss": 0.8866, "step": 211720 }, { "epoch": 1.352682621417528, "grad_norm": 0.6112300157546997, "learning_rate": 2.3731086104959277e-05, "loss": 0.7901, "step": 211730 }, { "epoch": 1.3527465085672667, "grad_norm": 1.580859661102295, "learning_rate": 2.3726816845767712e-05, "loss": 1.0197, "step": 211740 }, { "epoch": 1.3528103957170055, "grad_norm": 0.9176729321479797, "learning_rate": 2.3722547851169813e-05, "loss": 1.0305, "step": 211750 }, { "epoch": 1.3528742828667442, "grad_norm": 0.78628009557724, "learning_rate": 2.3718279121208608e-05, "loss": 0.8649, "step": 211760 }, { "epoch": 1.3529381700164829, "grad_norm": 1.3608671426773071, "learning_rate": 2.371401065592705e-05, "loss": 0.8869, "step": 211770 }, { "epoch": 1.3530020571662216, "grad_norm": 0.8958773612976074, "learning_rate": 2.3709742455368168e-05, "loss": 0.7032, "step": 211780 }, { "epoch": 1.3530659443159603, "grad_norm": 0.802314817905426, "learning_rate": 2.37054745195749e-05, "loss": 1.0902, "step": 211790 }, { "epoch": 1.353129831465699, "grad_norm": 0.7494603991508484, "learning_rate": 2.3701206848590267e-05, "loss": 0.7383, "step": 211800 }, { "epoch": 1.3531937186154377, "grad_norm": 0.6451514959335327, "learning_rate": 2.369693944245722e-05, "loss": 0.7629, "step": 211810 }, { "epoch": 1.3532576057651764, "grad_norm": 0.7738144993782043, "learning_rate": 2.3692672301218764e-05, "loss": 0.8307, "step": 211820 }, { "epoch": 1.353321492914915, "grad_norm": 0.7524031400680542, "learning_rate": 2.368840542491784e-05, "loss": 0.9163, "step": 211830 }, { "epoch": 1.3533853800646538, "grad_norm": 0.9877924919128418, "learning_rate": 2.3684138813597456e-05, "loss": 1.0076, "step": 211840 }, { "epoch": 1.3534492672143925, "grad_norm": 1.3271013498306274, "learning_rate": 2.3679872467300545e-05, "loss": 1.0871, "step": 211850 }, { "epoch": 1.3535131543641312, "grad_norm": 0.9618971943855286, "learning_rate": 2.3675606386070083e-05, "loss": 0.6661, "step": 211860 }, { "epoch": 1.35357704151387, "grad_norm": 0.9295185208320618, "learning_rate": 2.3671340569949057e-05, "loss": 0.5962, "step": 211870 }, { "epoch": 1.3536409286636086, "grad_norm": 1.1782742738723755, "learning_rate": 2.3667075018980396e-05, "loss": 0.8708, "step": 211880 }, { "epoch": 1.3537048158133473, "grad_norm": 0.8071572780609131, "learning_rate": 2.3662809733207082e-05, "loss": 0.6242, "step": 211890 }, { "epoch": 1.353768702963086, "grad_norm": 1.3956265449523926, "learning_rate": 2.3658544712672042e-05, "loss": 0.9122, "step": 211900 }, { "epoch": 1.3538325901128248, "grad_norm": 0.816125750541687, "learning_rate": 2.3654279957418268e-05, "loss": 0.703, "step": 211910 }, { "epoch": 1.3538964772625635, "grad_norm": 1.574976921081543, "learning_rate": 2.365001546748866e-05, "loss": 0.8301, "step": 211920 }, { "epoch": 1.3539603644123022, "grad_norm": 0.6849200129508972, "learning_rate": 2.3645751242926223e-05, "loss": 0.8724, "step": 211930 }, { "epoch": 1.3540242515620409, "grad_norm": 0.5813919901847839, "learning_rate": 2.3641487283773844e-05, "loss": 0.8879, "step": 211940 }, { "epoch": 1.3540881387117796, "grad_norm": 0.691142201423645, "learning_rate": 2.363722359007451e-05, "loss": 0.9655, "step": 211950 }, { "epoch": 1.3541520258615183, "grad_norm": 1.5124849081039429, "learning_rate": 2.3632960161871126e-05, "loss": 0.8739, "step": 211960 }, { "epoch": 1.354215913011257, "grad_norm": 0.6597105264663696, "learning_rate": 2.3629123303522642e-05, "loss": 1.048, "step": 211970 }, { "epoch": 1.3542798001609957, "grad_norm": 0.793196976184845, "learning_rate": 2.3624860379879876e-05, "loss": 0.6684, "step": 211980 }, { "epoch": 1.3543436873107344, "grad_norm": 0.7942538857460022, "learning_rate": 2.362059772185759e-05, "loss": 1.0311, "step": 211990 }, { "epoch": 1.354407574460473, "grad_norm": 1.052597999572754, "learning_rate": 2.361633532949872e-05, "loss": 1.015, "step": 212000 }, { "epoch": 1.3544714616102116, "grad_norm": 1.1230298280715942, "learning_rate": 2.3612073202846163e-05, "loss": 1.0466, "step": 212010 }, { "epoch": 1.3545353487599505, "grad_norm": 0.7834064364433289, "learning_rate": 2.3607811341942875e-05, "loss": 0.7204, "step": 212020 }, { "epoch": 1.354599235909689, "grad_norm": 0.6216392517089844, "learning_rate": 2.3603549746831737e-05, "loss": 0.9645, "step": 212030 }, { "epoch": 1.354663123059428, "grad_norm": 1.3307925462722778, "learning_rate": 2.359928841755571e-05, "loss": 1.0167, "step": 212040 }, { "epoch": 1.3547270102091664, "grad_norm": 1.176498293876648, "learning_rate": 2.3595027354157673e-05, "loss": 0.6712, "step": 212050 }, { "epoch": 1.3547908973589053, "grad_norm": 1.0552092790603638, "learning_rate": 2.359076655668057e-05, "loss": 0.9531, "step": 212060 }, { "epoch": 1.3548547845086438, "grad_norm": 0.6958808302879333, "learning_rate": 2.3586506025167275e-05, "loss": 0.6582, "step": 212070 }, { "epoch": 1.3549186716583828, "grad_norm": 0.9148417115211487, "learning_rate": 2.3582245759660735e-05, "loss": 1.134, "step": 212080 }, { "epoch": 1.3549825588081212, "grad_norm": 0.9399345517158508, "learning_rate": 2.357798576020382e-05, "loss": 0.8481, "step": 212090 }, { "epoch": 1.3550464459578602, "grad_norm": 1.0423952341079712, "learning_rate": 2.357372602683946e-05, "loss": 0.8451, "step": 212100 }, { "epoch": 1.3551103331075987, "grad_norm": 1.2409332990646362, "learning_rate": 2.3569466559610527e-05, "loss": 0.8279, "step": 212110 }, { "epoch": 1.3551742202573374, "grad_norm": 0.8522923588752747, "learning_rate": 2.3565207358559948e-05, "loss": 0.8555, "step": 212120 }, { "epoch": 1.355238107407076, "grad_norm": 1.4603486061096191, "learning_rate": 2.3560948423730578e-05, "loss": 0.8882, "step": 212130 }, { "epoch": 1.3553019945568148, "grad_norm": 1.0698403120040894, "learning_rate": 2.3556689755165357e-05, "loss": 0.9946, "step": 212140 }, { "epoch": 1.3553658817065535, "grad_norm": 0.9794582724571228, "learning_rate": 2.3552431352907123e-05, "loss": 0.7777, "step": 212150 }, { "epoch": 1.3554297688562922, "grad_norm": 1.0823343992233276, "learning_rate": 2.3548173216998805e-05, "loss": 1.0112, "step": 212160 }, { "epoch": 1.355493656006031, "grad_norm": 0.6649702787399292, "learning_rate": 2.3543915347483247e-05, "loss": 0.7617, "step": 212170 }, { "epoch": 1.3555575431557696, "grad_norm": 1.082112431526184, "learning_rate": 2.3539657744403366e-05, "loss": 0.7136, "step": 212180 }, { "epoch": 1.3556214303055083, "grad_norm": 0.6287432312965393, "learning_rate": 2.3535400407802012e-05, "loss": 0.5495, "step": 212190 }, { "epoch": 1.355685317455247, "grad_norm": 0.8478794097900391, "learning_rate": 2.353114333772206e-05, "loss": 0.8829, "step": 212200 }, { "epoch": 1.3557492046049857, "grad_norm": 0.7227674126625061, "learning_rate": 2.3526886534206422e-05, "loss": 0.9989, "step": 212210 }, { "epoch": 1.3558130917547244, "grad_norm": 1.2565025091171265, "learning_rate": 2.3522629997297914e-05, "loss": 0.9998, "step": 212220 }, { "epoch": 1.3558769789044631, "grad_norm": 1.0990771055221558, "learning_rate": 2.351837372703945e-05, "loss": 0.8897, "step": 212230 }, { "epoch": 1.3559408660542018, "grad_norm": 1.4418489933013916, "learning_rate": 2.351411772347387e-05, "loss": 1.0007, "step": 212240 }, { "epoch": 1.3560047532039405, "grad_norm": 1.40555739402771, "learning_rate": 2.3509861986644045e-05, "loss": 0.8061, "step": 212250 }, { "epoch": 1.3560686403536792, "grad_norm": 0.9722267985343933, "learning_rate": 2.3505606516592798e-05, "loss": 0.8834, "step": 212260 }, { "epoch": 1.356132527503418, "grad_norm": 0.7908661961555481, "learning_rate": 2.350135131336304e-05, "loss": 0.8076, "step": 212270 }, { "epoch": 1.3561964146531567, "grad_norm": 1.0190848112106323, "learning_rate": 2.3497096376997578e-05, "loss": 0.8599, "step": 212280 }, { "epoch": 1.3562603018028954, "grad_norm": 1.968063473701477, "learning_rate": 2.3492841707539305e-05, "loss": 0.9196, "step": 212290 }, { "epoch": 1.356324188952634, "grad_norm": 1.0753601789474487, "learning_rate": 2.3488587305031023e-05, "loss": 0.8658, "step": 212300 }, { "epoch": 1.3563880761023728, "grad_norm": 1.183738112449646, "learning_rate": 2.348433316951561e-05, "loss": 0.9116, "step": 212310 }, { "epoch": 1.3564519632521115, "grad_norm": 0.7145233154296875, "learning_rate": 2.3480079301035918e-05, "loss": 0.8693, "step": 212320 }, { "epoch": 1.3565158504018502, "grad_norm": 0.6723766326904297, "learning_rate": 2.3475825699634745e-05, "loss": 1.0396, "step": 212330 }, { "epoch": 1.356579737551589, "grad_norm": 0.8253989219665527, "learning_rate": 2.347157236535498e-05, "loss": 0.9733, "step": 212340 }, { "epoch": 1.3566436247013276, "grad_norm": 0.9459612369537354, "learning_rate": 2.346731929823941e-05, "loss": 0.796, "step": 212350 }, { "epoch": 1.3567075118510663, "grad_norm": 0.9628978371620178, "learning_rate": 2.346306649833091e-05, "loss": 0.7765, "step": 212360 }, { "epoch": 1.356771399000805, "grad_norm": 0.74045330286026, "learning_rate": 2.3458813965672267e-05, "loss": 0.7261, "step": 212370 }, { "epoch": 1.3568352861505437, "grad_norm": 0.8961535692214966, "learning_rate": 2.345456170030635e-05, "loss": 1.1465, "step": 212380 }, { "epoch": 1.3568991733002824, "grad_norm": 1.105933427810669, "learning_rate": 2.345030970227594e-05, "loss": 0.8767, "step": 212390 }, { "epoch": 1.3569630604500211, "grad_norm": 0.8771437406539917, "learning_rate": 2.3446057971623902e-05, "loss": 0.7039, "step": 212400 }, { "epoch": 1.3570269475997598, "grad_norm": 1.0957084894180298, "learning_rate": 2.344180650839301e-05, "loss": 0.8449, "step": 212410 }, { "epoch": 1.3570908347494985, "grad_norm": 0.6853296756744385, "learning_rate": 2.3437555312626126e-05, "loss": 0.8821, "step": 212420 }, { "epoch": 1.3571547218992372, "grad_norm": 1.5772802829742432, "learning_rate": 2.3433304384366017e-05, "loss": 0.9634, "step": 212430 }, { "epoch": 1.357218609048976, "grad_norm": 0.9938908815383911, "learning_rate": 2.3429053723655535e-05, "loss": 0.7178, "step": 212440 }, { "epoch": 1.3572824961987147, "grad_norm": 0.8694483637809753, "learning_rate": 2.3424803330537455e-05, "loss": 0.9839, "step": 212450 }, { "epoch": 1.3573463833484534, "grad_norm": 0.9842444062232971, "learning_rate": 2.3420553205054607e-05, "loss": 0.7376, "step": 212460 }, { "epoch": 1.357410270498192, "grad_norm": 0.7344105243682861, "learning_rate": 2.341630334724977e-05, "loss": 0.9092, "step": 212470 }, { "epoch": 1.3574741576479306, "grad_norm": 1.4985830783843994, "learning_rate": 2.3412053757165774e-05, "loss": 0.7086, "step": 212480 }, { "epoch": 1.3575380447976695, "grad_norm": 0.49012845754623413, "learning_rate": 2.340780443484538e-05, "loss": 1.0866, "step": 212490 }, { "epoch": 1.357601931947408, "grad_norm": 0.8759766221046448, "learning_rate": 2.3403555380331415e-05, "loss": 1.0279, "step": 212500 }, { "epoch": 1.357665819097147, "grad_norm": 0.5767194032669067, "learning_rate": 2.3399306593666647e-05, "loss": 0.678, "step": 212510 }, { "epoch": 1.3577297062468854, "grad_norm": 0.6936454176902771, "learning_rate": 2.3395058074893868e-05, "loss": 1.0519, "step": 212520 }, { "epoch": 1.3577935933966243, "grad_norm": 0.7158231139183044, "learning_rate": 2.3390809824055892e-05, "loss": 1.1214, "step": 212530 }, { "epoch": 1.3578574805463628, "grad_norm": 1.0264630317687988, "learning_rate": 2.3386561841195458e-05, "loss": 1.0875, "step": 212540 }, { "epoch": 1.3579213676961017, "grad_norm": 1.9632465839385986, "learning_rate": 2.338231412635539e-05, "loss": 0.889, "step": 212550 }, { "epoch": 1.3579852548458402, "grad_norm": 1.0401647090911865, "learning_rate": 2.3378066679578436e-05, "loss": 1.2824, "step": 212560 }, { "epoch": 1.3580491419955791, "grad_norm": 0.8090847730636597, "learning_rate": 2.3373819500907396e-05, "loss": 0.8421, "step": 212570 }, { "epoch": 1.3581130291453176, "grad_norm": 1.2447673082351685, "learning_rate": 2.3369572590385004e-05, "loss": 1.0739, "step": 212580 }, { "epoch": 1.3581769162950565, "grad_norm": 1.3129416704177856, "learning_rate": 2.3365325948054077e-05, "loss": 1.0271, "step": 212590 }, { "epoch": 1.358240803444795, "grad_norm": 0.7778066396713257, "learning_rate": 2.3361079573957346e-05, "loss": 1.0992, "step": 212600 }, { "epoch": 1.3583046905945337, "grad_norm": 0.9241915941238403, "learning_rate": 2.33568334681376e-05, "loss": 0.8088, "step": 212610 }, { "epoch": 1.3583685777442724, "grad_norm": 0.736827552318573, "learning_rate": 2.335258763063758e-05, "loss": 0.821, "step": 212620 }, { "epoch": 1.3584324648940111, "grad_norm": 1.1152037382125854, "learning_rate": 2.3348342061500067e-05, "loss": 0.8219, "step": 212630 }, { "epoch": 1.3584963520437499, "grad_norm": 0.8919548392295837, "learning_rate": 2.3344096760767793e-05, "loss": 1.0119, "step": 212640 }, { "epoch": 1.3585602391934886, "grad_norm": 1.2961301803588867, "learning_rate": 2.333985172848354e-05, "loss": 0.7603, "step": 212650 }, { "epoch": 1.3586241263432273, "grad_norm": 1.0925780534744263, "learning_rate": 2.3335606964690032e-05, "loss": 0.9653, "step": 212660 }, { "epoch": 1.358688013492966, "grad_norm": 1.0221478939056396, "learning_rate": 2.3331362469430045e-05, "loss": 0.8009, "step": 212670 }, { "epoch": 1.3587519006427047, "grad_norm": 0.905896008014679, "learning_rate": 2.3327118242746288e-05, "loss": 0.7346, "step": 212680 }, { "epoch": 1.3588157877924434, "grad_norm": 0.7704225778579712, "learning_rate": 2.3322874284681552e-05, "loss": 0.9561, "step": 212690 }, { "epoch": 1.358879674942182, "grad_norm": 1.1265662908554077, "learning_rate": 2.3318630595278522e-05, "loss": 0.9499, "step": 212700 }, { "epoch": 1.3589435620919208, "grad_norm": 0.8070195317268372, "learning_rate": 2.331438717457997e-05, "loss": 0.8712, "step": 212710 }, { "epoch": 1.3590074492416595, "grad_norm": 0.9437048435211182, "learning_rate": 2.3310144022628644e-05, "loss": 0.8336, "step": 212720 }, { "epoch": 1.3590713363913982, "grad_norm": 0.9660305976867676, "learning_rate": 2.3305901139467257e-05, "loss": 0.7254, "step": 212730 }, { "epoch": 1.359135223541137, "grad_norm": 0.7670518159866333, "learning_rate": 2.3301658525138543e-05, "loss": 0.8708, "step": 212740 }, { "epoch": 1.3591991106908756, "grad_norm": 0.7318940758705139, "learning_rate": 2.32974161796852e-05, "loss": 0.8674, "step": 212750 }, { "epoch": 1.3592629978406143, "grad_norm": 0.7752392888069153, "learning_rate": 2.329317410315e-05, "loss": 0.9143, "step": 212760 }, { "epoch": 1.359326884990353, "grad_norm": 1.292202353477478, "learning_rate": 2.328893229557562e-05, "loss": 0.7913, "step": 212770 }, { "epoch": 1.3593907721400917, "grad_norm": 0.8490725159645081, "learning_rate": 2.3284690757004824e-05, "loss": 1.0284, "step": 212780 }, { "epoch": 1.3594546592898304, "grad_norm": 0.8979281783103943, "learning_rate": 2.3280449487480284e-05, "loss": 1.2277, "step": 212790 }, { "epoch": 1.3595185464395692, "grad_norm": 0.689542293548584, "learning_rate": 2.327620848704475e-05, "loss": 0.9464, "step": 212800 }, { "epoch": 1.3595824335893079, "grad_norm": 0.6301685571670532, "learning_rate": 2.327196775574089e-05, "loss": 1.0883, "step": 212810 }, { "epoch": 1.3596463207390466, "grad_norm": 0.6643343567848206, "learning_rate": 2.326772729361147e-05, "loss": 0.7041, "step": 212820 }, { "epoch": 1.3597102078887853, "grad_norm": 1.3023102283477783, "learning_rate": 2.3263487100699132e-05, "loss": 1.058, "step": 212830 }, { "epoch": 1.359774095038524, "grad_norm": 0.8597766757011414, "learning_rate": 2.3259247177046618e-05, "loss": 0.8037, "step": 212840 }, { "epoch": 1.3598379821882627, "grad_norm": 0.8535871505737305, "learning_rate": 2.3255007522696638e-05, "loss": 0.8148, "step": 212850 }, { "epoch": 1.3599018693380014, "grad_norm": 1.6547738313674927, "learning_rate": 2.3250768137691843e-05, "loss": 0.8184, "step": 212860 }, { "epoch": 1.35996575648774, "grad_norm": 0.9821819067001343, "learning_rate": 2.324652902207498e-05, "loss": 0.8821, "step": 212870 }, { "epoch": 1.3600296436374788, "grad_norm": 0.9379867315292358, "learning_rate": 2.324229017588869e-05, "loss": 1.3132, "step": 212880 }, { "epoch": 1.3600935307872175, "grad_norm": 1.1494848728179932, "learning_rate": 2.3238051599175714e-05, "loss": 1.2216, "step": 212890 }, { "epoch": 1.3601574179369562, "grad_norm": 1.0652223825454712, "learning_rate": 2.3233813291978684e-05, "loss": 1.2067, "step": 212900 }, { "epoch": 1.360221305086695, "grad_norm": 0.7528551816940308, "learning_rate": 2.3229575254340335e-05, "loss": 0.7722, "step": 212910 }, { "epoch": 1.3602851922364336, "grad_norm": 1.1288870573043823, "learning_rate": 2.32253374863033e-05, "loss": 0.669, "step": 212920 }, { "epoch": 1.3603490793861723, "grad_norm": 1.154624581336975, "learning_rate": 2.32210999879103e-05, "loss": 0.9427, "step": 212930 }, { "epoch": 1.360412966535911, "grad_norm": 1.0726332664489746, "learning_rate": 2.3216862759203973e-05, "loss": 0.9466, "step": 212940 }, { "epoch": 1.3604768536856497, "grad_norm": 1.285915732383728, "learning_rate": 2.321262580022703e-05, "loss": 1.0116, "step": 212950 }, { "epoch": 1.3605407408353885, "grad_norm": 0.9262716174125671, "learning_rate": 2.3208389111022095e-05, "loss": 0.6734, "step": 212960 }, { "epoch": 1.360604627985127, "grad_norm": 0.6906473636627197, "learning_rate": 2.3204152691631874e-05, "loss": 0.8864, "step": 212970 }, { "epoch": 1.3606685151348659, "grad_norm": 1.7085462808609009, "learning_rate": 2.319991654209901e-05, "loss": 0.9229, "step": 212980 }, { "epoch": 1.3607324022846043, "grad_norm": 0.7899442315101624, "learning_rate": 2.3195680662466183e-05, "loss": 0.6897, "step": 212990 }, { "epoch": 1.3607962894343433, "grad_norm": 0.758979082107544, "learning_rate": 2.3191445052776024e-05, "loss": 0.9965, "step": 213000 }, { "epoch": 1.3608601765840818, "grad_norm": 1.0996798276901245, "learning_rate": 2.3187209713071222e-05, "loss": 1.0065, "step": 213010 }, { "epoch": 1.3609240637338207, "grad_norm": 0.710588812828064, "learning_rate": 2.31829746433944e-05, "loss": 0.7498, "step": 213020 }, { "epoch": 1.3609879508835592, "grad_norm": 0.9244322180747986, "learning_rate": 2.3178739843788244e-05, "loss": 1.0425, "step": 213030 }, { "epoch": 1.361051838033298, "grad_norm": 1.1750261783599854, "learning_rate": 2.317450531429536e-05, "loss": 0.7303, "step": 213040 }, { "epoch": 1.3611157251830366, "grad_norm": 0.7922186255455017, "learning_rate": 2.3170271054958416e-05, "loss": 1.1356, "step": 213050 }, { "epoch": 1.3611796123327755, "grad_norm": 0.9548922777175903, "learning_rate": 2.3166037065820067e-05, "loss": 0.8428, "step": 213060 }, { "epoch": 1.361243499482514, "grad_norm": 0.9790717959403992, "learning_rate": 2.3161803346922927e-05, "loss": 0.7591, "step": 213070 }, { "epoch": 1.361307386632253, "grad_norm": 0.8037964701652527, "learning_rate": 2.315756989830966e-05, "loss": 0.6687, "step": 213080 }, { "epoch": 1.3613712737819914, "grad_norm": 1.0827137231826782, "learning_rate": 2.3153336720022867e-05, "loss": 0.9129, "step": 213090 }, { "epoch": 1.3614351609317301, "grad_norm": 0.7812821865081787, "learning_rate": 2.314910381210522e-05, "loss": 0.8242, "step": 213100 }, { "epoch": 1.3614990480814688, "grad_norm": 0.9069188833236694, "learning_rate": 2.3144871174599308e-05, "loss": 0.7806, "step": 213110 }, { "epoch": 1.3615629352312075, "grad_norm": 1.1667894124984741, "learning_rate": 2.31406388075478e-05, "loss": 1.0365, "step": 213120 }, { "epoch": 1.3616268223809462, "grad_norm": 0.7389217019081116, "learning_rate": 2.313640671099327e-05, "loss": 1.1612, "step": 213130 }, { "epoch": 1.361690709530685, "grad_norm": 0.9983386993408203, "learning_rate": 2.3132174884978388e-05, "loss": 1.0671, "step": 213140 }, { "epoch": 1.3617545966804236, "grad_norm": 1.3475415706634521, "learning_rate": 2.3127943329545727e-05, "loss": 0.9481, "step": 213150 }, { "epoch": 1.3618184838301624, "grad_norm": 0.8335529565811157, "learning_rate": 2.3123712044737946e-05, "loss": 0.688, "step": 213160 }, { "epoch": 1.361882370979901, "grad_norm": 2.321277379989624, "learning_rate": 2.311948103059761e-05, "loss": 0.8938, "step": 213170 }, { "epoch": 1.3619462581296398, "grad_norm": 0.7873269319534302, "learning_rate": 2.311525028716738e-05, "loss": 0.8673, "step": 213180 }, { "epoch": 1.3620101452793785, "grad_norm": 0.9356293082237244, "learning_rate": 2.311101981448982e-05, "loss": 0.7351, "step": 213190 }, { "epoch": 1.3620740324291172, "grad_norm": 0.7549158334732056, "learning_rate": 2.3106789612607567e-05, "loss": 0.7732, "step": 213200 }, { "epoch": 1.3621379195788559, "grad_norm": 1.441564679145813, "learning_rate": 2.3102559681563214e-05, "loss": 0.9888, "step": 213210 }, { "epoch": 1.3622018067285946, "grad_norm": 1.071210503578186, "learning_rate": 2.309833002139933e-05, "loss": 0.6508, "step": 213220 }, { "epoch": 1.3622656938783333, "grad_norm": 0.7152714729309082, "learning_rate": 2.309410063215856e-05, "loss": 1.1104, "step": 213230 }, { "epoch": 1.362329581028072, "grad_norm": 0.8112555742263794, "learning_rate": 2.308987151388345e-05, "loss": 0.8675, "step": 213240 }, { "epoch": 1.3623934681778107, "grad_norm": 0.9434011578559875, "learning_rate": 2.3085642666616637e-05, "loss": 0.9279, "step": 213250 }, { "epoch": 1.3624573553275494, "grad_norm": 1.1366301774978638, "learning_rate": 2.3081414090400666e-05, "loss": 0.7816, "step": 213260 }, { "epoch": 1.3625212424772881, "grad_norm": 1.0150545835494995, "learning_rate": 2.3077185785278166e-05, "loss": 0.9145, "step": 213270 }, { "epoch": 1.3625851296270268, "grad_norm": 0.6110414266586304, "learning_rate": 2.3072957751291675e-05, "loss": 0.7733, "step": 213280 }, { "epoch": 1.3626490167767655, "grad_norm": 0.844383955001831, "learning_rate": 2.306872998848381e-05, "loss": 0.8608, "step": 213290 }, { "epoch": 1.3627129039265042, "grad_norm": 0.8418871760368347, "learning_rate": 2.3064502496897118e-05, "loss": 0.9518, "step": 213300 }, { "epoch": 1.362776791076243, "grad_norm": 0.8400182723999023, "learning_rate": 2.3060275276574206e-05, "loss": 0.8602, "step": 213310 }, { "epoch": 1.3628406782259816, "grad_norm": 1.9541168212890625, "learning_rate": 2.3056048327557604e-05, "loss": 0.8702, "step": 213320 }, { "epoch": 1.3629045653757204, "grad_norm": 1.0552854537963867, "learning_rate": 2.305182164988993e-05, "loss": 0.9381, "step": 213330 }, { "epoch": 1.362968452525459, "grad_norm": 0.7301352024078369, "learning_rate": 2.3047595243613705e-05, "loss": 0.7445, "step": 213340 }, { "epoch": 1.3630323396751978, "grad_norm": 0.8270401954650879, "learning_rate": 2.3043369108771535e-05, "loss": 0.6635, "step": 213350 }, { "epoch": 1.3630962268249365, "grad_norm": 1.0124050378799438, "learning_rate": 2.303914324540594e-05, "loss": 0.7198, "step": 213360 }, { "epoch": 1.3631601139746752, "grad_norm": 1.3121830224990845, "learning_rate": 2.3034917653559497e-05, "loss": 0.8133, "step": 213370 }, { "epoch": 1.3632240011244139, "grad_norm": 0.9895945191383362, "learning_rate": 2.3030692333274777e-05, "loss": 0.7254, "step": 213380 }, { "epoch": 1.3632878882741526, "grad_norm": 0.8924286365509033, "learning_rate": 2.30264672845943e-05, "loss": 0.7883, "step": 213390 }, { "epoch": 1.3633517754238913, "grad_norm": 0.6295365691184998, "learning_rate": 2.3022242507560647e-05, "loss": 0.8513, "step": 213400 }, { "epoch": 1.36341566257363, "grad_norm": 0.8795775771141052, "learning_rate": 2.301801800221634e-05, "loss": 0.9533, "step": 213410 }, { "epoch": 1.3634795497233687, "grad_norm": 0.703488290309906, "learning_rate": 2.3013793768603948e-05, "loss": 0.8536, "step": 213420 }, { "epoch": 1.3635434368731074, "grad_norm": 1.8506830930709839, "learning_rate": 2.300956980676598e-05, "loss": 0.9916, "step": 213430 }, { "epoch": 1.3636073240228461, "grad_norm": 0.8599974513053894, "learning_rate": 2.3005346116745014e-05, "loss": 0.81, "step": 213440 }, { "epoch": 1.3636712111725848, "grad_norm": 1.0625780820846558, "learning_rate": 2.3001122698583548e-05, "loss": 0.6802, "step": 213450 }, { "epoch": 1.3637350983223233, "grad_norm": 0.860063910484314, "learning_rate": 2.299689955232415e-05, "loss": 0.7409, "step": 213460 }, { "epoch": 1.3637989854720622, "grad_norm": 1.2501275539398193, "learning_rate": 2.2992676678009324e-05, "loss": 0.9112, "step": 213470 }, { "epoch": 1.3638628726218007, "grad_norm": 1.930945634841919, "learning_rate": 2.298845407568162e-05, "loss": 0.7647, "step": 213480 }, { "epoch": 1.3639267597715397, "grad_norm": 1.4821895360946655, "learning_rate": 2.2984231745383533e-05, "loss": 1.1456, "step": 213490 }, { "epoch": 1.3639906469212781, "grad_norm": 2.2362570762634277, "learning_rate": 2.2980009687157628e-05, "loss": 1.0458, "step": 213500 }, { "epoch": 1.364054534071017, "grad_norm": 1.25023353099823, "learning_rate": 2.297578790104638e-05, "loss": 1.0971, "step": 213510 }, { "epoch": 1.3641184212207556, "grad_norm": 0.988797664642334, "learning_rate": 2.297156638709234e-05, "loss": 0.8612, "step": 213520 }, { "epoch": 1.3641823083704945, "grad_norm": 0.8659194707870483, "learning_rate": 2.2967345145338e-05, "loss": 0.9736, "step": 213530 }, { "epoch": 1.364246195520233, "grad_norm": 0.8853728175163269, "learning_rate": 2.2963124175825896e-05, "loss": 0.9972, "step": 213540 }, { "epoch": 1.364310082669972, "grad_norm": 1.851197361946106, "learning_rate": 2.2958903478598504e-05, "loss": 0.9088, "step": 213550 }, { "epoch": 1.3643739698197104, "grad_norm": 0.7157253623008728, "learning_rate": 2.2954683053698344e-05, "loss": 0.7841, "step": 213560 }, { "epoch": 1.3644378569694493, "grad_norm": 1.3922441005706787, "learning_rate": 2.295046290116794e-05, "loss": 0.8863, "step": 213570 }, { "epoch": 1.3645017441191878, "grad_norm": 0.6246136426925659, "learning_rate": 2.2946243021049763e-05, "loss": 0.8969, "step": 213580 }, { "epoch": 1.3645656312689265, "grad_norm": 1.028562307357788, "learning_rate": 2.2942023413386344e-05, "loss": 0.7831, "step": 213590 }, { "epoch": 1.3646295184186652, "grad_norm": 0.7829952836036682, "learning_rate": 2.2937804078220132e-05, "loss": 0.8312, "step": 213600 }, { "epoch": 1.364693405568404, "grad_norm": 0.6332594752311707, "learning_rate": 2.2933585015593666e-05, "loss": 1.2098, "step": 213610 }, { "epoch": 1.3647572927181426, "grad_norm": 0.7257765531539917, "learning_rate": 2.2929366225549398e-05, "loss": 0.6906, "step": 213620 }, { "epoch": 1.3648211798678813, "grad_norm": 0.8252881169319153, "learning_rate": 2.292514770812985e-05, "loss": 0.729, "step": 213630 }, { "epoch": 1.36488506701762, "grad_norm": 1.1068850755691528, "learning_rate": 2.2920929463377474e-05, "loss": 0.8345, "step": 213640 }, { "epoch": 1.3649489541673587, "grad_norm": 1.2068063020706177, "learning_rate": 2.2916711491334773e-05, "loss": 0.9002, "step": 213650 }, { "epoch": 1.3650128413170974, "grad_norm": 1.1177639961242676, "learning_rate": 2.2912493792044204e-05, "loss": 0.7525, "step": 213660 }, { "epoch": 1.3650767284668361, "grad_norm": 1.1064202785491943, "learning_rate": 2.2908276365548276e-05, "loss": 0.847, "step": 213670 }, { "epoch": 1.3651406156165748, "grad_norm": 1.1914925575256348, "learning_rate": 2.2904059211889423e-05, "loss": 0.9992, "step": 213680 }, { "epoch": 1.3652045027663136, "grad_norm": 1.1300867795944214, "learning_rate": 2.2899842331110155e-05, "loss": 1.0911, "step": 213690 }, { "epoch": 1.3652683899160523, "grad_norm": 1.6770097017288208, "learning_rate": 2.2895625723252916e-05, "loss": 1.1771, "step": 213700 }, { "epoch": 1.365332277065791, "grad_norm": 0.8899660110473633, "learning_rate": 2.2891409388360165e-05, "loss": 0.7521, "step": 213710 }, { "epoch": 1.3653961642155297, "grad_norm": 0.9477804899215698, "learning_rate": 2.2887193326474383e-05, "loss": 0.7287, "step": 213720 }, { "epoch": 1.3654600513652684, "grad_norm": 2.1322531700134277, "learning_rate": 2.2882977537638007e-05, "loss": 0.9309, "step": 213730 }, { "epoch": 1.365523938515007, "grad_norm": 1.0908210277557373, "learning_rate": 2.287876202189352e-05, "loss": 0.8519, "step": 213740 }, { "epoch": 1.3655878256647458, "grad_norm": 0.8647819757461548, "learning_rate": 2.2874546779283344e-05, "loss": 0.6995, "step": 213750 }, { "epoch": 1.3656517128144845, "grad_norm": 1.0555542707443237, "learning_rate": 2.2870331809849978e-05, "loss": 0.8651, "step": 213760 }, { "epoch": 1.3657155999642232, "grad_norm": 0.5738083720207214, "learning_rate": 2.286611711363581e-05, "loss": 0.8654, "step": 213770 }, { "epoch": 1.365779487113962, "grad_norm": 0.8562591671943665, "learning_rate": 2.286190269068334e-05, "loss": 0.862, "step": 213780 }, { "epoch": 1.3658433742637006, "grad_norm": 1.0389251708984375, "learning_rate": 2.285768854103497e-05, "loss": 0.9576, "step": 213790 }, { "epoch": 1.3659072614134393, "grad_norm": 1.2807830572128296, "learning_rate": 2.2853474664733178e-05, "loss": 1.1481, "step": 213800 }, { "epoch": 1.365971148563178, "grad_norm": 1.054508090019226, "learning_rate": 2.284926106182036e-05, "loss": 1.0283, "step": 213810 }, { "epoch": 1.3660350357129167, "grad_norm": 0.9369847774505615, "learning_rate": 2.284504773233899e-05, "loss": 0.6915, "step": 213820 }, { "epoch": 1.3660989228626554, "grad_norm": 0.5408312082290649, "learning_rate": 2.2840834676331464e-05, "loss": 0.6874, "step": 213830 }, { "epoch": 1.3661628100123941, "grad_norm": 1.0407315492630005, "learning_rate": 2.2836621893840247e-05, "loss": 1.0371, "step": 213840 }, { "epoch": 1.3662266971621329, "grad_norm": 0.866386890411377, "learning_rate": 2.2832409384907726e-05, "loss": 0.8674, "step": 213850 }, { "epoch": 1.3662905843118716, "grad_norm": 1.1507446765899658, "learning_rate": 2.282819714957637e-05, "loss": 0.6694, "step": 213860 }, { "epoch": 1.3663544714616103, "grad_norm": 1.4874440431594849, "learning_rate": 2.282398518788856e-05, "loss": 0.7711, "step": 213870 }, { "epoch": 1.366418358611349, "grad_norm": 0.7873359322547913, "learning_rate": 2.281977349988672e-05, "loss": 0.9726, "step": 213880 }, { "epoch": 1.3664822457610877, "grad_norm": 0.7608168125152588, "learning_rate": 2.2815562085613294e-05, "loss": 0.8677, "step": 213890 }, { "epoch": 1.3665461329108264, "grad_norm": 1.3678696155548096, "learning_rate": 2.2811350945110665e-05, "loss": 1.1596, "step": 213900 }, { "epoch": 1.366610020060565, "grad_norm": 1.2935272455215454, "learning_rate": 2.2807140078421268e-05, "loss": 0.8756, "step": 213910 }, { "epoch": 1.3666739072103038, "grad_norm": 1.2438666820526123, "learning_rate": 2.2802929485587476e-05, "loss": 0.9217, "step": 213920 }, { "epoch": 1.3667377943600425, "grad_norm": 1.114298939704895, "learning_rate": 2.2798719166651734e-05, "loss": 1.0421, "step": 213930 }, { "epoch": 1.3668016815097812, "grad_norm": 0.7132619023323059, "learning_rate": 2.2794509121656406e-05, "loss": 0.7425, "step": 213940 }, { "epoch": 1.3668655686595197, "grad_norm": 0.914767861366272, "learning_rate": 2.2790299350643917e-05, "loss": 0.7722, "step": 213950 }, { "epoch": 1.3669294558092586, "grad_norm": 0.7639245390892029, "learning_rate": 2.2786089853656645e-05, "loss": 0.6271, "step": 213960 }, { "epoch": 1.366993342958997, "grad_norm": 0.6949672102928162, "learning_rate": 2.2781880630737e-05, "loss": 0.8217, "step": 213970 }, { "epoch": 1.367057230108736, "grad_norm": 0.8127015233039856, "learning_rate": 2.2777671681927355e-05, "loss": 1.0723, "step": 213980 }, { "epoch": 1.3671211172584745, "grad_norm": 1.067530632019043, "learning_rate": 2.2773463007270118e-05, "loss": 0.7072, "step": 213990 }, { "epoch": 1.3671850044082134, "grad_norm": 1.0031954050064087, "learning_rate": 2.276925460680765e-05, "loss": 0.8714, "step": 214000 }, { "epoch": 1.367248891557952, "grad_norm": 1.203041672706604, "learning_rate": 2.276504648058236e-05, "loss": 0.8492, "step": 214010 }, { "epoch": 1.3673127787076909, "grad_norm": 0.9548153877258301, "learning_rate": 2.27608386286366e-05, "loss": 1.481, "step": 214020 }, { "epoch": 1.3673766658574293, "grad_norm": 1.148245096206665, "learning_rate": 2.2756631051012772e-05, "loss": 1.1608, "step": 214030 }, { "epoch": 1.3674405530071683, "grad_norm": 0.7084174156188965, "learning_rate": 2.2752423747753227e-05, "loss": 0.8583, "step": 214040 }, { "epoch": 1.3675044401569068, "grad_norm": 1.07353937625885, "learning_rate": 2.2748216718900363e-05, "loss": 0.9307, "step": 214050 }, { "epoch": 1.3675683273066455, "grad_norm": 0.5779439806938171, "learning_rate": 2.274400996449651e-05, "loss": 0.8697, "step": 214060 }, { "epoch": 1.3676322144563842, "grad_norm": 0.9872804284095764, "learning_rate": 2.2739803484584077e-05, "loss": 0.8438, "step": 214070 }, { "epoch": 1.3676961016061229, "grad_norm": 0.7021150588989258, "learning_rate": 2.2735597279205388e-05, "loss": 0.8816, "step": 214080 }, { "epoch": 1.3677599887558616, "grad_norm": 1.5829733610153198, "learning_rate": 2.2731391348402824e-05, "loss": 0.9133, "step": 214090 }, { "epoch": 1.3678238759056003, "grad_norm": 1.0218287706375122, "learning_rate": 2.272718569221876e-05, "loss": 1.0297, "step": 214100 }, { "epoch": 1.367887763055339, "grad_norm": 1.0792396068572998, "learning_rate": 2.2722980310695508e-05, "loss": 0.9565, "step": 214110 }, { "epoch": 1.3679516502050777, "grad_norm": 0.630308210849762, "learning_rate": 2.2718775203875465e-05, "loss": 0.7774, "step": 214120 }, { "epoch": 1.3680155373548164, "grad_norm": 1.0366673469543457, "learning_rate": 2.271457037180093e-05, "loss": 0.9403, "step": 214130 }, { "epoch": 1.368079424504555, "grad_norm": 4.574321746826172, "learning_rate": 2.27103658145143e-05, "loss": 0.7539, "step": 214140 }, { "epoch": 1.3681433116542938, "grad_norm": 1.278727650642395, "learning_rate": 2.270616153205788e-05, "loss": 0.7088, "step": 214150 }, { "epoch": 1.3682071988040325, "grad_norm": 1.062374472618103, "learning_rate": 2.2701957524474042e-05, "loss": 0.7578, "step": 214160 }, { "epoch": 1.3682710859537712, "grad_norm": 1.1994844675064087, "learning_rate": 2.2697753791805087e-05, "loss": 0.9603, "step": 214170 }, { "epoch": 1.36833497310351, "grad_norm": 0.8804671168327332, "learning_rate": 2.2693550334093412e-05, "loss": 0.8427, "step": 214180 }, { "epoch": 1.3683988602532486, "grad_norm": 1.0678255558013916, "learning_rate": 2.268934715138127e-05, "loss": 0.7044, "step": 214190 }, { "epoch": 1.3684627474029873, "grad_norm": 0.7160975933074951, "learning_rate": 2.2685144243711048e-05, "loss": 0.7304, "step": 214200 }, { "epoch": 1.368526634552726, "grad_norm": 2.113140821456909, "learning_rate": 2.2680941611125032e-05, "loss": 0.9366, "step": 214210 }, { "epoch": 1.3685905217024648, "grad_norm": 0.808111310005188, "learning_rate": 2.2676739253665574e-05, "loss": 0.674, "step": 214220 }, { "epoch": 1.3686544088522035, "grad_norm": 0.9904662370681763, "learning_rate": 2.2672537171375007e-05, "loss": 1.0068, "step": 214230 }, { "epoch": 1.3687182960019422, "grad_norm": 1.1128408908843994, "learning_rate": 2.2668335364295613e-05, "loss": 0.7818, "step": 214240 }, { "epoch": 1.3687821831516809, "grad_norm": 0.9577661752700806, "learning_rate": 2.2664133832469746e-05, "loss": 1.0456, "step": 214250 }, { "epoch": 1.3688460703014196, "grad_norm": 0.9240872859954834, "learning_rate": 2.265993257593968e-05, "loss": 1.0572, "step": 214260 }, { "epoch": 1.3689099574511583, "grad_norm": 1.0088399648666382, "learning_rate": 2.2655731594747764e-05, "loss": 0.9687, "step": 214270 }, { "epoch": 1.368973844600897, "grad_norm": 0.9549551010131836, "learning_rate": 2.2651530888936274e-05, "loss": 1.1469, "step": 214280 }, { "epoch": 1.3690377317506357, "grad_norm": 0.9399086833000183, "learning_rate": 2.264733045854754e-05, "loss": 1.0533, "step": 214290 }, { "epoch": 1.3691016189003744, "grad_norm": 0.9131207466125488, "learning_rate": 2.2643130303623834e-05, "loss": 0.8803, "step": 214300 }, { "epoch": 1.3691655060501131, "grad_norm": 0.5964922308921814, "learning_rate": 2.2638930424207493e-05, "loss": 0.8778, "step": 214310 }, { "epoch": 1.3692293931998518, "grad_norm": 1.3003700971603394, "learning_rate": 2.263473082034077e-05, "loss": 0.6881, "step": 214320 }, { "epoch": 1.3692932803495905, "grad_norm": 1.2452034950256348, "learning_rate": 2.263053149206601e-05, "loss": 0.4928, "step": 214330 }, { "epoch": 1.3693571674993292, "grad_norm": 1.003343105316162, "learning_rate": 2.262633243942545e-05, "loss": 1.0223, "step": 214340 }, { "epoch": 1.369421054649068, "grad_norm": 1.2331476211547852, "learning_rate": 2.2622133662461424e-05, "loss": 0.7122, "step": 214350 }, { "epoch": 1.3694849417988066, "grad_norm": 1.6258493661880493, "learning_rate": 2.2617935161216182e-05, "loss": 0.8656, "step": 214360 }, { "epoch": 1.3695488289485453, "grad_norm": 1.027157187461853, "learning_rate": 2.2613736935732034e-05, "loss": 0.7493, "step": 214370 }, { "epoch": 1.369612716098284, "grad_norm": 0.9395402669906616, "learning_rate": 2.2609538986051232e-05, "loss": 1.0075, "step": 214380 }, { "epoch": 1.3696766032480228, "grad_norm": 1.0628516674041748, "learning_rate": 2.2605341312216088e-05, "loss": 0.8902, "step": 214390 }, { "epoch": 1.3697404903977615, "grad_norm": 1.0474220514297485, "learning_rate": 2.260114391426884e-05, "loss": 0.8962, "step": 214400 }, { "epoch": 1.3698043775475002, "grad_norm": 0.9830883741378784, "learning_rate": 2.2596946792251772e-05, "loss": 0.9756, "step": 214410 }, { "epoch": 1.3698682646972389, "grad_norm": 1.4582486152648926, "learning_rate": 2.2592749946207175e-05, "loss": 0.7602, "step": 214420 }, { "epoch": 1.3699321518469776, "grad_norm": 1.0433655977249146, "learning_rate": 2.2588553376177283e-05, "loss": 0.853, "step": 214430 }, { "epoch": 1.369996038996716, "grad_norm": 0.9852280616760254, "learning_rate": 2.2584357082204387e-05, "loss": 1.0361, "step": 214440 }, { "epoch": 1.370059926146455, "grad_norm": 0.9670477509498596, "learning_rate": 2.2580161064330717e-05, "loss": 0.8454, "step": 214450 }, { "epoch": 1.3701238132961935, "grad_norm": 0.7297714948654175, "learning_rate": 2.2575965322598562e-05, "loss": 0.7035, "step": 214460 }, { "epoch": 1.3701877004459324, "grad_norm": 1.3668649196624756, "learning_rate": 2.2571769857050147e-05, "loss": 0.8924, "step": 214470 }, { "epoch": 1.370251587595671, "grad_norm": 1.1395583152770996, "learning_rate": 2.2567574667727754e-05, "loss": 0.76, "step": 214480 }, { "epoch": 1.3703154747454098, "grad_norm": 0.6072282791137695, "learning_rate": 2.2563379754673604e-05, "loss": 0.9047, "step": 214490 }, { "epoch": 1.3703793618951483, "grad_norm": 0.9671122431755066, "learning_rate": 2.2559185117929966e-05, "loss": 0.6726, "step": 214500 }, { "epoch": 1.3704432490448872, "grad_norm": 1.3676090240478516, "learning_rate": 2.2554990757539057e-05, "loss": 0.9646, "step": 214510 }, { "epoch": 1.3705071361946257, "grad_norm": 0.5860486626625061, "learning_rate": 2.2550796673543158e-05, "loss": 0.8328, "step": 214520 }, { "epoch": 1.3705710233443646, "grad_norm": 0.5600946545600891, "learning_rate": 2.2546602865984463e-05, "loss": 0.8783, "step": 214530 }, { "epoch": 1.3706349104941031, "grad_norm": 0.7447695136070251, "learning_rate": 2.254240933490524e-05, "loss": 0.7817, "step": 214540 }, { "epoch": 1.3706987976438418, "grad_norm": 0.5831778049468994, "learning_rate": 2.2538216080347696e-05, "loss": 0.5659, "step": 214550 }, { "epoch": 1.3707626847935805, "grad_norm": 0.9292523264884949, "learning_rate": 2.253402310235409e-05, "loss": 0.8999, "step": 214560 }, { "epoch": 1.3708265719433192, "grad_norm": 1.09566068649292, "learning_rate": 2.2529830400966618e-05, "loss": 0.6296, "step": 214570 }, { "epoch": 1.370890459093058, "grad_norm": 0.8166540265083313, "learning_rate": 2.2525637976227536e-05, "loss": 0.9464, "step": 214580 }, { "epoch": 1.3709543462427967, "grad_norm": 0.8931240439414978, "learning_rate": 2.2521445828179028e-05, "loss": 0.9223, "step": 214590 }, { "epoch": 1.3710182333925354, "grad_norm": 0.9479050040245056, "learning_rate": 2.2517253956863356e-05, "loss": 0.7455, "step": 214600 }, { "epoch": 1.371082120542274, "grad_norm": 0.949516236782074, "learning_rate": 2.251306236232269e-05, "loss": 0.7871, "step": 214610 }, { "epoch": 1.3711460076920128, "grad_norm": 0.8050344586372375, "learning_rate": 2.2508871044599268e-05, "loss": 0.9032, "step": 214620 }, { "epoch": 1.3712098948417515, "grad_norm": 0.5675163865089417, "learning_rate": 2.2504680003735314e-05, "loss": 0.9212, "step": 214630 }, { "epoch": 1.3712737819914902, "grad_norm": 1.4616913795471191, "learning_rate": 2.2500489239772997e-05, "loss": 0.9347, "step": 214640 }, { "epoch": 1.371337669141229, "grad_norm": 1.066999077796936, "learning_rate": 2.249629875275457e-05, "loss": 0.8779, "step": 214650 }, { "epoch": 1.3714015562909676, "grad_norm": 1.2175489664077759, "learning_rate": 2.2492108542722185e-05, "loss": 1.121, "step": 214660 }, { "epoch": 1.3714654434407063, "grad_norm": 1.4719328880310059, "learning_rate": 2.2487918609718105e-05, "loss": 0.9578, "step": 214670 }, { "epoch": 1.371529330590445, "grad_norm": 1.204595923423767, "learning_rate": 2.2483728953784445e-05, "loss": 0.8123, "step": 214680 }, { "epoch": 1.3715932177401837, "grad_norm": 0.9094199538230896, "learning_rate": 2.247953957496346e-05, "loss": 0.7774, "step": 214690 }, { "epoch": 1.3716571048899224, "grad_norm": 0.7834303379058838, "learning_rate": 2.2475350473297303e-05, "loss": 0.8452, "step": 214700 }, { "epoch": 1.3717209920396611, "grad_norm": 0.8282777667045593, "learning_rate": 2.24711616488282e-05, "loss": 0.996, "step": 214710 }, { "epoch": 1.3717848791893998, "grad_norm": 2.026803731918335, "learning_rate": 2.2466973101598288e-05, "loss": 1.13, "step": 214720 }, { "epoch": 1.3718487663391385, "grad_norm": 0.8557829856872559, "learning_rate": 2.246278483164978e-05, "loss": 0.6976, "step": 214730 }, { "epoch": 1.3719126534888773, "grad_norm": 0.7997044920921326, "learning_rate": 2.2458596839024875e-05, "loss": 0.7832, "step": 214740 }, { "epoch": 1.371976540638616, "grad_norm": 0.9678969383239746, "learning_rate": 2.24544091237657e-05, "loss": 0.7593, "step": 214750 }, { "epoch": 1.3720404277883547, "grad_norm": 1.38689124584198, "learning_rate": 2.245022168591448e-05, "loss": 0.6785, "step": 214760 }, { "epoch": 1.3721043149380934, "grad_norm": 0.7496582865715027, "learning_rate": 2.2446034525513343e-05, "loss": 0.7313, "step": 214770 }, { "epoch": 1.372168202087832, "grad_norm": 1.183701992034912, "learning_rate": 2.244184764260449e-05, "loss": 1.1634, "step": 214780 }, { "epoch": 1.3722320892375708, "grad_norm": 1.503637671470642, "learning_rate": 2.2437661037230063e-05, "loss": 0.9871, "step": 214790 }, { "epoch": 1.3722959763873095, "grad_norm": 0.6041831970214844, "learning_rate": 2.2433474709432245e-05, "loss": 0.7255, "step": 214800 }, { "epoch": 1.3723598635370482, "grad_norm": 1.6717568635940552, "learning_rate": 2.2429288659253166e-05, "loss": 0.8908, "step": 214810 }, { "epoch": 1.372423750686787, "grad_norm": 1.4359157085418701, "learning_rate": 2.2425102886735023e-05, "loss": 1.1106, "step": 214820 }, { "epoch": 1.3724876378365256, "grad_norm": 1.5275332927703857, "learning_rate": 2.2420917391919933e-05, "loss": 1.1028, "step": 214830 }, { "epoch": 1.3725515249862643, "grad_norm": 1.115195870399475, "learning_rate": 2.241673217485008e-05, "loss": 1.1771, "step": 214840 }, { "epoch": 1.372615412136003, "grad_norm": 0.9098389148712158, "learning_rate": 2.241254723556758e-05, "loss": 0.8661, "step": 214850 }, { "epoch": 1.3726792992857417, "grad_norm": 0.8639233708381653, "learning_rate": 2.240836257411461e-05, "loss": 0.9158, "step": 214860 }, { "epoch": 1.3727431864354804, "grad_norm": 0.9827595949172974, "learning_rate": 2.2404178190533286e-05, "loss": 0.832, "step": 214870 }, { "epoch": 1.3728070735852191, "grad_norm": 1.564755916595459, "learning_rate": 2.239999408486578e-05, "loss": 1.0676, "step": 214880 }, { "epoch": 1.3728709607349578, "grad_norm": 1.3222931623458862, "learning_rate": 2.2395810257154186e-05, "loss": 0.8444, "step": 214890 }, { "epoch": 1.3729348478846966, "grad_norm": 1.785544991493225, "learning_rate": 2.239162670744069e-05, "loss": 0.7951, "step": 214900 }, { "epoch": 1.372998735034435, "grad_norm": 0.7356242537498474, "learning_rate": 2.2387443435767375e-05, "loss": 0.6265, "step": 214910 }, { "epoch": 1.373062622184174, "grad_norm": 1.0977994203567505, "learning_rate": 2.2383260442176413e-05, "loss": 0.9025, "step": 214920 }, { "epoch": 1.3731265093339124, "grad_norm": 0.8271603584289551, "learning_rate": 2.2379077726709892e-05, "loss": 1.1127, "step": 214930 }, { "epoch": 1.3731903964836514, "grad_norm": 1.3959122896194458, "learning_rate": 2.2374895289409953e-05, "loss": 0.9131, "step": 214940 }, { "epoch": 1.3732542836333899, "grad_norm": 0.8944525718688965, "learning_rate": 2.237071313031873e-05, "loss": 0.8691, "step": 214950 }, { "epoch": 1.3733181707831288, "grad_norm": 1.157148003578186, "learning_rate": 2.2366531249478316e-05, "loss": 0.861, "step": 214960 }, { "epoch": 1.3733820579328673, "grad_norm": 1.1422433853149414, "learning_rate": 2.2362349646930858e-05, "loss": 0.9311, "step": 214970 }, { "epoch": 1.3734459450826062, "grad_norm": 0.9563591480255127, "learning_rate": 2.2358168322718433e-05, "loss": 0.5491, "step": 214980 }, { "epoch": 1.3735098322323447, "grad_norm": 1.180920958518982, "learning_rate": 2.2353987276883182e-05, "loss": 0.8662, "step": 214990 }, { "epoch": 1.3735737193820836, "grad_norm": 0.8338070511817932, "learning_rate": 2.2349806509467174e-05, "loss": 0.8664, "step": 215000 }, { "epoch": 1.373637606531822, "grad_norm": 0.6759228706359863, "learning_rate": 2.2345626020512556e-05, "loss": 0.7186, "step": 215010 }, { "epoch": 1.373701493681561, "grad_norm": 0.5735316872596741, "learning_rate": 2.2341445810061395e-05, "loss": 0.702, "step": 215020 }, { "epoch": 1.3737653808312995, "grad_norm": 0.9189698696136475, "learning_rate": 2.2337265878155816e-05, "loss": 0.9387, "step": 215030 }, { "epoch": 1.3738292679810382, "grad_norm": 0.998117208480835, "learning_rate": 2.2333086224837886e-05, "loss": 1.0075, "step": 215040 }, { "epoch": 1.373893155130777, "grad_norm": 0.8722880482673645, "learning_rate": 2.232890685014973e-05, "loss": 0.7019, "step": 215050 }, { "epoch": 1.3739570422805156, "grad_norm": 0.9833821058273315, "learning_rate": 2.23247277541334e-05, "loss": 1.0301, "step": 215060 }, { "epoch": 1.3740209294302543, "grad_norm": 0.890064001083374, "learning_rate": 2.2320548936831025e-05, "loss": 0.7118, "step": 215070 }, { "epoch": 1.374084816579993, "grad_norm": 0.7501860857009888, "learning_rate": 2.2316370398284646e-05, "loss": 0.8444, "step": 215080 }, { "epoch": 1.3741487037297317, "grad_norm": 1.1047126054763794, "learning_rate": 2.2312192138536393e-05, "loss": 0.8822, "step": 215090 }, { "epoch": 1.3742125908794705, "grad_norm": 1.0687460899353027, "learning_rate": 2.2308014157628294e-05, "loss": 0.9311, "step": 215100 }, { "epoch": 1.3742764780292092, "grad_norm": 1.225905179977417, "learning_rate": 2.2303836455602468e-05, "loss": 0.8496, "step": 215110 }, { "epoch": 1.3743403651789479, "grad_norm": 1.1884292364120483, "learning_rate": 2.2299659032500953e-05, "loss": 1.0965, "step": 215120 }, { "epoch": 1.3744042523286866, "grad_norm": 1.1230326890945435, "learning_rate": 2.2295481888365856e-05, "loss": 0.7909, "step": 215130 }, { "epoch": 1.3744681394784253, "grad_norm": 0.9136203527450562, "learning_rate": 2.22913050232392e-05, "loss": 0.9625, "step": 215140 }, { "epoch": 1.374532026628164, "grad_norm": 1.0029051303863525, "learning_rate": 2.2287128437163095e-05, "loss": 0.7709, "step": 215150 }, { "epoch": 1.3745959137779027, "grad_norm": 1.027402639389038, "learning_rate": 2.2282952130179584e-05, "loss": 0.6512, "step": 215160 }, { "epoch": 1.3746598009276414, "grad_norm": 2.28485107421875, "learning_rate": 2.2278776102330706e-05, "loss": 0.948, "step": 215170 }, { "epoch": 1.37472368807738, "grad_norm": 0.8364056348800659, "learning_rate": 2.2274600353658555e-05, "loss": 0.9034, "step": 215180 }, { "epoch": 1.3747875752271188, "grad_norm": 0.9878214001655579, "learning_rate": 2.227042488420514e-05, "loss": 0.9154, "step": 215190 }, { "epoch": 1.3748514623768575, "grad_norm": 0.9499024152755737, "learning_rate": 2.2266249694012553e-05, "loss": 0.7886, "step": 215200 }, { "epoch": 1.3749153495265962, "grad_norm": 1.2275502681732178, "learning_rate": 2.226207478312281e-05, "loss": 0.8554, "step": 215210 }, { "epoch": 1.374979236676335, "grad_norm": 0.6945877075195312, "learning_rate": 2.2257900151577992e-05, "loss": 0.7988, "step": 215220 }, { "epoch": 1.3750431238260736, "grad_norm": 1.13132905960083, "learning_rate": 2.22537257994201e-05, "loss": 1.0158, "step": 215230 }, { "epoch": 1.3751070109758123, "grad_norm": 0.9972148537635803, "learning_rate": 2.224955172669121e-05, "loss": 0.9711, "step": 215240 }, { "epoch": 1.375170898125551, "grad_norm": 1.326120138168335, "learning_rate": 2.224537793343332e-05, "loss": 1.03, "step": 215250 }, { "epoch": 1.3752347852752898, "grad_norm": 1.3188718557357788, "learning_rate": 2.224120441968849e-05, "loss": 0.9548, "step": 215260 }, { "epoch": 1.3752986724250285, "grad_norm": 1.372585654258728, "learning_rate": 2.2237031185498764e-05, "loss": 0.7345, "step": 215270 }, { "epoch": 1.3753625595747672, "grad_norm": 0.8943201303482056, "learning_rate": 2.223285823090613e-05, "loss": 0.9941, "step": 215280 }, { "epoch": 1.3754264467245059, "grad_norm": 0.7395158410072327, "learning_rate": 2.2228685555952655e-05, "loss": 1.0877, "step": 215290 }, { "epoch": 1.3754903338742446, "grad_norm": 0.8535774350166321, "learning_rate": 2.2224513160680327e-05, "loss": 1.0003, "step": 215300 }, { "epoch": 1.3755542210239833, "grad_norm": 0.8217079639434814, "learning_rate": 2.22203410451312e-05, "loss": 0.9577, "step": 215310 }, { "epoch": 1.375618108173722, "grad_norm": 0.6563711166381836, "learning_rate": 2.221616920934725e-05, "loss": 0.8651, "step": 215320 }, { "epoch": 1.3756819953234607, "grad_norm": 0.7311186194419861, "learning_rate": 2.221199765337053e-05, "loss": 0.7602, "step": 215330 }, { "epoch": 1.3757458824731994, "grad_norm": 0.7395635843276978, "learning_rate": 2.220782637724302e-05, "loss": 0.8297, "step": 215340 }, { "epoch": 1.375809769622938, "grad_norm": 0.8966172933578491, "learning_rate": 2.2203655381006755e-05, "loss": 0.7369, "step": 215350 }, { "epoch": 1.3758736567726768, "grad_norm": 1.1480859518051147, "learning_rate": 2.219948466470371e-05, "loss": 0.5877, "step": 215360 }, { "epoch": 1.3759375439224155, "grad_norm": 0.6096826195716858, "learning_rate": 2.219531422837593e-05, "loss": 1.1601, "step": 215370 }, { "epoch": 1.3760014310721542, "grad_norm": 1.1459797620773315, "learning_rate": 2.2191144072065362e-05, "loss": 0.733, "step": 215380 }, { "epoch": 1.376065318221893, "grad_norm": 0.884146511554718, "learning_rate": 2.218697419581406e-05, "loss": 0.9174, "step": 215390 }, { "epoch": 1.3761292053716314, "grad_norm": 1.501900553703308, "learning_rate": 2.2182804599663963e-05, "loss": 0.822, "step": 215400 }, { "epoch": 1.3761930925213703, "grad_norm": 0.6949141621589661, "learning_rate": 2.217863528365711e-05, "loss": 0.7998, "step": 215410 }, { "epoch": 1.3762569796711088, "grad_norm": 0.9651074409484863, "learning_rate": 2.217446624783545e-05, "loss": 0.8932, "step": 215420 }, { "epoch": 1.3763208668208478, "grad_norm": 0.8191916942596436, "learning_rate": 2.217029749224101e-05, "loss": 0.8928, "step": 215430 }, { "epoch": 1.3763847539705862, "grad_norm": 1.7742459774017334, "learning_rate": 2.2166129016915726e-05, "loss": 0.9563, "step": 215440 }, { "epoch": 1.3764486411203252, "grad_norm": 1.2688603401184082, "learning_rate": 2.216196082190162e-05, "loss": 0.7006, "step": 215450 }, { "epoch": 1.3765125282700637, "grad_norm": 1.0408973693847656, "learning_rate": 2.2157792907240637e-05, "loss": 0.8796, "step": 215460 }, { "epoch": 1.3765764154198026, "grad_norm": 1.0547093152999878, "learning_rate": 2.215362527297477e-05, "loss": 1.0913, "step": 215470 }, { "epoch": 1.376640302569541, "grad_norm": 0.9978485107421875, "learning_rate": 2.2149457919146e-05, "loss": 0.9159, "step": 215480 }, { "epoch": 1.37670418971928, "grad_norm": 0.8262386918067932, "learning_rate": 2.2145290845796268e-05, "loss": 0.8887, "step": 215490 }, { "epoch": 1.3767680768690185, "grad_norm": 0.5713022351264954, "learning_rate": 2.2141124052967572e-05, "loss": 0.6874, "step": 215500 }, { "epoch": 1.3768319640187574, "grad_norm": 0.887799859046936, "learning_rate": 2.2136957540701836e-05, "loss": 0.9792, "step": 215510 }, { "epoch": 1.3768958511684959, "grad_norm": 3.2510287761688232, "learning_rate": 2.2132791309041066e-05, "loss": 0.7196, "step": 215520 }, { "epoch": 1.3769597383182346, "grad_norm": 1.2461315393447876, "learning_rate": 2.212862535802717e-05, "loss": 0.7465, "step": 215530 }, { "epoch": 1.3770236254679733, "grad_norm": 0.8961158394813538, "learning_rate": 2.212445968770216e-05, "loss": 0.8395, "step": 215540 }, { "epoch": 1.377087512617712, "grad_norm": 0.9440718293190002, "learning_rate": 2.212029429810793e-05, "loss": 0.9693, "step": 215550 }, { "epoch": 1.3771513997674507, "grad_norm": 5.130351543426514, "learning_rate": 2.2116129189286472e-05, "loss": 0.8738, "step": 215560 }, { "epoch": 1.3772152869171894, "grad_norm": 1.188360571861267, "learning_rate": 2.2111964361279704e-05, "loss": 0.996, "step": 215570 }, { "epoch": 1.3772791740669281, "grad_norm": 1.9239073991775513, "learning_rate": 2.21077998141296e-05, "loss": 0.7404, "step": 215580 }, { "epoch": 1.3773430612166668, "grad_norm": 1.9211300611495972, "learning_rate": 2.2103635547878053e-05, "loss": 0.7482, "step": 215590 }, { "epoch": 1.3774069483664055, "grad_norm": 0.9710404276847839, "learning_rate": 2.209947156256706e-05, "loss": 0.7785, "step": 215600 }, { "epoch": 1.3774708355161442, "grad_norm": 1.1858057975769043, "learning_rate": 2.20953078582385e-05, "loss": 0.8877, "step": 215610 }, { "epoch": 1.377534722665883, "grad_norm": 0.7331709861755371, "learning_rate": 2.2091144434934352e-05, "loss": 0.8328, "step": 215620 }, { "epoch": 1.3775986098156217, "grad_norm": 0.984566330909729, "learning_rate": 2.2086981292696506e-05, "loss": 0.7231, "step": 215630 }, { "epoch": 1.3776624969653604, "grad_norm": 1.0698328018188477, "learning_rate": 2.2082818431566926e-05, "loss": 0.9652, "step": 215640 }, { "epoch": 1.377726384115099, "grad_norm": 0.6931266784667969, "learning_rate": 2.2078655851587514e-05, "loss": 0.7886, "step": 215650 }, { "epoch": 1.3777902712648378, "grad_norm": 0.5347517132759094, "learning_rate": 2.207449355280018e-05, "loss": 0.8593, "step": 215660 }, { "epoch": 1.3778541584145765, "grad_norm": 1.114911675453186, "learning_rate": 2.2070331535246868e-05, "loss": 0.6501, "step": 215670 }, { "epoch": 1.3779180455643152, "grad_norm": 1.0739355087280273, "learning_rate": 2.206616979896946e-05, "loss": 0.8551, "step": 215680 }, { "epoch": 1.377981932714054, "grad_norm": 1.1093593835830688, "learning_rate": 2.206200834400991e-05, "loss": 0.9642, "step": 215690 }, { "epoch": 1.3780458198637926, "grad_norm": 1.3782254457473755, "learning_rate": 2.205784717041009e-05, "loss": 1.0764, "step": 215700 }, { "epoch": 1.3781097070135313, "grad_norm": 0.859857439994812, "learning_rate": 2.205368627821194e-05, "loss": 0.8035, "step": 215710 }, { "epoch": 1.37817359416327, "grad_norm": 1.3140043020248413, "learning_rate": 2.2049525667457322e-05, "loss": 0.8525, "step": 215720 }, { "epoch": 1.3782374813130087, "grad_norm": 0.9456557035446167, "learning_rate": 2.2045365338188185e-05, "loss": 1.048, "step": 215730 }, { "epoch": 1.3783013684627474, "grad_norm": 0.9949221611022949, "learning_rate": 2.2041205290446383e-05, "loss": 0.7744, "step": 215740 }, { "epoch": 1.3783652556124861, "grad_norm": 2.190789222717285, "learning_rate": 2.2037045524273847e-05, "loss": 0.9398, "step": 215750 }, { "epoch": 1.3784291427622248, "grad_norm": 2.9107577800750732, "learning_rate": 2.203288603971244e-05, "loss": 1.0338, "step": 215760 }, { "epoch": 1.3784930299119635, "grad_norm": 1.2673238515853882, "learning_rate": 2.202872683680408e-05, "loss": 0.6364, "step": 215770 }, { "epoch": 1.3785569170617022, "grad_norm": 2.4107301235198975, "learning_rate": 2.2024567915590627e-05, "loss": 0.8876, "step": 215780 }, { "epoch": 1.378620804211441, "grad_norm": 0.8848767876625061, "learning_rate": 2.2020409276113972e-05, "loss": 0.9391, "step": 215790 }, { "epoch": 1.3786846913611797, "grad_norm": 1.3917646408081055, "learning_rate": 2.201625091841602e-05, "loss": 1.2833, "step": 215800 }, { "epoch": 1.3787485785109184, "grad_norm": 0.8002511858940125, "learning_rate": 2.2012092842538618e-05, "loss": 0.7029, "step": 215810 }, { "epoch": 1.378812465660657, "grad_norm": 0.7884570360183716, "learning_rate": 2.2007935048523664e-05, "loss": 0.9076, "step": 215820 }, { "epoch": 1.3788763528103958, "grad_norm": 1.1183714866638184, "learning_rate": 2.2003777536413007e-05, "loss": 0.8994, "step": 215830 }, { "epoch": 1.3789402399601345, "grad_norm": 1.8624705076217651, "learning_rate": 2.1999620306248547e-05, "loss": 1.0381, "step": 215840 }, { "epoch": 1.3790041271098732, "grad_norm": 1.2032179832458496, "learning_rate": 2.199546335807212e-05, "loss": 0.7494, "step": 215850 }, { "epoch": 1.379068014259612, "grad_norm": 0.9324236512184143, "learning_rate": 2.1991306691925616e-05, "loss": 0.5596, "step": 215860 }, { "epoch": 1.3791319014093506, "grad_norm": 1.2062602043151855, "learning_rate": 2.1987150307850874e-05, "loss": 0.8037, "step": 215870 }, { "epoch": 1.3791957885590893, "grad_norm": 1.706902027130127, "learning_rate": 2.198299420588978e-05, "loss": 0.7597, "step": 215880 }, { "epoch": 1.3792596757088278, "grad_norm": 0.8762345314025879, "learning_rate": 2.197883838608415e-05, "loss": 0.9189, "step": 215890 }, { "epoch": 1.3793235628585667, "grad_norm": 1.1141246557235718, "learning_rate": 2.1974682848475874e-05, "loss": 0.7046, "step": 215900 }, { "epoch": 1.3793874500083052, "grad_norm": 2.5568530559539795, "learning_rate": 2.1970527593106777e-05, "loss": 0.9037, "step": 215910 }, { "epoch": 1.3794513371580441, "grad_norm": 0.8247811198234558, "learning_rate": 2.1966372620018733e-05, "loss": 0.8501, "step": 215920 }, { "epoch": 1.3795152243077826, "grad_norm": 0.7716491222381592, "learning_rate": 2.196221792925355e-05, "loss": 0.8452, "step": 215930 }, { "epoch": 1.3795791114575215, "grad_norm": 0.950640082359314, "learning_rate": 2.1958063520853107e-05, "loss": 0.8706, "step": 215940 }, { "epoch": 1.37964299860726, "grad_norm": 0.7813429236412048, "learning_rate": 2.1953909394859202e-05, "loss": 0.8404, "step": 215950 }, { "epoch": 1.379706885756999, "grad_norm": 1.2660363912582397, "learning_rate": 2.194975555131371e-05, "loss": 0.9184, "step": 215960 }, { "epoch": 1.3797707729067374, "grad_norm": 0.8251976370811462, "learning_rate": 2.1945601990258434e-05, "loss": 0.9045, "step": 215970 }, { "epoch": 1.3798346600564764, "grad_norm": 1.511618971824646, "learning_rate": 2.1941448711735234e-05, "loss": 0.639, "step": 215980 }, { "epoch": 1.3798985472062149, "grad_norm": 0.873430609703064, "learning_rate": 2.1937711002663826e-05, "loss": 0.763, "step": 215990 }, { "epoch": 1.3799624343559538, "grad_norm": 0.9361000657081604, "learning_rate": 2.1933558261066743e-05, "loss": 0.8217, "step": 216000 }, { "epoch": 1.3800263215056923, "grad_norm": 1.2457658052444458, "learning_rate": 2.1929405802123038e-05, "loss": 0.9674, "step": 216010 }, { "epoch": 1.380090208655431, "grad_norm": 1.4045180082321167, "learning_rate": 2.1925253625874474e-05, "loss": 0.628, "step": 216020 }, { "epoch": 1.3801540958051697, "grad_norm": 0.8839975595474243, "learning_rate": 2.192110173236292e-05, "loss": 0.7358, "step": 216030 }, { "epoch": 1.3802179829549084, "grad_norm": 0.8986994028091431, "learning_rate": 2.1916950121630144e-05, "loss": 0.7854, "step": 216040 }, { "epoch": 1.380281870104647, "grad_norm": 1.0271767377853394, "learning_rate": 2.1912798793717986e-05, "loss": 1.0156, "step": 216050 }, { "epoch": 1.3803457572543858, "grad_norm": 1.1156021356582642, "learning_rate": 2.190864774866823e-05, "loss": 0.8847, "step": 216060 }, { "epoch": 1.3804096444041245, "grad_norm": 1.4262725114822388, "learning_rate": 2.1904496986522715e-05, "loss": 0.8734, "step": 216070 }, { "epoch": 1.3804735315538632, "grad_norm": 0.9161853790283203, "learning_rate": 2.19003465073232e-05, "loss": 0.8119, "step": 216080 }, { "epoch": 1.380537418703602, "grad_norm": 0.8312424421310425, "learning_rate": 2.1896196311111523e-05, "loss": 1.0997, "step": 216090 }, { "epoch": 1.3806013058533406, "grad_norm": 1.6136534214019775, "learning_rate": 2.1892046397929444e-05, "loss": 0.9357, "step": 216100 }, { "epoch": 1.3806651930030793, "grad_norm": 0.7072123289108276, "learning_rate": 2.188789676781878e-05, "loss": 0.8362, "step": 216110 }, { "epoch": 1.380729080152818, "grad_norm": 1.436051845550537, "learning_rate": 2.1883747420821327e-05, "loss": 0.806, "step": 216120 }, { "epoch": 1.3807929673025567, "grad_norm": 1.851639986038208, "learning_rate": 2.1879598356978848e-05, "loss": 1.0181, "step": 216130 }, { "epoch": 1.3808568544522954, "grad_norm": 1.6991009712219238, "learning_rate": 2.187544957633316e-05, "loss": 0.9347, "step": 216140 }, { "epoch": 1.3809207416020342, "grad_norm": 0.9016327261924744, "learning_rate": 2.1871301078926e-05, "loss": 1.0082, "step": 216150 }, { "epoch": 1.3809846287517729, "grad_norm": 0.9443797469139099, "learning_rate": 2.1867152864799194e-05, "loss": 0.725, "step": 216160 }, { "epoch": 1.3810485159015116, "grad_norm": 1.1066298484802246, "learning_rate": 2.1863004933994484e-05, "loss": 0.6321, "step": 216170 }, { "epoch": 1.3811124030512503, "grad_norm": 1.2663660049438477, "learning_rate": 2.1858857286553676e-05, "loss": 0.9728, "step": 216180 }, { "epoch": 1.381176290200989, "grad_norm": 2.669292688369751, "learning_rate": 2.1854709922518495e-05, "loss": 1.0648, "step": 216190 }, { "epoch": 1.3812401773507277, "grad_norm": 1.2658430337905884, "learning_rate": 2.1850562841930756e-05, "loss": 1.0093, "step": 216200 }, { "epoch": 1.3813040645004664, "grad_norm": 0.9764119982719421, "learning_rate": 2.184641604483218e-05, "loss": 0.9526, "step": 216210 }, { "epoch": 1.381367951650205, "grad_norm": 1.3725136518478394, "learning_rate": 2.1842269531264575e-05, "loss": 0.7896, "step": 216220 }, { "epoch": 1.3814318387999438, "grad_norm": 0.7538588047027588, "learning_rate": 2.1838123301269653e-05, "loss": 0.6495, "step": 216230 }, { "epoch": 1.3814957259496825, "grad_norm": 1.4654730558395386, "learning_rate": 2.1833977354889212e-05, "loss": 0.9431, "step": 216240 }, { "epoch": 1.3815596130994212, "grad_norm": 1.0827220678329468, "learning_rate": 2.182983169216497e-05, "loss": 1.0752, "step": 216250 }, { "epoch": 1.38162350024916, "grad_norm": 1.0800297260284424, "learning_rate": 2.182568631313871e-05, "loss": 0.8834, "step": 216260 }, { "epoch": 1.3816873873988986, "grad_norm": 3.10566782951355, "learning_rate": 2.1821541217852164e-05, "loss": 0.7927, "step": 216270 }, { "epoch": 1.3817512745486373, "grad_norm": 1.3620527982711792, "learning_rate": 2.1817396406347056e-05, "loss": 0.9694, "step": 216280 }, { "epoch": 1.381815161698376, "grad_norm": 0.870901882648468, "learning_rate": 2.181325187866517e-05, "loss": 1.1438, "step": 216290 }, { "epoch": 1.3818790488481147, "grad_norm": 1.473087191581726, "learning_rate": 2.1809107634848202e-05, "loss": 0.726, "step": 216300 }, { "epoch": 1.3819429359978534, "grad_norm": 0.7571781277656555, "learning_rate": 2.1804963674937924e-05, "loss": 0.7195, "step": 216310 }, { "epoch": 1.3820068231475922, "grad_norm": 1.2076596021652222, "learning_rate": 2.180081999897604e-05, "loss": 0.91, "step": 216320 }, { "epoch": 1.3820707102973309, "grad_norm": 1.4176630973815918, "learning_rate": 2.1796676607004307e-05, "loss": 0.8324, "step": 216330 }, { "epoch": 1.3821345974470696, "grad_norm": 1.1328399181365967, "learning_rate": 2.179253349906443e-05, "loss": 1.0424, "step": 216340 }, { "epoch": 1.3821984845968083, "grad_norm": 0.8762274980545044, "learning_rate": 2.1788390675198157e-05, "loss": 1.0355, "step": 216350 }, { "epoch": 1.382262371746547, "grad_norm": 0.8054455518722534, "learning_rate": 2.1784248135447177e-05, "loss": 0.832, "step": 216360 }, { "epoch": 1.3823262588962857, "grad_norm": 1.6195545196533203, "learning_rate": 2.1780105879853247e-05, "loss": 0.8238, "step": 216370 }, { "epoch": 1.3823901460460242, "grad_norm": 1.0791467428207397, "learning_rate": 2.1775963908458047e-05, "loss": 0.8072, "step": 216380 }, { "epoch": 1.382454033195763, "grad_norm": 0.9370194673538208, "learning_rate": 2.1771822221303323e-05, "loss": 0.9024, "step": 216390 }, { "epoch": 1.3825179203455016, "grad_norm": 0.7822071313858032, "learning_rate": 2.176768081843076e-05, "loss": 0.6919, "step": 216400 }, { "epoch": 1.3825818074952405, "grad_norm": 0.9446224570274353, "learning_rate": 2.1763539699882087e-05, "loss": 0.9925, "step": 216410 }, { "epoch": 1.382645694644979, "grad_norm": 0.71107017993927, "learning_rate": 2.1759398865698977e-05, "loss": 0.9031, "step": 216420 }, { "epoch": 1.382709581794718, "grad_norm": 0.7870688438415527, "learning_rate": 2.175525831592316e-05, "loss": 0.8776, "step": 216430 }, { "epoch": 1.3827734689444564, "grad_norm": 1.1367087364196777, "learning_rate": 2.1751118050596336e-05, "loss": 0.8533, "step": 216440 }, { "epoch": 1.3828373560941953, "grad_norm": 0.8634241819381714, "learning_rate": 2.1746978069760184e-05, "loss": 1.0664, "step": 216450 }, { "epoch": 1.3829012432439338, "grad_norm": 0.9241101741790771, "learning_rate": 2.1742838373456415e-05, "loss": 0.9777, "step": 216460 }, { "epoch": 1.3829651303936727, "grad_norm": 0.8248175382614136, "learning_rate": 2.1738698961726694e-05, "loss": 0.7993, "step": 216470 }, { "epoch": 1.3830290175434112, "grad_norm": 0.4851258397102356, "learning_rate": 2.1734559834612745e-05, "loss": 0.8707, "step": 216480 }, { "epoch": 1.38309290469315, "grad_norm": 0.9273546934127808, "learning_rate": 2.173042099215621e-05, "loss": 0.8059, "step": 216490 }, { "epoch": 1.3831567918428886, "grad_norm": 0.8645648956298828, "learning_rate": 2.172628243439881e-05, "loss": 1.1275, "step": 216500 }, { "epoch": 1.3832206789926274, "grad_norm": 0.38949495553970337, "learning_rate": 2.172214416138219e-05, "loss": 0.745, "step": 216510 }, { "epoch": 1.383284566142366, "grad_norm": 1.0118781328201294, "learning_rate": 2.171800617314806e-05, "loss": 0.8089, "step": 216520 }, { "epoch": 1.3833484532921048, "grad_norm": 0.7357847690582275, "learning_rate": 2.1713868469738057e-05, "loss": 0.8804, "step": 216530 }, { "epoch": 1.3834123404418435, "grad_norm": 2.8220245838165283, "learning_rate": 2.1709731051193887e-05, "loss": 0.9014, "step": 216540 }, { "epoch": 1.3834762275915822, "grad_norm": 0.9902445077896118, "learning_rate": 2.1705593917557183e-05, "loss": 0.8465, "step": 216550 }, { "epoch": 1.3835401147413209, "grad_norm": 1.9937920570373535, "learning_rate": 2.170145706886964e-05, "loss": 1.14, "step": 216560 }, { "epoch": 1.3836040018910596, "grad_norm": 1.017596960067749, "learning_rate": 2.169732050517289e-05, "loss": 0.7427, "step": 216570 }, { "epoch": 1.3836678890407983, "grad_norm": 0.7377170324325562, "learning_rate": 2.1693184226508635e-05, "loss": 0.9442, "step": 216580 }, { "epoch": 1.383731776190537, "grad_norm": 1.1950017213821411, "learning_rate": 2.1689048232918475e-05, "loss": 0.809, "step": 216590 }, { "epoch": 1.3837956633402757, "grad_norm": 1.2431923151016235, "learning_rate": 2.168491252444411e-05, "loss": 0.748, "step": 216600 }, { "epoch": 1.3838595504900144, "grad_norm": 0.7115759253501892, "learning_rate": 2.168077710112716e-05, "loss": 0.8706, "step": 216610 }, { "epoch": 1.3839234376397531, "grad_norm": 0.7203143835067749, "learning_rate": 2.16766419630093e-05, "loss": 0.7823, "step": 216620 }, { "epoch": 1.3839873247894918, "grad_norm": 0.8078294396400452, "learning_rate": 2.1672507110132134e-05, "loss": 0.8612, "step": 216630 }, { "epoch": 1.3840512119392305, "grad_norm": 1.1752616167068481, "learning_rate": 2.1668372542537336e-05, "loss": 0.8119, "step": 216640 }, { "epoch": 1.3841150990889692, "grad_norm": 1.060088872909546, "learning_rate": 2.1664238260266556e-05, "loss": 0.86, "step": 216650 }, { "epoch": 1.384178986238708, "grad_norm": 4.633458614349365, "learning_rate": 2.1660104263361385e-05, "loss": 1.0264, "step": 216660 }, { "epoch": 1.3842428733884466, "grad_norm": 0.9945595264434814, "learning_rate": 2.1655970551863502e-05, "loss": 0.8249, "step": 216670 }, { "epoch": 1.3843067605381854, "grad_norm": 1.518704891204834, "learning_rate": 2.1651837125814506e-05, "loss": 0.8972, "step": 216680 }, { "epoch": 1.384370647687924, "grad_norm": 1.1230610609054565, "learning_rate": 2.1647703985256047e-05, "loss": 0.8053, "step": 216690 }, { "epoch": 1.3844345348376628, "grad_norm": 0.8968077898025513, "learning_rate": 2.1643571130229718e-05, "loss": 0.7231, "step": 216700 }, { "epoch": 1.3844984219874015, "grad_norm": 0.8500282168388367, "learning_rate": 2.1639438560777182e-05, "loss": 0.9592, "step": 216710 }, { "epoch": 1.3845623091371402, "grad_norm": 0.7221376895904541, "learning_rate": 2.163530627694001e-05, "loss": 0.987, "step": 216720 }, { "epoch": 1.3846261962868789, "grad_norm": 1.0014212131500244, "learning_rate": 2.1631174278759864e-05, "loss": 0.9718, "step": 216730 }, { "epoch": 1.3846900834366176, "grad_norm": 1.3466345071792603, "learning_rate": 2.162704256627832e-05, "loss": 0.7581, "step": 216740 }, { "epoch": 1.3847539705863563, "grad_norm": 0.6369860172271729, "learning_rate": 2.1622911139537015e-05, "loss": 0.7653, "step": 216750 }, { "epoch": 1.384817857736095, "grad_norm": 0.9595978856086731, "learning_rate": 2.1618779998577553e-05, "loss": 0.7024, "step": 216760 }, { "epoch": 1.3848817448858337, "grad_norm": 1.0387907028198242, "learning_rate": 2.1614649143441508e-05, "loss": 0.9721, "step": 216770 }, { "epoch": 1.3849456320355724, "grad_norm": 1.3099180459976196, "learning_rate": 2.1610518574170518e-05, "loss": 0.833, "step": 216780 }, { "epoch": 1.3850095191853111, "grad_norm": 0.8216334581375122, "learning_rate": 2.160638829080615e-05, "loss": 1.0301, "step": 216790 }, { "epoch": 1.3850734063350498, "grad_norm": 1.4595752954483032, "learning_rate": 2.1602258293390038e-05, "loss": 0.6508, "step": 216800 }, { "epoch": 1.3851372934847885, "grad_norm": 0.721744954586029, "learning_rate": 2.1598128581963733e-05, "loss": 0.9679, "step": 216810 }, { "epoch": 1.3852011806345272, "grad_norm": 0.7369323372840881, "learning_rate": 2.159399915656886e-05, "loss": 0.9548, "step": 216820 }, { "epoch": 1.385265067784266, "grad_norm": 0.7699748873710632, "learning_rate": 2.1589870017246973e-05, "loss": 0.74, "step": 216830 }, { "epoch": 1.3853289549340047, "grad_norm": 0.5005737543106079, "learning_rate": 2.158574116403969e-05, "loss": 0.8124, "step": 216840 }, { "epoch": 1.3853928420837434, "grad_norm": 1.0250499248504639, "learning_rate": 2.158161259698856e-05, "loss": 0.7027, "step": 216850 }, { "epoch": 1.385456729233482, "grad_norm": 1.1445086002349854, "learning_rate": 2.1577484316135194e-05, "loss": 1.0355, "step": 216860 }, { "epoch": 1.3855206163832205, "grad_norm": 2.682488203048706, "learning_rate": 2.1573356321521132e-05, "loss": 1.2289, "step": 216870 }, { "epoch": 1.3855845035329595, "grad_norm": 1.184380292892456, "learning_rate": 2.156922861318798e-05, "loss": 0.8661, "step": 216880 }, { "epoch": 1.385648390682698, "grad_norm": 1.1919716596603394, "learning_rate": 2.156510119117727e-05, "loss": 0.789, "step": 216890 }, { "epoch": 1.385712277832437, "grad_norm": 0.9529315233230591, "learning_rate": 2.156097405553062e-05, "loss": 0.776, "step": 216900 }, { "epoch": 1.3857761649821754, "grad_norm": 1.4514542818069458, "learning_rate": 2.1556847206289537e-05, "loss": 0.7248, "step": 216910 }, { "epoch": 1.3858400521319143, "grad_norm": 1.3809146881103516, "learning_rate": 2.1552720643495632e-05, "loss": 0.7629, "step": 216920 }, { "epoch": 1.3859039392816528, "grad_norm": 1.508679986000061, "learning_rate": 2.1548594367190422e-05, "loss": 0.9625, "step": 216930 }, { "epoch": 1.3859678264313917, "grad_norm": 0.9351204037666321, "learning_rate": 2.15444683774155e-05, "loss": 1.1496, "step": 216940 }, { "epoch": 1.3860317135811302, "grad_norm": 0.6512327790260315, "learning_rate": 2.1540342674212377e-05, "loss": 0.9739, "step": 216950 }, { "epoch": 1.3860956007308691, "grad_norm": 1.4669448137283325, "learning_rate": 2.1536217257622627e-05, "loss": 0.8527, "step": 216960 }, { "epoch": 1.3861594878806076, "grad_norm": 0.9330663681030273, "learning_rate": 2.1532092127687813e-05, "loss": 1.2608, "step": 216970 }, { "epoch": 1.3862233750303463, "grad_norm": 1.1834372282028198, "learning_rate": 2.152796728444944e-05, "loss": 1.0855, "step": 216980 }, { "epoch": 1.386287262180085, "grad_norm": 1.1099574565887451, "learning_rate": 2.1523842727949084e-05, "loss": 0.6813, "step": 216990 }, { "epoch": 1.3863511493298237, "grad_norm": 0.8671126365661621, "learning_rate": 2.151971845822825e-05, "loss": 1.033, "step": 217000 }, { "epoch": 1.3864150364795624, "grad_norm": 0.9332876205444336, "learning_rate": 2.151559447532851e-05, "loss": 0.8845, "step": 217010 }, { "epoch": 1.3864789236293011, "grad_norm": 1.091829776763916, "learning_rate": 2.151147077929136e-05, "loss": 0.8085, "step": 217020 }, { "epoch": 1.3865428107790398, "grad_norm": 1.0661778450012207, "learning_rate": 2.150734737015836e-05, "loss": 1.1284, "step": 217030 }, { "epoch": 1.3866066979287786, "grad_norm": 1.2264496088027954, "learning_rate": 2.1503224247971003e-05, "loss": 1.2297, "step": 217040 }, { "epoch": 1.3866705850785173, "grad_norm": 0.6267675757408142, "learning_rate": 2.149910141277085e-05, "loss": 0.7822, "step": 217050 }, { "epoch": 1.386734472228256, "grad_norm": 1.2916646003723145, "learning_rate": 2.1494978864599384e-05, "loss": 0.7288, "step": 217060 }, { "epoch": 1.3867983593779947, "grad_norm": 1.2757755517959595, "learning_rate": 2.1490856603498165e-05, "loss": 0.7771, "step": 217070 }, { "epoch": 1.3868622465277334, "grad_norm": 1.0068482160568237, "learning_rate": 2.1486734629508657e-05, "loss": 0.8135, "step": 217080 }, { "epoch": 1.386926133677472, "grad_norm": 0.8916890621185303, "learning_rate": 2.1482612942672426e-05, "loss": 0.9525, "step": 217090 }, { "epoch": 1.3869900208272108, "grad_norm": 1.7053278684616089, "learning_rate": 2.1478491543030925e-05, "loss": 0.8083, "step": 217100 }, { "epoch": 1.3870539079769495, "grad_norm": 0.7205654978752136, "learning_rate": 2.1474370430625716e-05, "loss": 0.8443, "step": 217110 }, { "epoch": 1.3871177951266882, "grad_norm": 0.5934941172599792, "learning_rate": 2.1470249605498254e-05, "loss": 0.7754, "step": 217120 }, { "epoch": 1.387181682276427, "grad_norm": 0.9044548273086548, "learning_rate": 2.1466129067690076e-05, "loss": 1.0092, "step": 217130 }, { "epoch": 1.3872455694261656, "grad_norm": 1.3107562065124512, "learning_rate": 2.146200881724265e-05, "loss": 0.9626, "step": 217140 }, { "epoch": 1.3873094565759043, "grad_norm": 1.0588704347610474, "learning_rate": 2.14578888541975e-05, "loss": 0.859, "step": 217150 }, { "epoch": 1.387373343725643, "grad_norm": 1.0064996480941772, "learning_rate": 2.1453769178596083e-05, "loss": 0.8617, "step": 217160 }, { "epoch": 1.3874372308753817, "grad_norm": 0.774937093257904, "learning_rate": 2.1449649790479904e-05, "loss": 0.8253, "step": 217170 }, { "epoch": 1.3875011180251204, "grad_norm": 1.3672817945480347, "learning_rate": 2.1445530689890475e-05, "loss": 0.7635, "step": 217180 }, { "epoch": 1.3875650051748591, "grad_norm": 1.0915000438690186, "learning_rate": 2.1441411876869233e-05, "loss": 0.922, "step": 217190 }, { "epoch": 1.3876288923245979, "grad_norm": 0.8849831819534302, "learning_rate": 2.14372933514577e-05, "loss": 1.0391, "step": 217200 }, { "epoch": 1.3876927794743366, "grad_norm": 0.9821801781654358, "learning_rate": 2.1433175113697312e-05, "loss": 0.7842, "step": 217210 }, { "epoch": 1.3877566666240753, "grad_norm": 1.1746443510055542, "learning_rate": 2.1429057163629584e-05, "loss": 0.8189, "step": 217220 }, { "epoch": 1.387820553773814, "grad_norm": 1.5725518465042114, "learning_rate": 2.1424939501295947e-05, "loss": 0.7708, "step": 217230 }, { "epoch": 1.3878844409235527, "grad_norm": 0.9691241979598999, "learning_rate": 2.142082212673793e-05, "loss": 1.0128, "step": 217240 }, { "epoch": 1.3879483280732914, "grad_norm": 1.709093689918518, "learning_rate": 2.1416705039996924e-05, "loss": 0.9637, "step": 217250 }, { "epoch": 1.38801221522303, "grad_norm": 1.2288743257522583, "learning_rate": 2.1412999908047865e-05, "loss": 0.9048, "step": 217260 }, { "epoch": 1.3880761023727688, "grad_norm": 1.160415530204773, "learning_rate": 2.1408883368273474e-05, "loss": 0.8645, "step": 217270 }, { "epoch": 1.3881399895225075, "grad_norm": 1.401985764503479, "learning_rate": 2.140476711643636e-05, "loss": 1.0005, "step": 217280 }, { "epoch": 1.3882038766722462, "grad_norm": 1.4683265686035156, "learning_rate": 2.1400651152577996e-05, "loss": 0.7122, "step": 217290 }, { "epoch": 1.388267763821985, "grad_norm": 0.975030243396759, "learning_rate": 2.13965354767398e-05, "loss": 1.0364, "step": 217300 }, { "epoch": 1.3883316509717236, "grad_norm": 0.9857359528541565, "learning_rate": 2.1392420088963254e-05, "loss": 1.1408, "step": 217310 }, { "epoch": 1.3883955381214623, "grad_norm": 1.1964335441589355, "learning_rate": 2.138830498928977e-05, "loss": 0.7693, "step": 217320 }, { "epoch": 1.388459425271201, "grad_norm": 1.0191142559051514, "learning_rate": 2.1384190177760828e-05, "loss": 0.6866, "step": 217330 }, { "epoch": 1.3885233124209395, "grad_norm": 1.0814679861068726, "learning_rate": 2.1380075654417825e-05, "loss": 0.9072, "step": 217340 }, { "epoch": 1.3885871995706784, "grad_norm": 1.1120654344558716, "learning_rate": 2.1375961419302237e-05, "loss": 0.8842, "step": 217350 }, { "epoch": 1.388651086720417, "grad_norm": 0.6735295653343201, "learning_rate": 2.1371847472455465e-05, "loss": 0.8524, "step": 217360 }, { "epoch": 1.3887149738701559, "grad_norm": 0.8723472952842712, "learning_rate": 2.1367733813918972e-05, "loss": 0.901, "step": 217370 }, { "epoch": 1.3887788610198943, "grad_norm": 1.0541127920150757, "learning_rate": 2.1363620443734155e-05, "loss": 1.0075, "step": 217380 }, { "epoch": 1.3888427481696333, "grad_norm": 0.9354879260063171, "learning_rate": 2.1359507361942488e-05, "loss": 0.7836, "step": 217390 }, { "epoch": 1.3889066353193718, "grad_norm": 0.9547199606895447, "learning_rate": 2.1355394568585326e-05, "loss": 0.9017, "step": 217400 }, { "epoch": 1.3889705224691107, "grad_norm": 1.1774011850357056, "learning_rate": 2.135128206370412e-05, "loss": 0.7809, "step": 217410 }, { "epoch": 1.3890344096188492, "grad_norm": 0.6935076117515564, "learning_rate": 2.1347169847340305e-05, "loss": 0.9671, "step": 217420 }, { "epoch": 1.389098296768588, "grad_norm": 1.1165943145751953, "learning_rate": 2.1343057919535266e-05, "loss": 0.8947, "step": 217430 }, { "epoch": 1.3891621839183266, "grad_norm": 1.291062593460083, "learning_rate": 2.1338946280330435e-05, "loss": 0.8758, "step": 217440 }, { "epoch": 1.3892260710680655, "grad_norm": 3.222094774246216, "learning_rate": 2.1334834929767196e-05, "loss": 1.3247, "step": 217450 }, { "epoch": 1.389289958217804, "grad_norm": 0.9645674228668213, "learning_rate": 2.133072386788699e-05, "loss": 0.7852, "step": 217460 }, { "epoch": 1.3893538453675427, "grad_norm": 2.5170841217041016, "learning_rate": 2.1326613094731174e-05, "loss": 0.8575, "step": 217470 }, { "epoch": 1.3894177325172814, "grad_norm": 1.117675542831421, "learning_rate": 2.1322502610341188e-05, "loss": 0.9775, "step": 217480 }, { "epoch": 1.38948161966702, "grad_norm": 0.6247068643569946, "learning_rate": 2.1318392414758394e-05, "loss": 0.7127, "step": 217490 }, { "epoch": 1.3895455068167588, "grad_norm": 0.7332262992858887, "learning_rate": 2.131428250802422e-05, "loss": 0.9891, "step": 217500 }, { "epoch": 1.3896093939664975, "grad_norm": 2.6846208572387695, "learning_rate": 2.1310172890180018e-05, "loss": 0.9265, "step": 217510 }, { "epoch": 1.3896732811162362, "grad_norm": 1.089535117149353, "learning_rate": 2.1306063561267214e-05, "loss": 0.8925, "step": 217520 }, { "epoch": 1.389737168265975, "grad_norm": 0.637092649936676, "learning_rate": 2.1301954521327154e-05, "loss": 0.8965, "step": 217530 }, { "epoch": 1.3898010554157136, "grad_norm": 0.7360275983810425, "learning_rate": 2.1297845770401258e-05, "loss": 0.8893, "step": 217540 }, { "epoch": 1.3898649425654523, "grad_norm": 0.8324083089828491, "learning_rate": 2.129373730853087e-05, "loss": 0.9598, "step": 217550 }, { "epoch": 1.389928829715191, "grad_norm": 2.202166795730591, "learning_rate": 2.12896291357574e-05, "loss": 0.873, "step": 217560 }, { "epoch": 1.3899927168649298, "grad_norm": 0.8910554647445679, "learning_rate": 2.1285521252122188e-05, "loss": 0.9841, "step": 217570 }, { "epoch": 1.3900566040146685, "grad_norm": 0.9161718487739563, "learning_rate": 2.128141365766663e-05, "loss": 0.7002, "step": 217580 }, { "epoch": 1.3901204911644072, "grad_norm": 0.8528456091880798, "learning_rate": 2.1277306352432063e-05, "loss": 0.736, "step": 217590 }, { "epoch": 1.3901843783141459, "grad_norm": 0.5835663080215454, "learning_rate": 2.1273199336459893e-05, "loss": 0.7411, "step": 217600 }, { "epoch": 1.3902482654638846, "grad_norm": 1.2048559188842773, "learning_rate": 2.1269092609791437e-05, "loss": 0.9812, "step": 217610 }, { "epoch": 1.3903121526136233, "grad_norm": 0.8390817642211914, "learning_rate": 2.1264986172468077e-05, "loss": 0.9444, "step": 217620 }, { "epoch": 1.390376039763362, "grad_norm": 0.9608392715454102, "learning_rate": 2.1260880024531176e-05, "loss": 1.0438, "step": 217630 }, { "epoch": 1.3904399269131007, "grad_norm": 0.9510350823402405, "learning_rate": 2.1256774166022064e-05, "loss": 0.7556, "step": 217640 }, { "epoch": 1.3905038140628394, "grad_norm": 1.1506633758544922, "learning_rate": 2.1252668596982123e-05, "loss": 0.8586, "step": 217650 }, { "epoch": 1.3905677012125781, "grad_norm": 0.8959683775901794, "learning_rate": 2.124856331745266e-05, "loss": 0.9765, "step": 217660 }, { "epoch": 1.3906315883623168, "grad_norm": 0.5582273602485657, "learning_rate": 2.124445832747505e-05, "loss": 0.8269, "step": 217670 }, { "epoch": 1.3906954755120555, "grad_norm": 0.9217997193336487, "learning_rate": 2.124035362709061e-05, "loss": 1.1126, "step": 217680 }, { "epoch": 1.3907593626617942, "grad_norm": 0.7768685817718506, "learning_rate": 2.1236249216340704e-05, "loss": 0.9109, "step": 217690 }, { "epoch": 1.390823249811533, "grad_norm": 1.1810338497161865, "learning_rate": 2.1232145095266635e-05, "loss": 1.0181, "step": 217700 }, { "epoch": 1.3908871369612716, "grad_norm": 1.0850905179977417, "learning_rate": 2.122804126390977e-05, "loss": 0.7316, "step": 217710 }, { "epoch": 1.3909510241110103, "grad_norm": 1.9918285608291626, "learning_rate": 2.1223937722311404e-05, "loss": 0.9262, "step": 217720 }, { "epoch": 1.391014911260749, "grad_norm": 0.724561333656311, "learning_rate": 2.12198344705129e-05, "loss": 0.6552, "step": 217730 }, { "epoch": 1.3910787984104878, "grad_norm": 0.9518063068389893, "learning_rate": 2.121573150855554e-05, "loss": 0.9679, "step": 217740 }, { "epoch": 1.3911426855602265, "grad_norm": 0.7545220851898193, "learning_rate": 2.121162883648069e-05, "loss": 0.6445, "step": 217750 }, { "epoch": 1.3912065727099652, "grad_norm": 0.9066117405891418, "learning_rate": 2.120752645432962e-05, "loss": 0.9309, "step": 217760 }, { "epoch": 1.3912704598597039, "grad_norm": 0.7924219369888306, "learning_rate": 2.120342436214368e-05, "loss": 0.858, "step": 217770 }, { "epoch": 1.3913343470094426, "grad_norm": 0.6461116671562195, "learning_rate": 2.1199322559964163e-05, "loss": 0.8988, "step": 217780 }, { "epoch": 1.3913982341591813, "grad_norm": 0.9200494885444641, "learning_rate": 2.1195221047832392e-05, "loss": 1.0004, "step": 217790 }, { "epoch": 1.39146212130892, "grad_norm": 0.7647868394851685, "learning_rate": 2.1191119825789652e-05, "loss": 0.8146, "step": 217800 }, { "epoch": 1.3915260084586587, "grad_norm": 1.1263049840927124, "learning_rate": 2.118701889387726e-05, "loss": 0.6912, "step": 217810 }, { "epoch": 1.3915898956083974, "grad_norm": 0.7079087495803833, "learning_rate": 2.118291825213653e-05, "loss": 0.6851, "step": 217820 }, { "epoch": 1.391653782758136, "grad_norm": 1.2798701524734497, "learning_rate": 2.117881790060872e-05, "loss": 0.8018, "step": 217830 }, { "epoch": 1.3917176699078748, "grad_norm": 0.9899066090583801, "learning_rate": 2.1174717839335172e-05, "loss": 1.1333, "step": 217840 }, { "epoch": 1.3917815570576133, "grad_norm": 0.3420279026031494, "learning_rate": 2.1170618068357134e-05, "loss": 0.6272, "step": 217850 }, { "epoch": 1.3918454442073522, "grad_norm": 0.8472849130630493, "learning_rate": 2.1166518587715932e-05, "loss": 0.7776, "step": 217860 }, { "epoch": 1.3919093313570907, "grad_norm": 1.085749864578247, "learning_rate": 2.1162419397452814e-05, "loss": 0.734, "step": 217870 }, { "epoch": 1.3919732185068296, "grad_norm": 0.9656279683113098, "learning_rate": 2.1158320497609117e-05, "loss": 0.7615, "step": 217880 }, { "epoch": 1.3920371056565681, "grad_norm": 0.6101782917976379, "learning_rate": 2.1154221888226046e-05, "loss": 1.0044, "step": 217890 }, { "epoch": 1.392100992806307, "grad_norm": 1.6533408164978027, "learning_rate": 2.1150123569344938e-05, "loss": 1.1256, "step": 217900 }, { "epoch": 1.3921648799560455, "grad_norm": 0.6959205865859985, "learning_rate": 2.114602554100702e-05, "loss": 0.6763, "step": 217910 }, { "epoch": 1.3922287671057845, "grad_norm": 1.275519847869873, "learning_rate": 2.1141927803253608e-05, "loss": 0.703, "step": 217920 }, { "epoch": 1.392292654255523, "grad_norm": 1.3255060911178589, "learning_rate": 2.113783035612593e-05, "loss": 0.9429, "step": 217930 }, { "epoch": 1.3923565414052619, "grad_norm": 0.5562800765037537, "learning_rate": 2.1133733199665274e-05, "loss": 0.695, "step": 217940 }, { "epoch": 1.3924204285550004, "grad_norm": 1.1394197940826416, "learning_rate": 2.112963633391291e-05, "loss": 0.6777, "step": 217950 }, { "epoch": 1.392484315704739, "grad_norm": 1.1498068571090698, "learning_rate": 2.112553975891007e-05, "loss": 0.7624, "step": 217960 }, { "epoch": 1.3925482028544778, "grad_norm": 2.9445035457611084, "learning_rate": 2.1121443474698038e-05, "loss": 0.7222, "step": 217970 }, { "epoch": 1.3926120900042165, "grad_norm": 0.9396551847457886, "learning_rate": 2.111734748131804e-05, "loss": 0.8654, "step": 217980 }, { "epoch": 1.3926759771539552, "grad_norm": 0.7392997145652771, "learning_rate": 2.111325177881136e-05, "loss": 0.7883, "step": 217990 }, { "epoch": 1.392739864303694, "grad_norm": 0.49229753017425537, "learning_rate": 2.1109156367219202e-05, "loss": 0.8635, "step": 218000 }, { "epoch": 1.3928037514534326, "grad_norm": 1.3487190008163452, "learning_rate": 2.1105061246582854e-05, "loss": 1.0008, "step": 218010 }, { "epoch": 1.3928676386031713, "grad_norm": 0.7436345219612122, "learning_rate": 2.110096641694352e-05, "loss": 0.9823, "step": 218020 }, { "epoch": 1.39293152575291, "grad_norm": 0.8380775451660156, "learning_rate": 2.1096871878342472e-05, "loss": 0.9085, "step": 218030 }, { "epoch": 1.3929954129026487, "grad_norm": 0.9696984887123108, "learning_rate": 2.109277763082091e-05, "loss": 1.1286, "step": 218040 }, { "epoch": 1.3930593000523874, "grad_norm": 1.152116060256958, "learning_rate": 2.1088683674420108e-05, "loss": 0.8687, "step": 218050 }, { "epoch": 1.3931231872021261, "grad_norm": 0.7602722644805908, "learning_rate": 2.1084590009181248e-05, "loss": 1.1443, "step": 218060 }, { "epoch": 1.3931870743518648, "grad_norm": 0.7400329113006592, "learning_rate": 2.1080496635145604e-05, "loss": 0.7862, "step": 218070 }, { "epoch": 1.3932509615016035, "grad_norm": 1.3810337781906128, "learning_rate": 2.107640355235436e-05, "loss": 0.8758, "step": 218080 }, { "epoch": 1.3933148486513423, "grad_norm": 1.6315202713012695, "learning_rate": 2.1072310760848773e-05, "loss": 0.9272, "step": 218090 }, { "epoch": 1.393378735801081, "grad_norm": 0.7120254635810852, "learning_rate": 2.1068218260670024e-05, "loss": 0.9861, "step": 218100 }, { "epoch": 1.3934426229508197, "grad_norm": 1.1353204250335693, "learning_rate": 2.1064126051859363e-05, "loss": 0.7806, "step": 218110 }, { "epoch": 1.3935065101005584, "grad_norm": 0.9815794825553894, "learning_rate": 2.1060034134457967e-05, "loss": 0.9848, "step": 218120 }, { "epoch": 1.393570397250297, "grad_norm": 0.5680572390556335, "learning_rate": 2.105594250850707e-05, "loss": 0.9667, "step": 218130 }, { "epoch": 1.3936342844000358, "grad_norm": 0.7751454710960388, "learning_rate": 2.1051851174047886e-05, "loss": 0.8767, "step": 218140 }, { "epoch": 1.3936981715497745, "grad_norm": 1.4941065311431885, "learning_rate": 2.1047760131121587e-05, "loss": 0.9672, "step": 218150 }, { "epoch": 1.3937620586995132, "grad_norm": 0.8780585527420044, "learning_rate": 2.104366937976941e-05, "loss": 0.9633, "step": 218160 }, { "epoch": 1.393825945849252, "grad_norm": 0.7456322908401489, "learning_rate": 2.103957892003251e-05, "loss": 0.9585, "step": 218170 }, { "epoch": 1.3938898329989906, "grad_norm": 1.5976117849349976, "learning_rate": 2.1035488751952126e-05, "loss": 0.8625, "step": 218180 }, { "epoch": 1.3939537201487293, "grad_norm": 1.2155790328979492, "learning_rate": 2.1031398875569414e-05, "loss": 0.8107, "step": 218190 }, { "epoch": 1.394017607298468, "grad_norm": 0.7908154726028442, "learning_rate": 2.102730929092559e-05, "loss": 0.7408, "step": 218200 }, { "epoch": 1.3940814944482067, "grad_norm": 0.5561248660087585, "learning_rate": 2.102321999806181e-05, "loss": 0.8089, "step": 218210 }, { "epoch": 1.3941453815979454, "grad_norm": 0.9182764291763306, "learning_rate": 2.1019130997019286e-05, "loss": 0.9739, "step": 218220 }, { "epoch": 1.3942092687476841, "grad_norm": 1.1026109457015991, "learning_rate": 2.1015042287839176e-05, "loss": 1.1646, "step": 218230 }, { "epoch": 1.3942731558974228, "grad_norm": 2.6774260997772217, "learning_rate": 2.1010953870562676e-05, "loss": 0.7264, "step": 218240 }, { "epoch": 1.3943370430471616, "grad_norm": 0.9442703127861023, "learning_rate": 2.1006865745230935e-05, "loss": 0.8565, "step": 218250 }, { "epoch": 1.3944009301969003, "grad_norm": 0.9139619469642639, "learning_rate": 2.1002777911885156e-05, "loss": 0.7589, "step": 218260 }, { "epoch": 1.394464817346639, "grad_norm": 1.131667137145996, "learning_rate": 2.0998690370566478e-05, "loss": 0.8819, "step": 218270 }, { "epoch": 1.3945287044963777, "grad_norm": 1.2039259672164917, "learning_rate": 2.0994603121316086e-05, "loss": 0.7831, "step": 218280 }, { "epoch": 1.3945925916461164, "grad_norm": 0.6849585771560669, "learning_rate": 2.0990516164175116e-05, "loss": 0.7287, "step": 218290 }, { "epoch": 1.394656478795855, "grad_norm": 1.0747158527374268, "learning_rate": 2.0986429499184768e-05, "loss": 1.0633, "step": 218300 }, { "epoch": 1.3947203659455938, "grad_norm": 1.6333814859390259, "learning_rate": 2.0982343126386157e-05, "loss": 0.9955, "step": 218310 }, { "epoch": 1.3947842530953323, "grad_norm": 2.6120517253875732, "learning_rate": 2.0978257045820472e-05, "loss": 0.8598, "step": 218320 }, { "epoch": 1.3948481402450712, "grad_norm": 0.9668644070625305, "learning_rate": 2.0974171257528828e-05, "loss": 0.9431, "step": 218330 }, { "epoch": 1.3949120273948097, "grad_norm": 0.8444498777389526, "learning_rate": 2.0970085761552388e-05, "loss": 1.2379, "step": 218340 }, { "epoch": 1.3949759145445486, "grad_norm": 1.2543483972549438, "learning_rate": 2.096600055793232e-05, "loss": 1.2642, "step": 218350 }, { "epoch": 1.395039801694287, "grad_norm": 0.5028043389320374, "learning_rate": 2.096191564670974e-05, "loss": 0.9692, "step": 218360 }, { "epoch": 1.395103688844026, "grad_norm": 0.7030778527259827, "learning_rate": 2.0957831027925795e-05, "loss": 0.946, "step": 218370 }, { "epoch": 1.3951675759937645, "grad_norm": 0.9780330061912537, "learning_rate": 2.0953746701621597e-05, "loss": 0.8594, "step": 218380 }, { "epoch": 1.3952314631435034, "grad_norm": 0.7831057906150818, "learning_rate": 2.0949662667838316e-05, "loss": 0.8269, "step": 218390 }, { "epoch": 1.395295350293242, "grad_norm": 1.1478424072265625, "learning_rate": 2.0945578926617047e-05, "loss": 0.8945, "step": 218400 }, { "epoch": 1.3953592374429808, "grad_norm": 1.120334267616272, "learning_rate": 2.094149547799895e-05, "loss": 1.2116, "step": 218410 }, { "epoch": 1.3954231245927193, "grad_norm": 0.8612017035484314, "learning_rate": 2.0937412322025118e-05, "loss": 0.8987, "step": 218420 }, { "epoch": 1.395487011742458, "grad_norm": 1.0608981847763062, "learning_rate": 2.0933329458736704e-05, "loss": 0.8946, "step": 218430 }, { "epoch": 1.3955508988921967, "grad_norm": 3.73439884185791, "learning_rate": 2.0929246888174793e-05, "loss": 0.884, "step": 218440 }, { "epoch": 1.3956147860419355, "grad_norm": 0.8932453393936157, "learning_rate": 2.092516461038051e-05, "loss": 0.9874, "step": 218450 }, { "epoch": 1.3956786731916742, "grad_norm": 1.17633855342865, "learning_rate": 2.092108262539499e-05, "loss": 0.8117, "step": 218460 }, { "epoch": 1.3957425603414129, "grad_norm": 1.3153350353240967, "learning_rate": 2.0917000933259312e-05, "loss": 1.0372, "step": 218470 }, { "epoch": 1.3958064474911516, "grad_norm": 1.2601327896118164, "learning_rate": 2.0912919534014608e-05, "loss": 0.7329, "step": 218480 }, { "epoch": 1.3958703346408903, "grad_norm": 0.7555443048477173, "learning_rate": 2.0908838427701954e-05, "loss": 0.9326, "step": 218490 }, { "epoch": 1.395934221790629, "grad_norm": 0.595453679561615, "learning_rate": 2.090475761436248e-05, "loss": 0.9627, "step": 218500 }, { "epoch": 1.3959981089403677, "grad_norm": 1.642309308052063, "learning_rate": 2.090067709403725e-05, "loss": 0.7874, "step": 218510 }, { "epoch": 1.3960619960901064, "grad_norm": 4.310575008392334, "learning_rate": 2.0896596866767397e-05, "loss": 0.7205, "step": 218520 }, { "epoch": 1.396125883239845, "grad_norm": 0.8452205061912537, "learning_rate": 2.089251693259397e-05, "loss": 0.9381, "step": 218530 }, { "epoch": 1.3961897703895838, "grad_norm": 0.8055098056793213, "learning_rate": 2.08884372915581e-05, "loss": 0.7921, "step": 218540 }, { "epoch": 1.3962536575393225, "grad_norm": 1.0302882194519043, "learning_rate": 2.088435794370083e-05, "loss": 1.0015, "step": 218550 }, { "epoch": 1.3963175446890612, "grad_norm": 1.0790350437164307, "learning_rate": 2.088027888906329e-05, "loss": 0.7645, "step": 218560 }, { "epoch": 1.3963814318388, "grad_norm": 0.9331994652748108, "learning_rate": 2.087620012768651e-05, "loss": 0.9206, "step": 218570 }, { "epoch": 1.3964453189885386, "grad_norm": 1.6856683492660522, "learning_rate": 2.0872121659611604e-05, "loss": 1.0113, "step": 218580 }, { "epoch": 1.3965092061382773, "grad_norm": 0.9520609974861145, "learning_rate": 2.0868043484879625e-05, "loss": 0.8135, "step": 218590 }, { "epoch": 1.396573093288016, "grad_norm": 1.096556544303894, "learning_rate": 2.086396560353166e-05, "loss": 0.7393, "step": 218600 }, { "epoch": 1.3966369804377547, "grad_norm": 0.6126187443733215, "learning_rate": 2.0859888015608754e-05, "loss": 1.0076, "step": 218610 }, { "epoch": 1.3967008675874935, "grad_norm": 1.1883734464645386, "learning_rate": 2.0855810721151998e-05, "loss": 0.9263, "step": 218620 }, { "epoch": 1.3967647547372322, "grad_norm": 1.0129221677780151, "learning_rate": 2.0851733720202427e-05, "loss": 1.2883, "step": 218630 }, { "epoch": 1.3968286418869709, "grad_norm": 0.758244514465332, "learning_rate": 2.084765701280113e-05, "loss": 0.7941, "step": 218640 }, { "epoch": 1.3968925290367096, "grad_norm": 0.7279469966888428, "learning_rate": 2.084358059898913e-05, "loss": 0.6846, "step": 218650 }, { "epoch": 1.3969564161864483, "grad_norm": 0.8215343356132507, "learning_rate": 2.0839504478807505e-05, "loss": 0.7877, "step": 218660 }, { "epoch": 1.397020303336187, "grad_norm": 1.332100510597229, "learning_rate": 2.0835428652297305e-05, "loss": 0.6285, "step": 218670 }, { "epoch": 1.3970841904859257, "grad_norm": 0.7422265410423279, "learning_rate": 2.0831353119499557e-05, "loss": 1.1877, "step": 218680 }, { "epoch": 1.3971480776356644, "grad_norm": 0.9069400429725647, "learning_rate": 2.0827277880455336e-05, "loss": 0.6081, "step": 218690 }, { "epoch": 1.397211964785403, "grad_norm": 1.2797025442123413, "learning_rate": 2.0823202935205644e-05, "loss": 1.1428, "step": 218700 }, { "epoch": 1.3972758519351418, "grad_norm": 0.7592565417289734, "learning_rate": 2.0819128283791557e-05, "loss": 0.7437, "step": 218710 }, { "epoch": 1.3973397390848805, "grad_norm": 0.8271276354789734, "learning_rate": 2.081505392625408e-05, "loss": 0.7639, "step": 218720 }, { "epoch": 1.3974036262346192, "grad_norm": 0.9150506854057312, "learning_rate": 2.081097986263427e-05, "loss": 0.8553, "step": 218730 }, { "epoch": 1.397467513384358, "grad_norm": 0.5852721929550171, "learning_rate": 2.0806906092973134e-05, "loss": 0.7293, "step": 218740 }, { "epoch": 1.3975314005340966, "grad_norm": 1.0465751886367798, "learning_rate": 2.0802832617311724e-05, "loss": 1.1388, "step": 218750 }, { "epoch": 1.3975952876838353, "grad_norm": 1.1258282661437988, "learning_rate": 2.079875943569103e-05, "loss": 1.0519, "step": 218760 }, { "epoch": 1.397659174833574, "grad_norm": 1.0739339590072632, "learning_rate": 2.0794686548152108e-05, "loss": 0.7896, "step": 218770 }, { "epoch": 1.3977230619833128, "grad_norm": 1.094150185585022, "learning_rate": 2.0790613954735944e-05, "loss": 0.6958, "step": 218780 }, { "epoch": 1.3977869491330515, "grad_norm": 2.0920755863189697, "learning_rate": 2.0786541655483582e-05, "loss": 0.708, "step": 218790 }, { "epoch": 1.3978508362827902, "grad_norm": 1.8577488660812378, "learning_rate": 2.0782469650436005e-05, "loss": 0.5569, "step": 218800 }, { "epoch": 1.3979147234325286, "grad_norm": 1.0605297088623047, "learning_rate": 2.0778397939634247e-05, "loss": 0.6519, "step": 218810 }, { "epoch": 1.3979786105822676, "grad_norm": 0.8613771200180054, "learning_rate": 2.0774326523119288e-05, "loss": 0.8127, "step": 218820 }, { "epoch": 1.398042497732006, "grad_norm": 1.1038992404937744, "learning_rate": 2.077025540093216e-05, "loss": 0.8351, "step": 218830 }, { "epoch": 1.398106384881745, "grad_norm": 1.2433876991271973, "learning_rate": 2.0766184573113833e-05, "loss": 0.6526, "step": 218840 }, { "epoch": 1.3981702720314835, "grad_norm": 0.8283995985984802, "learning_rate": 2.0762114039705337e-05, "loss": 0.8199, "step": 218850 }, { "epoch": 1.3982341591812224, "grad_norm": 0.9119390845298767, "learning_rate": 2.0758043800747646e-05, "loss": 1.1715, "step": 218860 }, { "epoch": 1.3982980463309609, "grad_norm": 1.2474699020385742, "learning_rate": 2.0753973856281728e-05, "loss": 0.8878, "step": 218870 }, { "epoch": 1.3983619334806998, "grad_norm": 0.7916103005409241, "learning_rate": 2.074990420634862e-05, "loss": 0.8466, "step": 218880 }, { "epoch": 1.3984258206304383, "grad_norm": 0.9151920676231384, "learning_rate": 2.074583485098926e-05, "loss": 0.9369, "step": 218890 }, { "epoch": 1.3984897077801772, "grad_norm": 0.9492369890213013, "learning_rate": 2.0741765790244673e-05, "loss": 0.9729, "step": 218900 }, { "epoch": 1.3985535949299157, "grad_norm": 0.7728332281112671, "learning_rate": 2.0737697024155796e-05, "loss": 0.8331, "step": 218910 }, { "epoch": 1.3986174820796544, "grad_norm": 0.49122798442840576, "learning_rate": 2.073362855276364e-05, "loss": 0.9952, "step": 218920 }, { "epoch": 1.3986813692293931, "grad_norm": 0.6504753232002258, "learning_rate": 2.0729560376109147e-05, "loss": 0.7918, "step": 218930 }, { "epoch": 1.3987452563791318, "grad_norm": 0.7926653027534485, "learning_rate": 2.0725492494233328e-05, "loss": 0.7887, "step": 218940 }, { "epoch": 1.3988091435288705, "grad_norm": 1.5223766565322876, "learning_rate": 2.07214249071771e-05, "loss": 0.766, "step": 218950 }, { "epoch": 1.3988730306786092, "grad_norm": 1.3729267120361328, "learning_rate": 2.0717357614981476e-05, "loss": 0.7495, "step": 218960 }, { "epoch": 1.398936917828348, "grad_norm": 0.6617783308029175, "learning_rate": 2.0713290617687375e-05, "loss": 1.032, "step": 218970 }, { "epoch": 1.3990008049780867, "grad_norm": 0.7680843472480774, "learning_rate": 2.0709223915335774e-05, "loss": 1.1359, "step": 218980 }, { "epoch": 1.3990646921278254, "grad_norm": 0.45521071553230286, "learning_rate": 2.0705157507967642e-05, "loss": 0.7724, "step": 218990 }, { "epoch": 1.399128579277564, "grad_norm": 1.3348525762557983, "learning_rate": 2.0701091395623902e-05, "loss": 0.9884, "step": 219000 }, { "epoch": 1.3991924664273028, "grad_norm": 0.8209075331687927, "learning_rate": 2.0697025578345535e-05, "loss": 0.9539, "step": 219010 }, { "epoch": 1.3992563535770415, "grad_norm": 1.1476972103118896, "learning_rate": 2.069296005617346e-05, "loss": 0.7315, "step": 219020 }, { "epoch": 1.3993202407267802, "grad_norm": 1.3872950077056885, "learning_rate": 2.068889482914864e-05, "loss": 0.9604, "step": 219030 }, { "epoch": 1.399384127876519, "grad_norm": 0.8013013601303101, "learning_rate": 2.0684829897311998e-05, "loss": 0.8888, "step": 219040 }, { "epoch": 1.3994480150262576, "grad_norm": 0.8018562197685242, "learning_rate": 2.0680765260704494e-05, "loss": 0.7031, "step": 219050 }, { "epoch": 1.3995119021759963, "grad_norm": 1.0981850624084473, "learning_rate": 2.067670091936703e-05, "loss": 0.8978, "step": 219060 }, { "epoch": 1.399575789325735, "grad_norm": 1.376737356185913, "learning_rate": 2.0672636873340572e-05, "loss": 0.7644, "step": 219070 }, { "epoch": 1.3996396764754737, "grad_norm": 1.0223459005355835, "learning_rate": 2.0668573122666017e-05, "loss": 0.9286, "step": 219080 }, { "epoch": 1.3997035636252124, "grad_norm": 0.9271156191825867, "learning_rate": 2.0664509667384323e-05, "loss": 0.9949, "step": 219090 }, { "epoch": 1.3997674507749511, "grad_norm": 0.8813492655754089, "learning_rate": 2.066044650753638e-05, "loss": 0.7916, "step": 219100 }, { "epoch": 1.3998313379246898, "grad_norm": 1.286502480506897, "learning_rate": 2.065638364316314e-05, "loss": 0.8053, "step": 219110 }, { "epoch": 1.3998952250744285, "grad_norm": 0.827023446559906, "learning_rate": 2.0652321074305487e-05, "loss": 0.5385, "step": 219120 }, { "epoch": 1.3999591122241672, "grad_norm": 2.0776171684265137, "learning_rate": 2.0648258801004362e-05, "loss": 0.8984, "step": 219130 }, { "epoch": 1.400022999373906, "grad_norm": 0.699685275554657, "learning_rate": 2.064419682330065e-05, "loss": 0.9742, "step": 219140 }, { "epoch": 1.4000868865236447, "grad_norm": 0.9540247321128845, "learning_rate": 2.0640135141235288e-05, "loss": 0.9716, "step": 219150 }, { "epoch": 1.4001507736733834, "grad_norm": 0.72536700963974, "learning_rate": 2.063607375484915e-05, "loss": 0.8139, "step": 219160 }, { "epoch": 1.400214660823122, "grad_norm": 1.4952839612960815, "learning_rate": 2.063201266418317e-05, "loss": 0.8069, "step": 219170 }, { "epoch": 1.4002785479728608, "grad_norm": 0.8095305562019348, "learning_rate": 2.062795186927821e-05, "loss": 0.7897, "step": 219180 }, { "epoch": 1.4003424351225995, "grad_norm": 2.51078462600708, "learning_rate": 2.0623891370175187e-05, "loss": 0.9514, "step": 219190 }, { "epoch": 1.4004063222723382, "grad_norm": 0.8767799735069275, "learning_rate": 2.0619831166915005e-05, "loss": 0.9333, "step": 219200 }, { "epoch": 1.400470209422077, "grad_norm": 0.9809701442718506, "learning_rate": 2.0615771259538526e-05, "loss": 0.9765, "step": 219210 }, { "epoch": 1.4005340965718156, "grad_norm": 0.9264981746673584, "learning_rate": 2.0611711648086668e-05, "loss": 0.9022, "step": 219220 }, { "epoch": 1.4005979837215543, "grad_norm": 0.977389395236969, "learning_rate": 2.0607652332600285e-05, "loss": 0.9624, "step": 219230 }, { "epoch": 1.400661870871293, "grad_norm": 0.8509820699691772, "learning_rate": 2.0603593313120286e-05, "loss": 0.7289, "step": 219240 }, { "epoch": 1.4007257580210317, "grad_norm": 0.7682042717933655, "learning_rate": 2.0599534589687514e-05, "loss": 0.7303, "step": 219250 }, { "epoch": 1.4007896451707704, "grad_norm": 1.349259614944458, "learning_rate": 2.059547616234288e-05, "loss": 1.1691, "step": 219260 }, { "epoch": 1.4008535323205091, "grad_norm": 1.2627943754196167, "learning_rate": 2.059141803112723e-05, "loss": 0.8081, "step": 219270 }, { "epoch": 1.4009174194702476, "grad_norm": 1.5132968425750732, "learning_rate": 2.0587360196081452e-05, "loss": 0.8969, "step": 219280 }, { "epoch": 1.4009813066199865, "grad_norm": 0.8733160495758057, "learning_rate": 2.0583302657246388e-05, "loss": 0.9716, "step": 219290 }, { "epoch": 1.401045193769725, "grad_norm": 1.0363460779190063, "learning_rate": 2.0579245414662934e-05, "loss": 0.7691, "step": 219300 }, { "epoch": 1.401109080919464, "grad_norm": 1.1736512184143066, "learning_rate": 2.057518846837191e-05, "loss": 0.7217, "step": 219310 }, { "epoch": 1.4011729680692024, "grad_norm": 3.8237926959991455, "learning_rate": 2.0571131818414213e-05, "loss": 0.7578, "step": 219320 }, { "epoch": 1.4012368552189414, "grad_norm": 0.9899071455001831, "learning_rate": 2.0567075464830682e-05, "loss": 0.6559, "step": 219330 }, { "epoch": 1.4013007423686799, "grad_norm": 0.8879222869873047, "learning_rate": 2.0563019407662143e-05, "loss": 0.877, "step": 219340 }, { "epoch": 1.4013646295184188, "grad_norm": 1.2862142324447632, "learning_rate": 2.0558963646949487e-05, "loss": 0.8434, "step": 219350 }, { "epoch": 1.4014285166681573, "grad_norm": 1.0837254524230957, "learning_rate": 2.055490818273351e-05, "loss": 0.8101, "step": 219360 }, { "epoch": 1.4014924038178962, "grad_norm": 1.694610357284546, "learning_rate": 2.055085301505511e-05, "loss": 0.8474, "step": 219370 }, { "epoch": 1.4015562909676347, "grad_norm": 0.8167562484741211, "learning_rate": 2.054679814395507e-05, "loss": 1.0352, "step": 219380 }, { "epoch": 1.4016201781173736, "grad_norm": 1.1539112329483032, "learning_rate": 2.0542743569474277e-05, "loss": 0.8725, "step": 219390 }, { "epoch": 1.401684065267112, "grad_norm": 1.7117409706115723, "learning_rate": 2.0538689291653522e-05, "loss": 0.7507, "step": 219400 }, { "epoch": 1.4017479524168508, "grad_norm": 1.2127348184585571, "learning_rate": 2.0534635310533673e-05, "loss": 0.7178, "step": 219410 }, { "epoch": 1.4018118395665895, "grad_norm": 0.7299874424934387, "learning_rate": 2.0530581626155514e-05, "loss": 0.8289, "step": 219420 }, { "epoch": 1.4018757267163282, "grad_norm": 0.7044321894645691, "learning_rate": 2.0526528238559915e-05, "loss": 0.9426, "step": 219430 }, { "epoch": 1.401939613866067, "grad_norm": 1.472915530204773, "learning_rate": 2.0522475147787656e-05, "loss": 0.8895, "step": 219440 }, { "epoch": 1.4020035010158056, "grad_norm": 1.1369291543960571, "learning_rate": 2.051842235387959e-05, "loss": 0.7921, "step": 219450 }, { "epoch": 1.4020673881655443, "grad_norm": 0.7624445557594299, "learning_rate": 2.0514369856876503e-05, "loss": 0.7382, "step": 219460 }, { "epoch": 1.402131275315283, "grad_norm": 0.6840057373046875, "learning_rate": 2.0510317656819233e-05, "loss": 0.9609, "step": 219470 }, { "epoch": 1.4021951624650217, "grad_norm": 0.5969876646995544, "learning_rate": 2.050626575374856e-05, "loss": 0.9641, "step": 219480 }, { "epoch": 1.4022590496147604, "grad_norm": 1.046563744544983, "learning_rate": 2.0502214147705327e-05, "loss": 0.9015, "step": 219490 }, { "epoch": 1.4023229367644992, "grad_norm": 1.2152701616287231, "learning_rate": 2.0498162838730295e-05, "loss": 0.8255, "step": 219500 }, { "epoch": 1.4023868239142379, "grad_norm": 0.8147159814834595, "learning_rate": 2.0494111826864287e-05, "loss": 0.7135, "step": 219510 }, { "epoch": 1.4024507110639766, "grad_norm": 0.9122790098190308, "learning_rate": 2.0490061112148123e-05, "loss": 1.2854, "step": 219520 }, { "epoch": 1.4025145982137153, "grad_norm": 0.8407679796218872, "learning_rate": 2.048601069462255e-05, "loss": 1.038, "step": 219530 }, { "epoch": 1.402578485363454, "grad_norm": 0.7449984550476074, "learning_rate": 2.04819605743284e-05, "loss": 0.7232, "step": 219540 }, { "epoch": 1.4026423725131927, "grad_norm": 1.1216732263565063, "learning_rate": 2.0477910751306427e-05, "loss": 0.7442, "step": 219550 }, { "epoch": 1.4027062596629314, "grad_norm": 1.1450462341308594, "learning_rate": 2.0473861225597445e-05, "loss": 0.6044, "step": 219560 }, { "epoch": 1.40277014681267, "grad_norm": 1.2385889291763306, "learning_rate": 2.0469811997242218e-05, "loss": 0.7567, "step": 219570 }, { "epoch": 1.4028340339624088, "grad_norm": 0.7899599075317383, "learning_rate": 2.046576306628154e-05, "loss": 0.7281, "step": 219580 }, { "epoch": 1.4028979211121475, "grad_norm": 0.6492139101028442, "learning_rate": 2.0461714432756162e-05, "loss": 0.8082, "step": 219590 }, { "epoch": 1.4029618082618862, "grad_norm": 0.7320935726165771, "learning_rate": 2.0457666096706896e-05, "loss": 0.7874, "step": 219600 }, { "epoch": 1.403025695411625, "grad_norm": 0.9208077192306519, "learning_rate": 2.0453618058174473e-05, "loss": 1.1376, "step": 219610 }, { "epoch": 1.4030895825613636, "grad_norm": 1.1169242858886719, "learning_rate": 2.044957031719969e-05, "loss": 0.8853, "step": 219620 }, { "epoch": 1.4031534697111023, "grad_norm": 1.4261136054992676, "learning_rate": 2.044552287382328e-05, "loss": 0.9555, "step": 219630 }, { "epoch": 1.403217356860841, "grad_norm": 1.1854610443115234, "learning_rate": 2.0441475728086047e-05, "loss": 0.7595, "step": 219640 }, { "epoch": 1.4032812440105797, "grad_norm": 1.04494309425354, "learning_rate": 2.0437428880028704e-05, "loss": 0.8212, "step": 219650 }, { "epoch": 1.4033451311603184, "grad_norm": 0.8896396160125732, "learning_rate": 2.0433382329692048e-05, "loss": 0.8674, "step": 219660 }, { "epoch": 1.4034090183100572, "grad_norm": 0.9874494075775146, "learning_rate": 2.042933607711679e-05, "loss": 1.0137, "step": 219670 }, { "epoch": 1.4034729054597959, "grad_norm": 0.7782787084579468, "learning_rate": 2.0425290122343716e-05, "loss": 0.9088, "step": 219680 }, { "epoch": 1.4035367926095346, "grad_norm": 1.2257322072982788, "learning_rate": 2.0421244465413543e-05, "loss": 0.8651, "step": 219690 }, { "epoch": 1.4036006797592733, "grad_norm": 1.2603285312652588, "learning_rate": 2.0417199106367042e-05, "loss": 0.8384, "step": 219700 }, { "epoch": 1.403664566909012, "grad_norm": 1.0163233280181885, "learning_rate": 2.041315404524492e-05, "loss": 0.8613, "step": 219710 }, { "epoch": 1.4037284540587507, "grad_norm": 1.0572268962860107, "learning_rate": 2.0409109282087933e-05, "loss": 1.0124, "step": 219720 }, { "epoch": 1.4037923412084894, "grad_norm": 1.1266729831695557, "learning_rate": 2.0405064816936837e-05, "loss": 0.9166, "step": 219730 }, { "epoch": 1.403856228358228, "grad_norm": 1.2244828939437866, "learning_rate": 2.0401020649832318e-05, "loss": 0.8353, "step": 219740 }, { "epoch": 1.4039201155079668, "grad_norm": 1.0080479383468628, "learning_rate": 2.0396976780815153e-05, "loss": 0.8262, "step": 219750 }, { "epoch": 1.4039840026577055, "grad_norm": 0.8630098700523376, "learning_rate": 2.039293320992602e-05, "loss": 0.9429, "step": 219760 }, { "epoch": 1.404047889807444, "grad_norm": 0.6371837258338928, "learning_rate": 2.038888993720568e-05, "loss": 0.8733, "step": 219770 }, { "epoch": 1.404111776957183, "grad_norm": 0.805311381816864, "learning_rate": 2.038484696269482e-05, "loss": 0.6292, "step": 219780 }, { "epoch": 1.4041756641069214, "grad_norm": 1.1514561176300049, "learning_rate": 2.038080428643419e-05, "loss": 1.1225, "step": 219790 }, { "epoch": 1.4042395512566603, "grad_norm": 0.9797691702842712, "learning_rate": 2.0376761908464464e-05, "loss": 0.7642, "step": 219800 }, { "epoch": 1.4043034384063988, "grad_norm": 0.9690388441085815, "learning_rate": 2.03727198288264e-05, "loss": 0.9549, "step": 219810 }, { "epoch": 1.4043673255561377, "grad_norm": 0.8218302130699158, "learning_rate": 2.036867804756067e-05, "loss": 0.9085, "step": 219820 }, { "epoch": 1.4044312127058762, "grad_norm": 0.8038190007209778, "learning_rate": 2.0364636564707972e-05, "loss": 0.8697, "step": 219830 }, { "epoch": 1.4044950998556152, "grad_norm": 0.8588317632675171, "learning_rate": 2.0360595380309038e-05, "loss": 0.7064, "step": 219840 }, { "epoch": 1.4045589870053536, "grad_norm": 1.052468180656433, "learning_rate": 2.0356554494404534e-05, "loss": 0.9069, "step": 219850 }, { "epoch": 1.4046228741550926, "grad_norm": 0.8712472915649414, "learning_rate": 2.0352513907035187e-05, "loss": 0.697, "step": 219860 }, { "epoch": 1.404686761304831, "grad_norm": 0.8167270421981812, "learning_rate": 2.034847361824166e-05, "loss": 0.951, "step": 219870 }, { "epoch": 1.40475064845457, "grad_norm": 0.6242989897727966, "learning_rate": 2.034443362806467e-05, "loss": 1.0533, "step": 219880 }, { "epoch": 1.4048145356043085, "grad_norm": 0.9029278755187988, "learning_rate": 2.0340393936544872e-05, "loss": 0.9406, "step": 219890 }, { "epoch": 1.4048784227540472, "grad_norm": 1.5568900108337402, "learning_rate": 2.0336354543722986e-05, "loss": 0.9028, "step": 219900 }, { "epoch": 1.4049423099037859, "grad_norm": 1.0189552307128906, "learning_rate": 2.033231544963965e-05, "loss": 0.9335, "step": 219910 }, { "epoch": 1.4050061970535246, "grad_norm": 0.8458846807479858, "learning_rate": 2.032827665433559e-05, "loss": 1.0823, "step": 219920 }, { "epoch": 1.4050700842032633, "grad_norm": 1.2060385942459106, "learning_rate": 2.032423815785143e-05, "loss": 0.8751, "step": 219930 }, { "epoch": 1.405133971353002, "grad_norm": 0.7118062376976013, "learning_rate": 2.0320199960227882e-05, "loss": 0.9541, "step": 219940 }, { "epoch": 1.4051978585027407, "grad_norm": 0.9647814035415649, "learning_rate": 2.031616206150558e-05, "loss": 1.0803, "step": 219950 }, { "epoch": 1.4052617456524794, "grad_norm": 1.1308926343917847, "learning_rate": 2.0312124461725222e-05, "loss": 0.9342, "step": 219960 }, { "epoch": 1.4053256328022181, "grad_norm": 0.7198635935783386, "learning_rate": 2.030808716092744e-05, "loss": 1.0821, "step": 219970 }, { "epoch": 1.4053895199519568, "grad_norm": 1.2182941436767578, "learning_rate": 2.0304050159152928e-05, "loss": 0.6881, "step": 219980 }, { "epoch": 1.4054534071016955, "grad_norm": 0.7641770243644714, "learning_rate": 2.0300013456442295e-05, "loss": 0.8982, "step": 219990 }, { "epoch": 1.4055172942514342, "grad_norm": 1.0956320762634277, "learning_rate": 2.0295977052836245e-05, "loss": 1.013, "step": 220000 }, { "epoch": 1.405581181401173, "grad_norm": 1.0628162622451782, "learning_rate": 2.029194094837539e-05, "loss": 0.8651, "step": 220010 }, { "epoch": 1.4056450685509116, "grad_norm": 1.4605565071105957, "learning_rate": 2.02879051431004e-05, "loss": 0.9023, "step": 220020 }, { "epoch": 1.4057089557006504, "grad_norm": 1.8608735799789429, "learning_rate": 2.0283869637051893e-05, "loss": 1.0067, "step": 220030 }, { "epoch": 1.405772842850389, "grad_norm": 1.1879396438598633, "learning_rate": 2.0279834430270526e-05, "loss": 0.8728, "step": 220040 }, { "epoch": 1.4058367300001278, "grad_norm": 0.9974208474159241, "learning_rate": 2.0275799522796962e-05, "loss": 0.83, "step": 220050 }, { "epoch": 1.4059006171498665, "grad_norm": 2.049335241317749, "learning_rate": 2.0271764914671794e-05, "loss": 1.1515, "step": 220060 }, { "epoch": 1.4059645042996052, "grad_norm": 2.182713031768799, "learning_rate": 2.0267730605935686e-05, "loss": 0.9389, "step": 220070 }, { "epoch": 1.4060283914493439, "grad_norm": 0.6502020955085754, "learning_rate": 2.0263696596629235e-05, "loss": 0.9203, "step": 220080 }, { "epoch": 1.4060922785990826, "grad_norm": 0.9814128279685974, "learning_rate": 2.0259662886793102e-05, "loss": 0.9169, "step": 220090 }, { "epoch": 1.4061561657488213, "grad_norm": 0.8696256279945374, "learning_rate": 2.0255629476467873e-05, "loss": 1.0024, "step": 220100 }, { "epoch": 1.40622005289856, "grad_norm": 0.8340265154838562, "learning_rate": 2.0251596365694213e-05, "loss": 0.7545, "step": 220110 }, { "epoch": 1.4062839400482987, "grad_norm": 1.115959882736206, "learning_rate": 2.024756355451269e-05, "loss": 0.9034, "step": 220120 }, { "epoch": 1.4063478271980374, "grad_norm": 0.9420958161354065, "learning_rate": 2.024353104296396e-05, "loss": 1.0772, "step": 220130 }, { "epoch": 1.4064117143477761, "grad_norm": 0.8073174357414246, "learning_rate": 2.02394988310886e-05, "loss": 0.8894, "step": 220140 }, { "epoch": 1.4064756014975148, "grad_norm": 0.7026411890983582, "learning_rate": 2.0235466918927247e-05, "loss": 0.8304, "step": 220150 }, { "epoch": 1.4065394886472535, "grad_norm": 0.9653884768486023, "learning_rate": 2.023143530652048e-05, "loss": 1.1283, "step": 220160 }, { "epoch": 1.4066033757969922, "grad_norm": 0.9749668836593628, "learning_rate": 2.0227403993908928e-05, "loss": 1.1303, "step": 220170 }, { "epoch": 1.406667262946731, "grad_norm": 0.8139040470123291, "learning_rate": 2.0223372981133154e-05, "loss": 0.9092, "step": 220180 }, { "epoch": 1.4067311500964697, "grad_norm": 2.1971898078918457, "learning_rate": 2.02193422682338e-05, "loss": 0.6938, "step": 220190 }, { "epoch": 1.4067950372462084, "grad_norm": 1.9047714471817017, "learning_rate": 2.0215311855251406e-05, "loss": 0.8362, "step": 220200 }, { "epoch": 1.406858924395947, "grad_norm": 2.2644927501678467, "learning_rate": 2.0211281742226612e-05, "loss": 1.3439, "step": 220210 }, { "epoch": 1.4069228115456858, "grad_norm": 0.9895037412643433, "learning_rate": 2.0207251929199966e-05, "loss": 0.7277, "step": 220220 }, { "epoch": 1.4069866986954245, "grad_norm": 1.0718899965286255, "learning_rate": 2.0203222416212082e-05, "loss": 1.074, "step": 220230 }, { "epoch": 1.4070505858451632, "grad_norm": 1.2079740762710571, "learning_rate": 2.019919320330351e-05, "loss": 0.9072, "step": 220240 }, { "epoch": 1.4071144729949019, "grad_norm": 0.6357789635658264, "learning_rate": 2.019516429051484e-05, "loss": 0.7015, "step": 220250 }, { "epoch": 1.4071783601446404, "grad_norm": 1.1232203245162964, "learning_rate": 2.0191135677886668e-05, "loss": 0.8269, "step": 220260 }, { "epoch": 1.4072422472943793, "grad_norm": 1.065622091293335, "learning_rate": 2.0187107365459535e-05, "loss": 0.966, "step": 220270 }, { "epoch": 1.4073061344441178, "grad_norm": 1.32682466506958, "learning_rate": 2.0183079353274036e-05, "loss": 0.8666, "step": 220280 }, { "epoch": 1.4073700215938567, "grad_norm": 0.5304492712020874, "learning_rate": 2.0179051641370712e-05, "loss": 0.8609, "step": 220290 }, { "epoch": 1.4074339087435952, "grad_norm": 1.2137643098831177, "learning_rate": 2.017502422979015e-05, "loss": 0.7503, "step": 220300 }, { "epoch": 1.4074977958933341, "grad_norm": 0.9853841662406921, "learning_rate": 2.01709971185729e-05, "loss": 0.9774, "step": 220310 }, { "epoch": 1.4075616830430726, "grad_norm": 0.9920171499252319, "learning_rate": 2.0166970307759508e-05, "loss": 0.8804, "step": 220320 }, { "epoch": 1.4076255701928115, "grad_norm": 0.8332661986351013, "learning_rate": 2.0162943797390522e-05, "loss": 0.8944, "step": 220330 }, { "epoch": 1.40768945734255, "grad_norm": 0.9016265273094177, "learning_rate": 2.0158917587506522e-05, "loss": 0.8822, "step": 220340 }, { "epoch": 1.407753344492289, "grad_norm": 0.7224472165107727, "learning_rate": 2.0154891678148013e-05, "loss": 0.8487, "step": 220350 }, { "epoch": 1.4078172316420274, "grad_norm": 0.9430965781211853, "learning_rate": 2.0150866069355574e-05, "loss": 0.737, "step": 220360 }, { "epoch": 1.4078811187917664, "grad_norm": 1.0713948011398315, "learning_rate": 2.014684076116975e-05, "loss": 0.8471, "step": 220370 }, { "epoch": 1.4079450059415048, "grad_norm": 1.0659370422363281, "learning_rate": 2.0142815753631052e-05, "loss": 0.7762, "step": 220380 }, { "epoch": 1.4080088930912436, "grad_norm": 0.6916654706001282, "learning_rate": 2.0138791046780044e-05, "loss": 0.7892, "step": 220390 }, { "epoch": 1.4080727802409823, "grad_norm": 1.019726037979126, "learning_rate": 2.0134766640657228e-05, "loss": 1.0982, "step": 220400 }, { "epoch": 1.408136667390721, "grad_norm": 1.0331531763076782, "learning_rate": 2.0130742535303164e-05, "loss": 1.1579, "step": 220410 }, { "epoch": 1.4082005545404597, "grad_norm": 1.8808170557022095, "learning_rate": 2.0126718730758347e-05, "loss": 0.8189, "step": 220420 }, { "epoch": 1.4082644416901984, "grad_norm": 1.2670456171035767, "learning_rate": 2.0122695227063332e-05, "loss": 0.8223, "step": 220430 }, { "epoch": 1.408328328839937, "grad_norm": 1.2577275037765503, "learning_rate": 2.011867202425861e-05, "loss": 0.8128, "step": 220440 }, { "epoch": 1.4083922159896758, "grad_norm": 0.731774628162384, "learning_rate": 2.0114649122384727e-05, "loss": 1.4193, "step": 220450 }, { "epoch": 1.4084561031394145, "grad_norm": 0.6962316632270813, "learning_rate": 2.011062652148216e-05, "loss": 0.7145, "step": 220460 }, { "epoch": 1.4085199902891532, "grad_norm": 1.116998314857483, "learning_rate": 2.010660422159147e-05, "loss": 0.8743, "step": 220470 }, { "epoch": 1.408583877438892, "grad_norm": 1.2085821628570557, "learning_rate": 2.0102582222753114e-05, "loss": 0.798, "step": 220480 }, { "epoch": 1.4086477645886306, "grad_norm": 0.7292695641517639, "learning_rate": 2.0098560525007638e-05, "loss": 0.8246, "step": 220490 }, { "epoch": 1.4087116517383693, "grad_norm": 1.5681432485580444, "learning_rate": 2.0094539128395506e-05, "loss": 0.7658, "step": 220500 }, { "epoch": 1.408775538888108, "grad_norm": 0.6391398310661316, "learning_rate": 2.0090518032957255e-05, "loss": 1.0228, "step": 220510 }, { "epoch": 1.4088394260378467, "grad_norm": 1.8824732303619385, "learning_rate": 2.008649723873335e-05, "loss": 0.9237, "step": 220520 }, { "epoch": 1.4089033131875854, "grad_norm": 0.56192547082901, "learning_rate": 2.0082476745764304e-05, "loss": 0.6959, "step": 220530 }, { "epoch": 1.4089672003373241, "grad_norm": 0.6403698921203613, "learning_rate": 2.007845655409059e-05, "loss": 0.7584, "step": 220540 }, { "epoch": 1.4090310874870628, "grad_norm": 1.1955901384353638, "learning_rate": 2.007443666375272e-05, "loss": 0.8017, "step": 220550 }, { "epoch": 1.4090949746368016, "grad_norm": 1.067419409751892, "learning_rate": 2.007041707479115e-05, "loss": 0.8995, "step": 220560 }, { "epoch": 1.4091588617865403, "grad_norm": 1.870528221130371, "learning_rate": 2.0066397787246367e-05, "loss": 0.6883, "step": 220570 }, { "epoch": 1.409222748936279, "grad_norm": 1.0919605493545532, "learning_rate": 2.0062378801158872e-05, "loss": 0.974, "step": 220580 }, { "epoch": 1.4092866360860177, "grad_norm": 1.08636474609375, "learning_rate": 2.0058360116569103e-05, "loss": 1.0044, "step": 220590 }, { "epoch": 1.4093505232357564, "grad_norm": 0.803911030292511, "learning_rate": 2.0054341733517574e-05, "loss": 0.9509, "step": 220600 }, { "epoch": 1.409414410385495, "grad_norm": 1.357932686805725, "learning_rate": 2.0050323652044705e-05, "loss": 0.8243, "step": 220610 }, { "epoch": 1.4094782975352338, "grad_norm": 1.1059114933013916, "learning_rate": 2.0046305872191013e-05, "loss": 0.7825, "step": 220620 }, { "epoch": 1.4095421846849725, "grad_norm": 1.688194751739502, "learning_rate": 2.004228839399691e-05, "loss": 0.8504, "step": 220630 }, { "epoch": 1.4096060718347112, "grad_norm": 0.6670559644699097, "learning_rate": 2.00382712175029e-05, "loss": 0.828, "step": 220640 }, { "epoch": 1.40966995898445, "grad_norm": 1.0450772047042847, "learning_rate": 2.0034254342749402e-05, "loss": 0.6982, "step": 220650 }, { "epoch": 1.4097338461341886, "grad_norm": 1.279312252998352, "learning_rate": 2.0030237769776906e-05, "loss": 0.7851, "step": 220660 }, { "epoch": 1.4097977332839273, "grad_norm": 0.7498508095741272, "learning_rate": 2.0026221498625825e-05, "loss": 0.6041, "step": 220670 }, { "epoch": 1.409861620433666, "grad_norm": 0.9692347645759583, "learning_rate": 2.0022205529336642e-05, "loss": 1.1839, "step": 220680 }, { "epoch": 1.4099255075834047, "grad_norm": 1.1649972200393677, "learning_rate": 2.0018189861949764e-05, "loss": 1.0362, "step": 220690 }, { "epoch": 1.4099893947331434, "grad_norm": 0.7185434699058533, "learning_rate": 2.0014174496505673e-05, "loss": 0.7825, "step": 220700 }, { "epoch": 1.4100532818828821, "grad_norm": 1.2547842264175415, "learning_rate": 2.0010159433044766e-05, "loss": 0.8287, "step": 220710 }, { "epoch": 1.4101171690326209, "grad_norm": 0.8520128726959229, "learning_rate": 2.000614467160752e-05, "loss": 1.2777, "step": 220720 }, { "epoch": 1.4101810561823596, "grad_norm": 1.134917974472046, "learning_rate": 2.0002130212234322e-05, "loss": 0.6963, "step": 220730 }, { "epoch": 1.4102449433320983, "grad_norm": 0.7581837177276611, "learning_rate": 1.999811605496565e-05, "loss": 0.8686, "step": 220740 }, { "epoch": 1.4103088304818368, "grad_norm": 0.9078113436698914, "learning_rate": 1.999410219984188e-05, "loss": 1.0735, "step": 220750 }, { "epoch": 1.4103727176315757, "grad_norm": 2.1745991706848145, "learning_rate": 1.9990088646903477e-05, "loss": 0.8066, "step": 220760 }, { "epoch": 1.4104366047813142, "grad_norm": 0.9876050353050232, "learning_rate": 1.9986075396190828e-05, "loss": 0.6116, "step": 220770 }, { "epoch": 1.410500491931053, "grad_norm": 0.7553558945655823, "learning_rate": 1.998206244774437e-05, "loss": 0.8716, "step": 220780 }, { "epoch": 1.4105643790807916, "grad_norm": 0.6577632427215576, "learning_rate": 1.9978049801604542e-05, "loss": 0.9674, "step": 220790 }, { "epoch": 1.4106282662305305, "grad_norm": 0.951414167881012, "learning_rate": 1.997403745781169e-05, "loss": 1.0595, "step": 220800 }, { "epoch": 1.410692153380269, "grad_norm": 1.0069729089736938, "learning_rate": 1.9970025416406278e-05, "loss": 0.8617, "step": 220810 }, { "epoch": 1.410756040530008, "grad_norm": 1.0184953212738037, "learning_rate": 1.9966013677428668e-05, "loss": 1.0118, "step": 220820 }, { "epoch": 1.4108199276797464, "grad_norm": 0.9171404838562012, "learning_rate": 1.996200224091931e-05, "loss": 1.0464, "step": 220830 }, { "epoch": 1.4108838148294853, "grad_norm": 1.1307326555252075, "learning_rate": 1.995799110691855e-05, "loss": 0.9237, "step": 220840 }, { "epoch": 1.4109477019792238, "grad_norm": 2.443937063217163, "learning_rate": 1.995438134499613e-05, "loss": 0.8088, "step": 220850 }, { "epoch": 1.4110115891289625, "grad_norm": 0.9654321074485779, "learning_rate": 1.9950370785873044e-05, "loss": 0.9652, "step": 220860 }, { "epoch": 1.4110754762787012, "grad_norm": 0.7190786600112915, "learning_rate": 1.994636052937574e-05, "loss": 0.6677, "step": 220870 }, { "epoch": 1.41113936342844, "grad_norm": 0.5666462182998657, "learning_rate": 1.994235057554457e-05, "loss": 0.7352, "step": 220880 }, { "epoch": 1.4112032505781786, "grad_norm": 0.8541065454483032, "learning_rate": 1.993834092441993e-05, "loss": 0.7986, "step": 220890 }, { "epoch": 1.4112671377279173, "grad_norm": 0.6696165204048157, "learning_rate": 1.993433157604222e-05, "loss": 0.8192, "step": 220900 }, { "epoch": 1.411331024877656, "grad_norm": 1.8114312887191772, "learning_rate": 1.9930322530451783e-05, "loss": 1.041, "step": 220910 }, { "epoch": 1.4113949120273948, "grad_norm": 0.6636319160461426, "learning_rate": 1.9926313787689038e-05, "loss": 0.6572, "step": 220920 }, { "epoch": 1.4114587991771335, "grad_norm": 2.5401008129119873, "learning_rate": 1.9922305347794308e-05, "loss": 0.7537, "step": 220930 }, { "epoch": 1.4115226863268722, "grad_norm": 0.8678221702575684, "learning_rate": 1.991829721080802e-05, "loss": 0.8484, "step": 220940 }, { "epoch": 1.4115865734766109, "grad_norm": 1.05147385597229, "learning_rate": 1.9914289376770463e-05, "loss": 1.0464, "step": 220950 }, { "epoch": 1.4116504606263496, "grad_norm": 0.7742120027542114, "learning_rate": 1.991028184572206e-05, "loss": 0.8125, "step": 220960 }, { "epoch": 1.4117143477760883, "grad_norm": 0.7941192984580994, "learning_rate": 1.9906274617703136e-05, "loss": 0.805, "step": 220970 }, { "epoch": 1.411778234925827, "grad_norm": 0.6988613605499268, "learning_rate": 1.990226769275408e-05, "loss": 0.7882, "step": 220980 }, { "epoch": 1.4118421220755657, "grad_norm": 0.956760585308075, "learning_rate": 1.9898261070915203e-05, "loss": 0.7652, "step": 220990 }, { "epoch": 1.4119060092253044, "grad_norm": 0.8377808332443237, "learning_rate": 1.989425475222688e-05, "loss": 0.8399, "step": 221000 }, { "epoch": 1.411969896375043, "grad_norm": 0.8833522796630859, "learning_rate": 1.9890248736729477e-05, "loss": 1.0528, "step": 221010 }, { "epoch": 1.4120337835247818, "grad_norm": 1.3832995891571045, "learning_rate": 1.9886243024463298e-05, "loss": 0.8216, "step": 221020 }, { "epoch": 1.4120976706745205, "grad_norm": 1.0302939414978027, "learning_rate": 1.9882237615468724e-05, "loss": 0.9526, "step": 221030 }, { "epoch": 1.4121615578242592, "grad_norm": 1.5530582666397095, "learning_rate": 1.9878232509786054e-05, "loss": 0.9041, "step": 221040 }, { "epoch": 1.412225444973998, "grad_norm": 0.7477715015411377, "learning_rate": 1.9874227707455657e-05, "loss": 0.6952, "step": 221050 }, { "epoch": 1.4122893321237366, "grad_norm": 1.5359477996826172, "learning_rate": 1.9870223208517836e-05, "loss": 0.7534, "step": 221060 }, { "epoch": 1.4123532192734753, "grad_norm": 0.9684277772903442, "learning_rate": 1.986621901301295e-05, "loss": 1.0632, "step": 221070 }, { "epoch": 1.412417106423214, "grad_norm": 0.8300984501838684, "learning_rate": 1.9862215120981288e-05, "loss": 0.9463, "step": 221080 }, { "epoch": 1.4124809935729528, "grad_norm": 0.8654909729957581, "learning_rate": 1.9858211532463212e-05, "loss": 0.8561, "step": 221090 }, { "epoch": 1.4125448807226915, "grad_norm": 1.0056959390640259, "learning_rate": 1.9854208247499e-05, "loss": 0.7763, "step": 221100 }, { "epoch": 1.4126087678724302, "grad_norm": 1.648310899734497, "learning_rate": 1.9850205266129013e-05, "loss": 0.8435, "step": 221110 }, { "epoch": 1.4126726550221689, "grad_norm": 0.861349880695343, "learning_rate": 1.9846202588393526e-05, "loss": 0.8127, "step": 221120 }, { "epoch": 1.4127365421719076, "grad_norm": 1.2424954175949097, "learning_rate": 1.984220021433288e-05, "loss": 0.8182, "step": 221130 }, { "epoch": 1.4128004293216463, "grad_norm": 0.825581967830658, "learning_rate": 1.983819814398735e-05, "loss": 0.9335, "step": 221140 }, { "epoch": 1.412864316471385, "grad_norm": 0.7504009008407593, "learning_rate": 1.983419637739728e-05, "loss": 0.6404, "step": 221150 }, { "epoch": 1.4129282036211237, "grad_norm": 1.168960690498352, "learning_rate": 1.983019491460293e-05, "loss": 0.7979, "step": 221160 }, { "epoch": 1.4129920907708624, "grad_norm": 0.9512644410133362, "learning_rate": 1.9826193755644636e-05, "loss": 0.8454, "step": 221170 }, { "epoch": 1.4130559779206011, "grad_norm": 0.5762017965316772, "learning_rate": 1.9822192900562658e-05, "loss": 0.9529, "step": 221180 }, { "epoch": 1.4131198650703398, "grad_norm": 0.9827634692192078, "learning_rate": 1.9818192349397317e-05, "loss": 0.5732, "step": 221190 }, { "epoch": 1.4131837522200785, "grad_norm": 1.3534090518951416, "learning_rate": 1.981419210218888e-05, "loss": 0.9992, "step": 221200 }, { "epoch": 1.4132476393698172, "grad_norm": 1.1091433763504028, "learning_rate": 1.9810192158977635e-05, "loss": 1.0782, "step": 221210 }, { "epoch": 1.413311526519556, "grad_norm": 0.8881558179855347, "learning_rate": 1.98061925198039e-05, "loss": 0.9678, "step": 221220 }, { "epoch": 1.4133754136692946, "grad_norm": 0.8820149898529053, "learning_rate": 1.9802193184707907e-05, "loss": 0.7579, "step": 221230 }, { "epoch": 1.4134393008190331, "grad_norm": 0.8992303609848022, "learning_rate": 1.9798194153729964e-05, "loss": 0.9235, "step": 221240 }, { "epoch": 1.413503187968772, "grad_norm": 1.0931174755096436, "learning_rate": 1.9794195426910322e-05, "loss": 0.7635, "step": 221250 }, { "epoch": 1.4135670751185105, "grad_norm": 0.724915623664856, "learning_rate": 1.9790197004289284e-05, "loss": 0.9907, "step": 221260 }, { "epoch": 1.4136309622682495, "grad_norm": 0.7087599039077759, "learning_rate": 1.9786198885907075e-05, "loss": 0.5267, "step": 221270 }, { "epoch": 1.413694849417988, "grad_norm": 1.4535980224609375, "learning_rate": 1.9782201071804e-05, "loss": 1.1702, "step": 221280 }, { "epoch": 1.4137587365677269, "grad_norm": 0.7310057878494263, "learning_rate": 1.9778203562020287e-05, "loss": 1.1342, "step": 221290 }, { "epoch": 1.4138226237174654, "grad_norm": 1.2365026473999023, "learning_rate": 1.9774206356596227e-05, "loss": 0.7644, "step": 221300 }, { "epoch": 1.4138865108672043, "grad_norm": 0.9791273474693298, "learning_rate": 1.9770209455572043e-05, "loss": 0.7834, "step": 221310 }, { "epoch": 1.4139503980169428, "grad_norm": 0.9656670093536377, "learning_rate": 1.9766212858988014e-05, "loss": 0.8897, "step": 221320 }, { "epoch": 1.4140142851666817, "grad_norm": 1.2244353294372559, "learning_rate": 1.976221656688436e-05, "loss": 0.9983, "step": 221330 }, { "epoch": 1.4140781723164202, "grad_norm": 1.1544291973114014, "learning_rate": 1.975822057930137e-05, "loss": 0.946, "step": 221340 }, { "epoch": 1.414142059466159, "grad_norm": 1.3128889799118042, "learning_rate": 1.975422489627924e-05, "loss": 0.9674, "step": 221350 }, { "epoch": 1.4142059466158976, "grad_norm": 0.965920090675354, "learning_rate": 1.9750229517858243e-05, "loss": 0.9747, "step": 221360 }, { "epoch": 1.4142698337656363, "grad_norm": 0.6471821069717407, "learning_rate": 1.9746234444078592e-05, "loss": 0.7858, "step": 221370 }, { "epoch": 1.414333720915375, "grad_norm": 1.2321809530258179, "learning_rate": 1.974223967498055e-05, "loss": 0.7847, "step": 221380 }, { "epoch": 1.4143976080651137, "grad_norm": 1.0316810607910156, "learning_rate": 1.9738245210604317e-05, "loss": 0.6993, "step": 221390 }, { "epoch": 1.4144614952148524, "grad_norm": 0.6123872399330139, "learning_rate": 1.9734251050990148e-05, "loss": 0.8941, "step": 221400 }, { "epoch": 1.4145253823645911, "grad_norm": 0.8233139514923096, "learning_rate": 1.9730257196178244e-05, "loss": 0.732, "step": 221410 }, { "epoch": 1.4145892695143298, "grad_norm": 0.9176562428474426, "learning_rate": 1.972626364620885e-05, "loss": 0.7941, "step": 221420 }, { "epoch": 1.4146531566640685, "grad_norm": 1.1283241510391235, "learning_rate": 1.9722270401122166e-05, "loss": 1.0854, "step": 221430 }, { "epoch": 1.4147170438138073, "grad_norm": 0.9821565747261047, "learning_rate": 1.97182774609584e-05, "loss": 0.7775, "step": 221440 }, { "epoch": 1.414780930963546, "grad_norm": 1.1683138608932495, "learning_rate": 1.9714284825757795e-05, "loss": 1.1725, "step": 221450 }, { "epoch": 1.4148448181132847, "grad_norm": 0.8943763375282288, "learning_rate": 1.9710292495560527e-05, "loss": 0.6814, "step": 221460 }, { "epoch": 1.4149087052630234, "grad_norm": 1.9581735134124756, "learning_rate": 1.9706300470406837e-05, "loss": 0.6614, "step": 221470 }, { "epoch": 1.414972592412762, "grad_norm": 1.9280062913894653, "learning_rate": 1.9702308750336884e-05, "loss": 0.8017, "step": 221480 }, { "epoch": 1.4150364795625008, "grad_norm": 1.0093328952789307, "learning_rate": 1.9698317335390916e-05, "loss": 1.0391, "step": 221490 }, { "epoch": 1.4151003667122395, "grad_norm": 0.77775639295578, "learning_rate": 1.969432622560909e-05, "loss": 0.9507, "step": 221500 }, { "epoch": 1.4151642538619782, "grad_norm": 1.1072726249694824, "learning_rate": 1.969033542103163e-05, "loss": 0.7385, "step": 221510 }, { "epoch": 1.415228141011717, "grad_norm": 1.4591403007507324, "learning_rate": 1.96863449216987e-05, "loss": 1.0868, "step": 221520 }, { "epoch": 1.4152920281614556, "grad_norm": 1.0994980335235596, "learning_rate": 1.9682354727650505e-05, "loss": 0.8326, "step": 221530 }, { "epoch": 1.4153559153111943, "grad_norm": 1.7113072872161865, "learning_rate": 1.9678364838927238e-05, "loss": 0.7034, "step": 221540 }, { "epoch": 1.415419802460933, "grad_norm": 1.114270806312561, "learning_rate": 1.9674375255569055e-05, "loss": 0.7392, "step": 221550 }, { "epoch": 1.4154836896106717, "grad_norm": 1.084441065788269, "learning_rate": 1.9670385977616167e-05, "loss": 0.9154, "step": 221560 }, { "epoch": 1.4155475767604104, "grad_norm": 1.159225344657898, "learning_rate": 1.966639700510871e-05, "loss": 0.9085, "step": 221570 }, { "epoch": 1.4156114639101491, "grad_norm": 0.9540026783943176, "learning_rate": 1.9662408338086897e-05, "loss": 0.8693, "step": 221580 }, { "epoch": 1.4156753510598878, "grad_norm": 0.7180683612823486, "learning_rate": 1.9658419976590858e-05, "loss": 0.7765, "step": 221590 }, { "epoch": 1.4157392382096265, "grad_norm": 0.9916054606437683, "learning_rate": 1.96544319206608e-05, "loss": 0.8833, "step": 221600 }, { "epoch": 1.4158031253593653, "grad_norm": 0.6650932431221008, "learning_rate": 1.9650444170336846e-05, "loss": 0.6361, "step": 221610 }, { "epoch": 1.415867012509104, "grad_norm": 1.0005336999893188, "learning_rate": 1.964645672565919e-05, "loss": 0.7585, "step": 221620 }, { "epoch": 1.4159308996588427, "grad_norm": 1.8848949670791626, "learning_rate": 1.964246958666796e-05, "loss": 0.7068, "step": 221630 }, { "epoch": 1.4159947868085814, "grad_norm": 1.0205187797546387, "learning_rate": 1.963848275340334e-05, "loss": 0.865, "step": 221640 }, { "epoch": 1.41605867395832, "grad_norm": 1.4636200666427612, "learning_rate": 1.963449622590545e-05, "loss": 1.0402, "step": 221650 }, { "epoch": 1.4161225611080588, "grad_norm": 0.8260305523872375, "learning_rate": 1.963051000421447e-05, "loss": 0.7022, "step": 221660 }, { "epoch": 1.4161864482577975, "grad_norm": 2.168660879135132, "learning_rate": 1.9626524088370512e-05, "loss": 0.871, "step": 221670 }, { "epoch": 1.4162503354075362, "grad_norm": 1.222449541091919, "learning_rate": 1.9622538478413747e-05, "loss": 0.7785, "step": 221680 }, { "epoch": 1.416314222557275, "grad_norm": 1.6448215246200562, "learning_rate": 1.9618553174384284e-05, "loss": 0.8298, "step": 221690 }, { "epoch": 1.4163781097070136, "grad_norm": 0.6294973492622375, "learning_rate": 1.961456817632229e-05, "loss": 0.7221, "step": 221700 }, { "epoch": 1.416441996856752, "grad_norm": 1.326272964477539, "learning_rate": 1.9610583484267864e-05, "loss": 0.8475, "step": 221710 }, { "epoch": 1.416505884006491, "grad_norm": 0.6224335432052612, "learning_rate": 1.9606599098261175e-05, "loss": 0.9709, "step": 221720 }, { "epoch": 1.4165697711562295, "grad_norm": 0.9180363416671753, "learning_rate": 1.9602615018342307e-05, "loss": 0.9115, "step": 221730 }, { "epoch": 1.4166336583059684, "grad_norm": 0.8147386908531189, "learning_rate": 1.9598631244551402e-05, "loss": 0.8754, "step": 221740 }, { "epoch": 1.416697545455707, "grad_norm": 0.6118002533912659, "learning_rate": 1.95946477769286e-05, "loss": 0.6263, "step": 221750 }, { "epoch": 1.4167614326054458, "grad_norm": 0.5602393746376038, "learning_rate": 1.959066461551398e-05, "loss": 0.9374, "step": 221760 }, { "epoch": 1.4168253197551843, "grad_norm": 0.7014377117156982, "learning_rate": 1.9586681760347692e-05, "loss": 0.8284, "step": 221770 }, { "epoch": 1.4168892069049233, "grad_norm": 0.9873439073562622, "learning_rate": 1.9582699211469814e-05, "loss": 0.6845, "step": 221780 }, { "epoch": 1.4169530940546617, "grad_norm": 0.8853866457939148, "learning_rate": 1.957871696892049e-05, "loss": 0.7459, "step": 221790 }, { "epoch": 1.4170169812044007, "grad_norm": 0.8921706676483154, "learning_rate": 1.957473503273978e-05, "loss": 0.9808, "step": 221800 }, { "epoch": 1.4170808683541392, "grad_norm": 1.1913554668426514, "learning_rate": 1.9570753402967834e-05, "loss": 0.7517, "step": 221810 }, { "epoch": 1.417144755503878, "grad_norm": 0.9814984202384949, "learning_rate": 1.9566772079644706e-05, "loss": 0.8787, "step": 221820 }, { "epoch": 1.4172086426536166, "grad_norm": 0.6580504179000854, "learning_rate": 1.9562791062810533e-05, "loss": 0.724, "step": 221830 }, { "epoch": 1.4172725298033553, "grad_norm": 0.7003348469734192, "learning_rate": 1.9558810352505362e-05, "loss": 0.8837, "step": 221840 }, { "epoch": 1.417336416953094, "grad_norm": 1.384737253189087, "learning_rate": 1.9554829948769326e-05, "loss": 0.8948, "step": 221850 }, { "epoch": 1.4174003041028327, "grad_norm": 0.7046149969100952, "learning_rate": 1.9550849851642473e-05, "loss": 0.6696, "step": 221860 }, { "epoch": 1.4174641912525714, "grad_norm": 0.7566852569580078, "learning_rate": 1.9546870061164922e-05, "loss": 0.8452, "step": 221870 }, { "epoch": 1.41752807840231, "grad_norm": 1.0079212188720703, "learning_rate": 1.954289057737672e-05, "loss": 0.7338, "step": 221880 }, { "epoch": 1.4175919655520488, "grad_norm": 0.6846641898155212, "learning_rate": 1.9538911400317976e-05, "loss": 1.0205, "step": 221890 }, { "epoch": 1.4176558527017875, "grad_norm": 0.8684399724006653, "learning_rate": 1.9534932530028728e-05, "loss": 0.7658, "step": 221900 }, { "epoch": 1.4177197398515262, "grad_norm": 0.861835777759552, "learning_rate": 1.953095396654908e-05, "loss": 0.8338, "step": 221910 }, { "epoch": 1.417783627001265, "grad_norm": 1.1581312417984009, "learning_rate": 1.9526975709919092e-05, "loss": 0.7955, "step": 221920 }, { "epoch": 1.4178475141510036, "grad_norm": 1.2092033624649048, "learning_rate": 1.9522997760178803e-05, "loss": 0.6525, "step": 221930 }, { "epoch": 1.4179114013007423, "grad_norm": 1.3593804836273193, "learning_rate": 1.951902011736831e-05, "loss": 0.9873, "step": 221940 }, { "epoch": 1.417975288450481, "grad_norm": 1.110656976699829, "learning_rate": 1.951504278152763e-05, "loss": 0.7818, "step": 221950 }, { "epoch": 1.4180391756002197, "grad_norm": 0.5778676271438599, "learning_rate": 1.9511065752696866e-05, "loss": 0.9688, "step": 221960 }, { "epoch": 1.4181030627499585, "grad_norm": 0.8316310048103333, "learning_rate": 1.9507089030916027e-05, "loss": 1.0128, "step": 221970 }, { "epoch": 1.4181669498996972, "grad_norm": 0.8025345206260681, "learning_rate": 1.95031126162252e-05, "loss": 0.7758, "step": 221980 }, { "epoch": 1.4182308370494359, "grad_norm": 0.9378303289413452, "learning_rate": 1.9499136508664396e-05, "loss": 1.0748, "step": 221990 }, { "epoch": 1.4182947241991746, "grad_norm": 0.7751262784004211, "learning_rate": 1.949516070827369e-05, "loss": 0.761, "step": 222000 }, { "epoch": 1.4183586113489133, "grad_norm": 1.126213550567627, "learning_rate": 1.949118521509309e-05, "loss": 0.851, "step": 222010 }, { "epoch": 1.418422498498652, "grad_norm": 0.7336819171905518, "learning_rate": 1.9487210029162663e-05, "loss": 0.7735, "step": 222020 }, { "epoch": 1.4184863856483907, "grad_norm": 1.2958660125732422, "learning_rate": 1.9483235150522413e-05, "loss": 0.7772, "step": 222030 }, { "epoch": 1.4185502727981294, "grad_norm": 0.699873685836792, "learning_rate": 1.9479260579212404e-05, "loss": 0.7898, "step": 222040 }, { "epoch": 1.418614159947868, "grad_norm": 1.1349695920944214, "learning_rate": 1.9475286315272627e-05, "loss": 0.8856, "step": 222050 }, { "epoch": 1.4186780470976068, "grad_norm": 1.0587198734283447, "learning_rate": 1.9471312358743126e-05, "loss": 0.7165, "step": 222060 }, { "epoch": 1.4187419342473455, "grad_norm": 0.7114539742469788, "learning_rate": 1.9467338709663935e-05, "loss": 0.83, "step": 222070 }, { "epoch": 1.4188058213970842, "grad_norm": 1.09430992603302, "learning_rate": 1.946336536807504e-05, "loss": 0.9272, "step": 222080 }, { "epoch": 1.418869708546823, "grad_norm": 1.2134573459625244, "learning_rate": 1.9459392334016496e-05, "loss": 0.9753, "step": 222090 }, { "epoch": 1.4189335956965616, "grad_norm": 0.9118198156356812, "learning_rate": 1.9455419607528268e-05, "loss": 1.0493, "step": 222100 }, { "epoch": 1.4189974828463003, "grad_norm": 0.5956824421882629, "learning_rate": 1.945144718865042e-05, "loss": 0.6476, "step": 222110 }, { "epoch": 1.419061369996039, "grad_norm": 0.8295848965644836, "learning_rate": 1.9447475077422895e-05, "loss": 1.0601, "step": 222120 }, { "epoch": 1.4191252571457778, "grad_norm": 0.9054329991340637, "learning_rate": 1.9443503273885755e-05, "loss": 0.7685, "step": 222130 }, { "epoch": 1.4191891442955165, "grad_norm": 0.832612931728363, "learning_rate": 1.9439531778078947e-05, "loss": 0.9382, "step": 222140 }, { "epoch": 1.4192530314452552, "grad_norm": 0.8252206444740295, "learning_rate": 1.9435560590042512e-05, "loss": 0.7185, "step": 222150 }, { "epoch": 1.4193169185949939, "grad_norm": 0.9640223979949951, "learning_rate": 1.943158970981641e-05, "loss": 0.7232, "step": 222160 }, { "epoch": 1.4193808057447326, "grad_norm": 1.1477142572402954, "learning_rate": 1.9427619137440663e-05, "loss": 0.9039, "step": 222170 }, { "epoch": 1.4194446928944713, "grad_norm": 1.1876455545425415, "learning_rate": 1.9423648872955218e-05, "loss": 0.7363, "step": 222180 }, { "epoch": 1.41950858004421, "grad_norm": 0.6598142385482788, "learning_rate": 1.9419678916400104e-05, "loss": 0.7316, "step": 222190 }, { "epoch": 1.4195724671939485, "grad_norm": 0.8525545001029968, "learning_rate": 1.9415709267815252e-05, "loss": 0.891, "step": 222200 }, { "epoch": 1.4196363543436874, "grad_norm": 0.7906132340431213, "learning_rate": 1.9411739927240692e-05, "loss": 0.8702, "step": 222210 }, { "epoch": 1.4197002414934259, "grad_norm": 0.7197248935699463, "learning_rate": 1.9407770894716354e-05, "loss": 0.7535, "step": 222220 }, { "epoch": 1.4197641286431648, "grad_norm": 0.9192577004432678, "learning_rate": 1.940380217028225e-05, "loss": 0.954, "step": 222230 }, { "epoch": 1.4198280157929033, "grad_norm": 0.985266387462616, "learning_rate": 1.9399833753978308e-05, "loss": 0.7107, "step": 222240 }, { "epoch": 1.4198919029426422, "grad_norm": 1.0751519203186035, "learning_rate": 1.939586564584453e-05, "loss": 0.8886, "step": 222250 }, { "epoch": 1.4199557900923807, "grad_norm": 1.5335538387298584, "learning_rate": 1.9391897845920842e-05, "loss": 1.1657, "step": 222260 }, { "epoch": 1.4200196772421196, "grad_norm": 1.431660532951355, "learning_rate": 1.9387930354247224e-05, "loss": 0.952, "step": 222270 }, { "epoch": 1.4200835643918581, "grad_norm": 1.3148257732391357, "learning_rate": 1.938396317086365e-05, "loss": 0.9239, "step": 222280 }, { "epoch": 1.420147451541597, "grad_norm": 2.135805606842041, "learning_rate": 1.937999629581003e-05, "loss": 0.8645, "step": 222290 }, { "epoch": 1.4202113386913355, "grad_norm": 1.0679904222488403, "learning_rate": 1.937602972912636e-05, "loss": 0.799, "step": 222300 }, { "epoch": 1.4202752258410745, "grad_norm": 0.8440000414848328, "learning_rate": 1.9372063470852547e-05, "loss": 0.9153, "step": 222310 }, { "epoch": 1.420339112990813, "grad_norm": 1.7078415155410767, "learning_rate": 1.936809752102857e-05, "loss": 1.2632, "step": 222320 }, { "epoch": 1.4204030001405517, "grad_norm": 2.0827977657318115, "learning_rate": 1.936413187969433e-05, "loss": 0.7392, "step": 222330 }, { "epoch": 1.4204668872902904, "grad_norm": 1.0674457550048828, "learning_rate": 1.936016654688981e-05, "loss": 1.0084, "step": 222340 }, { "epoch": 1.420530774440029, "grad_norm": 0.8765248656272888, "learning_rate": 1.9356201522654892e-05, "loss": 1.0464, "step": 222350 }, { "epoch": 1.4205946615897678, "grad_norm": 1.205122947692871, "learning_rate": 1.9352236807029562e-05, "loss": 0.8186, "step": 222360 }, { "epoch": 1.4206585487395065, "grad_norm": 1.0993788242340088, "learning_rate": 1.93482724000537e-05, "loss": 1.1414, "step": 222370 }, { "epoch": 1.4207224358892452, "grad_norm": 0.7830728888511658, "learning_rate": 1.9344308301767274e-05, "loss": 0.6976, "step": 222380 }, { "epoch": 1.420786323038984, "grad_norm": 2.2178773880004883, "learning_rate": 1.9340344512210163e-05, "loss": 0.8951, "step": 222390 }, { "epoch": 1.4208502101887226, "grad_norm": 0.6909429430961609, "learning_rate": 1.9336381031422325e-05, "loss": 0.8694, "step": 222400 }, { "epoch": 1.4209140973384613, "grad_norm": 1.1704915761947632, "learning_rate": 1.933241785944366e-05, "loss": 0.8337, "step": 222410 }, { "epoch": 1.4209779844882, "grad_norm": 1.000505805015564, "learning_rate": 1.9328454996314055e-05, "loss": 0.9537, "step": 222420 }, { "epoch": 1.4210418716379387, "grad_norm": 0.865317702293396, "learning_rate": 1.932449244207346e-05, "loss": 0.8764, "step": 222430 }, { "epoch": 1.4211057587876774, "grad_norm": 2.7685742378234863, "learning_rate": 1.9320530196761753e-05, "loss": 0.965, "step": 222440 }, { "epoch": 1.4211696459374161, "grad_norm": 1.276869773864746, "learning_rate": 1.9316568260418867e-05, "loss": 1.4253, "step": 222450 }, { "epoch": 1.4212335330871548, "grad_norm": 1.2430510520935059, "learning_rate": 1.931260663308466e-05, "loss": 0.9924, "step": 222460 }, { "epoch": 1.4212974202368935, "grad_norm": 1.0641168355941772, "learning_rate": 1.9308645314799073e-05, "loss": 1.1704, "step": 222470 }, { "epoch": 1.4213613073866322, "grad_norm": 0.7764881253242493, "learning_rate": 1.930468430560196e-05, "loss": 0.7378, "step": 222480 }, { "epoch": 1.421425194536371, "grad_norm": 1.2852715253829956, "learning_rate": 1.9300723605533255e-05, "loss": 1.1468, "step": 222490 }, { "epoch": 1.4214890816861097, "grad_norm": 1.244436264038086, "learning_rate": 1.9296763214632796e-05, "loss": 0.6906, "step": 222500 }, { "epoch": 1.4215529688358484, "grad_norm": 0.7294203639030457, "learning_rate": 1.9292803132940518e-05, "loss": 0.6666, "step": 222510 }, { "epoch": 1.421616855985587, "grad_norm": 1.364741325378418, "learning_rate": 1.9288843360496255e-05, "loss": 0.9499, "step": 222520 }, { "epoch": 1.4216807431353258, "grad_norm": 1.0707260370254517, "learning_rate": 1.9284883897339927e-05, "loss": 0.9658, "step": 222530 }, { "epoch": 1.4217446302850645, "grad_norm": 1.0712471008300781, "learning_rate": 1.9280924743511382e-05, "loss": 0.9658, "step": 222540 }, { "epoch": 1.4218085174348032, "grad_norm": 1.0781949758529663, "learning_rate": 1.9276965899050507e-05, "loss": 1.0543, "step": 222550 }, { "epoch": 1.421872404584542, "grad_norm": 0.5351880192756653, "learning_rate": 1.9273007363997148e-05, "loss": 0.7134, "step": 222560 }, { "epoch": 1.4219362917342806, "grad_norm": 1.1996464729309082, "learning_rate": 1.926904913839121e-05, "loss": 0.8995, "step": 222570 }, { "epoch": 1.4220001788840193, "grad_norm": 0.900219738483429, "learning_rate": 1.9265091222272513e-05, "loss": 0.9481, "step": 222580 }, { "epoch": 1.422064066033758, "grad_norm": 2.1757538318634033, "learning_rate": 1.926113361568094e-05, "loss": 0.8004, "step": 222590 }, { "epoch": 1.4221279531834967, "grad_norm": 0.8302018046379089, "learning_rate": 1.9257176318656356e-05, "loss": 0.8924, "step": 222600 }, { "epoch": 1.4221918403332354, "grad_norm": 1.2007355690002441, "learning_rate": 1.9253219331238586e-05, "loss": 0.7996, "step": 222610 }, { "epoch": 1.4222557274829741, "grad_norm": 1.2114557027816772, "learning_rate": 1.924926265346752e-05, "loss": 0.9217, "step": 222620 }, { "epoch": 1.4223196146327128, "grad_norm": 0.8614585995674133, "learning_rate": 1.9245306285382957e-05, "loss": 0.7761, "step": 222630 }, { "epoch": 1.4223835017824515, "grad_norm": 1.270330786705017, "learning_rate": 1.924135022702479e-05, "loss": 0.5521, "step": 222640 }, { "epoch": 1.4224473889321902, "grad_norm": 1.1912163496017456, "learning_rate": 1.9237394478432818e-05, "loss": 0.8739, "step": 222650 }, { "epoch": 1.422511276081929, "grad_norm": 1.322155237197876, "learning_rate": 1.9233439039646917e-05, "loss": 0.8325, "step": 222660 }, { "epoch": 1.4225751632316677, "grad_norm": 0.9432783722877502, "learning_rate": 1.922948391070688e-05, "loss": 0.7395, "step": 222670 }, { "epoch": 1.4226390503814064, "grad_norm": 1.2607407569885254, "learning_rate": 1.9225529091652577e-05, "loss": 0.9211, "step": 222680 }, { "epoch": 1.4227029375311449, "grad_norm": 0.6521483063697815, "learning_rate": 1.9221574582523804e-05, "loss": 0.7439, "step": 222690 }, { "epoch": 1.4227668246808838, "grad_norm": 1.0877735614776611, "learning_rate": 1.921762038336042e-05, "loss": 1.1125, "step": 222700 }, { "epoch": 1.4228307118306223, "grad_norm": 0.8707333207130432, "learning_rate": 1.9213666494202216e-05, "loss": 0.8505, "step": 222710 }, { "epoch": 1.4228945989803612, "grad_norm": 0.8127386569976807, "learning_rate": 1.9209712915089035e-05, "loss": 1.0918, "step": 222720 }, { "epoch": 1.4229584861300997, "grad_norm": 0.8321516513824463, "learning_rate": 1.9205759646060668e-05, "loss": 0.8188, "step": 222730 }, { "epoch": 1.4230223732798386, "grad_norm": 0.5849027633666992, "learning_rate": 1.920180668715696e-05, "loss": 0.8457, "step": 222740 }, { "epoch": 1.423086260429577, "grad_norm": 0.9630666375160217, "learning_rate": 1.919785403841768e-05, "loss": 1.0351, "step": 222750 }, { "epoch": 1.423150147579316, "grad_norm": 1.0991421937942505, "learning_rate": 1.9193901699882683e-05, "loss": 0.922, "step": 222760 }, { "epoch": 1.4232140347290545, "grad_norm": 1.1798986196517944, "learning_rate": 1.9189949671591724e-05, "loss": 0.9662, "step": 222770 }, { "epoch": 1.4232779218787934, "grad_norm": 0.5852562189102173, "learning_rate": 1.9185997953584644e-05, "loss": 0.8046, "step": 222780 }, { "epoch": 1.423341809028532, "grad_norm": 0.9567358493804932, "learning_rate": 1.9182046545901204e-05, "loss": 0.9986, "step": 222790 }, { "epoch": 1.4234056961782708, "grad_norm": 1.1330355405807495, "learning_rate": 1.9178095448581224e-05, "loss": 0.7967, "step": 222800 }, { "epoch": 1.4234695833280093, "grad_norm": 1.0684672594070435, "learning_rate": 1.9174144661664493e-05, "loss": 1.1382, "step": 222810 }, { "epoch": 1.423533470477748, "grad_norm": 0.8078852891921997, "learning_rate": 1.9170194185190786e-05, "loss": 1.0976, "step": 222820 }, { "epoch": 1.4235973576274867, "grad_norm": 1.4376271963119507, "learning_rate": 1.9166244019199913e-05, "loss": 0.8219, "step": 222830 }, { "epoch": 1.4236612447772254, "grad_norm": 0.7955139875411987, "learning_rate": 1.9162294163731616e-05, "loss": 0.9684, "step": 222840 }, { "epoch": 1.4237251319269641, "grad_norm": 1.2134015560150146, "learning_rate": 1.9158344618825713e-05, "loss": 0.8365, "step": 222850 }, { "epoch": 1.4237890190767029, "grad_norm": 0.7520911693572998, "learning_rate": 1.9154395384521944e-05, "loss": 0.8027, "step": 222860 }, { "epoch": 1.4238529062264416, "grad_norm": 0.9657458066940308, "learning_rate": 1.9150446460860118e-05, "loss": 0.8404, "step": 222870 }, { "epoch": 1.4239167933761803, "grad_norm": 1.2599238157272339, "learning_rate": 1.9146497847879986e-05, "loss": 0.7761, "step": 222880 }, { "epoch": 1.423980680525919, "grad_norm": 0.6829902529716492, "learning_rate": 1.9142549545621307e-05, "loss": 0.8754, "step": 222890 }, { "epoch": 1.4240445676756577, "grad_norm": 1.3122057914733887, "learning_rate": 1.9138601554123837e-05, "loss": 0.9307, "step": 222900 }, { "epoch": 1.4241084548253964, "grad_norm": 0.788246214389801, "learning_rate": 1.9134653873427344e-05, "loss": 0.6951, "step": 222910 }, { "epoch": 1.424172341975135, "grad_norm": 1.1151480674743652, "learning_rate": 1.913070650357161e-05, "loss": 0.9483, "step": 222920 }, { "epoch": 1.4242362291248738, "grad_norm": 0.9124578833580017, "learning_rate": 1.9126759444596348e-05, "loss": 0.6496, "step": 222930 }, { "epoch": 1.4243001162746125, "grad_norm": 0.904630184173584, "learning_rate": 1.9122812696541348e-05, "loss": 0.72, "step": 222940 }, { "epoch": 1.4243640034243512, "grad_norm": 1.0377484560012817, "learning_rate": 1.9118866259446315e-05, "loss": 0.7401, "step": 222950 }, { "epoch": 1.42442789057409, "grad_norm": 0.8512157201766968, "learning_rate": 1.9114920133351033e-05, "loss": 0.9919, "step": 222960 }, { "epoch": 1.4244917777238286, "grad_norm": 0.9018188714981079, "learning_rate": 1.911097431829521e-05, "loss": 0.6751, "step": 222970 }, { "epoch": 1.4245556648735673, "grad_norm": 1.2587921619415283, "learning_rate": 1.9107028814318616e-05, "loss": 0.8381, "step": 222980 }, { "epoch": 1.424619552023306, "grad_norm": 0.847082257270813, "learning_rate": 1.910308362146095e-05, "loss": 0.878, "step": 222990 }, { "epoch": 1.4246834391730447, "grad_norm": 0.8812136650085449, "learning_rate": 1.9099138739761973e-05, "loss": 0.6974, "step": 223000 }, { "epoch": 1.4247473263227834, "grad_norm": 0.986804187297821, "learning_rate": 1.9095194169261394e-05, "loss": 1.0691, "step": 223010 }, { "epoch": 1.4248112134725222, "grad_norm": 0.7562146782875061, "learning_rate": 1.909124990999896e-05, "loss": 0.6436, "step": 223020 }, { "epoch": 1.4248751006222609, "grad_norm": 1.4277758598327637, "learning_rate": 1.9087305962014356e-05, "loss": 0.8546, "step": 223030 }, { "epoch": 1.4249389877719996, "grad_norm": 0.6766421794891357, "learning_rate": 1.9083362325347348e-05, "loss": 0.9694, "step": 223040 }, { "epoch": 1.4250028749217383, "grad_norm": 1.1126245260238647, "learning_rate": 1.9079419000037613e-05, "loss": 0.8527, "step": 223050 }, { "epoch": 1.425066762071477, "grad_norm": 1.1563136577606201, "learning_rate": 1.907547598612489e-05, "loss": 0.8045, "step": 223060 }, { "epoch": 1.4251306492212157, "grad_norm": 0.8701530694961548, "learning_rate": 1.9071533283648863e-05, "loss": 1.1362, "step": 223070 }, { "epoch": 1.4251945363709544, "grad_norm": 1.362408995628357, "learning_rate": 1.9067590892649266e-05, "loss": 0.8057, "step": 223080 }, { "epoch": 1.425258423520693, "grad_norm": 1.4804731607437134, "learning_rate": 1.906364881316578e-05, "loss": 1.2727, "step": 223090 }, { "epoch": 1.4253223106704318, "grad_norm": 1.0505961179733276, "learning_rate": 1.9059707045238125e-05, "loss": 0.7074, "step": 223100 }, { "epoch": 1.4253861978201705, "grad_norm": 0.7411073446273804, "learning_rate": 1.905576558890597e-05, "loss": 0.8723, "step": 223110 }, { "epoch": 1.4254500849699092, "grad_norm": 0.8923389911651611, "learning_rate": 1.9051824444209033e-05, "loss": 0.6809, "step": 223120 }, { "epoch": 1.425513972119648, "grad_norm": 1.6303932666778564, "learning_rate": 1.9047883611187006e-05, "loss": 0.8095, "step": 223130 }, { "epoch": 1.4255778592693866, "grad_norm": 2.7295801639556885, "learning_rate": 1.9043943089879557e-05, "loss": 0.8861, "step": 223140 }, { "epoch": 1.4256417464191253, "grad_norm": 0.7593581676483154, "learning_rate": 1.9040002880326397e-05, "loss": 0.8349, "step": 223150 }, { "epoch": 1.425705633568864, "grad_norm": 0.8572759628295898, "learning_rate": 1.9036062982567172e-05, "loss": 1.0088, "step": 223160 }, { "epoch": 1.4257695207186027, "grad_norm": 0.750809371471405, "learning_rate": 1.9032123396641605e-05, "loss": 0.8898, "step": 223170 }, { "epoch": 1.4258334078683412, "grad_norm": 1.0740511417388916, "learning_rate": 1.902818412258932e-05, "loss": 0.7032, "step": 223180 }, { "epoch": 1.4258972950180802, "grad_norm": 0.8483978509902954, "learning_rate": 1.902424516045004e-05, "loss": 0.8263, "step": 223190 }, { "epoch": 1.4259611821678186, "grad_norm": 0.8091272115707397, "learning_rate": 1.9020306510263392e-05, "loss": 0.6744, "step": 223200 }, { "epoch": 1.4260250693175576, "grad_norm": 0.9533054232597351, "learning_rate": 1.901636817206907e-05, "loss": 0.8155, "step": 223210 }, { "epoch": 1.426088956467296, "grad_norm": 1.1179161071777344, "learning_rate": 1.9012430145906714e-05, "loss": 0.9055, "step": 223220 }, { "epoch": 1.426152843617035, "grad_norm": 1.0814383029937744, "learning_rate": 1.9008492431816005e-05, "loss": 1.1812, "step": 223230 }, { "epoch": 1.4262167307667735, "grad_norm": 1.3198683261871338, "learning_rate": 1.900455502983658e-05, "loss": 0.8627, "step": 223240 }, { "epoch": 1.4262806179165124, "grad_norm": 2.693085193634033, "learning_rate": 1.9000617940008107e-05, "loss": 0.7759, "step": 223250 }, { "epoch": 1.4263445050662509, "grad_norm": 1.1490528583526611, "learning_rate": 1.899668116237022e-05, "loss": 0.9932, "step": 223260 }, { "epoch": 1.4264083922159898, "grad_norm": 1.3757820129394531, "learning_rate": 1.8992744696962594e-05, "loss": 0.7928, "step": 223270 }, { "epoch": 1.4264722793657283, "grad_norm": 1.5062553882598877, "learning_rate": 1.898880854382483e-05, "loss": 0.9733, "step": 223280 }, { "epoch": 1.426536166515467, "grad_norm": 0.8266945481300354, "learning_rate": 1.898487270299662e-05, "loss": 0.8319, "step": 223290 }, { "epoch": 1.4266000536652057, "grad_norm": 0.8893870115280151, "learning_rate": 1.898093717451755e-05, "loss": 0.7207, "step": 223300 }, { "epoch": 1.4266639408149444, "grad_norm": 4.273061752319336, "learning_rate": 1.8977001958427295e-05, "loss": 0.9901, "step": 223310 }, { "epoch": 1.4267278279646831, "grad_norm": 0.5634547472000122, "learning_rate": 1.8973067054765453e-05, "loss": 0.793, "step": 223320 }, { "epoch": 1.4267917151144218, "grad_norm": 1.0069791078567505, "learning_rate": 1.8969132463571664e-05, "loss": 0.8604, "step": 223330 }, { "epoch": 1.4268556022641605, "grad_norm": 1.3176125288009644, "learning_rate": 1.896519818488558e-05, "loss": 0.7742, "step": 223340 }, { "epoch": 1.4269194894138992, "grad_norm": 1.01974356174469, "learning_rate": 1.8961264218746776e-05, "loss": 0.854, "step": 223350 }, { "epoch": 1.426983376563638, "grad_norm": 1.006564974784851, "learning_rate": 1.8957330565194915e-05, "loss": 0.9466, "step": 223360 }, { "epoch": 1.4270472637133766, "grad_norm": 0.7808417081832886, "learning_rate": 1.8953397224269593e-05, "loss": 0.8487, "step": 223370 }, { "epoch": 1.4271111508631154, "grad_norm": 1.5651687383651733, "learning_rate": 1.8949464196010414e-05, "loss": 0.8292, "step": 223380 }, { "epoch": 1.427175038012854, "grad_norm": 0.9617428779602051, "learning_rate": 1.894553148045698e-05, "loss": 0.9321, "step": 223390 }, { "epoch": 1.4272389251625928, "grad_norm": 0.7266619801521301, "learning_rate": 1.8941599077648925e-05, "loss": 0.8823, "step": 223400 }, { "epoch": 1.4273028123123315, "grad_norm": 0.8688862919807434, "learning_rate": 1.8937666987625817e-05, "loss": 0.9043, "step": 223410 }, { "epoch": 1.4273666994620702, "grad_norm": 1.1119881868362427, "learning_rate": 1.89337352104273e-05, "loss": 0.8779, "step": 223420 }, { "epoch": 1.4274305866118089, "grad_norm": 1.5051636695861816, "learning_rate": 1.8929803746092923e-05, "loss": 1.0522, "step": 223430 }, { "epoch": 1.4274944737615476, "grad_norm": 1.0099921226501465, "learning_rate": 1.8925872594662304e-05, "loss": 1.1269, "step": 223440 }, { "epoch": 1.4275583609112863, "grad_norm": 1.209182620048523, "learning_rate": 1.8921941756175045e-05, "loss": 0.8918, "step": 223450 }, { "epoch": 1.427622248061025, "grad_norm": 1.6870150566101074, "learning_rate": 1.8918011230670708e-05, "loss": 0.8373, "step": 223460 }, { "epoch": 1.4276861352107637, "grad_norm": 1.0700960159301758, "learning_rate": 1.8914081018188895e-05, "loss": 0.8941, "step": 223470 }, { "epoch": 1.4277500223605024, "grad_norm": 0.8452657461166382, "learning_rate": 1.891015111876917e-05, "loss": 0.8807, "step": 223480 }, { "epoch": 1.4278139095102411, "grad_norm": 0.7489930987358093, "learning_rate": 1.8906221532451134e-05, "loss": 0.8034, "step": 223490 }, { "epoch": 1.4278777966599798, "grad_norm": 1.563072919845581, "learning_rate": 1.890229225927433e-05, "loss": 0.7062, "step": 223500 }, { "epoch": 1.4279416838097185, "grad_norm": 0.8351766467094421, "learning_rate": 1.8898363299278364e-05, "loss": 0.9484, "step": 223510 }, { "epoch": 1.4280055709594572, "grad_norm": 0.9991663098335266, "learning_rate": 1.8894434652502767e-05, "loss": 0.7625, "step": 223520 }, { "epoch": 1.428069458109196, "grad_norm": 0.950833261013031, "learning_rate": 1.8890506318987144e-05, "loss": 0.9521, "step": 223530 }, { "epoch": 1.4281333452589346, "grad_norm": 0.8913379311561584, "learning_rate": 1.8886578298771017e-05, "loss": 0.7806, "step": 223540 }, { "epoch": 1.4281972324086734, "grad_norm": 1.2399307489395142, "learning_rate": 1.888265059189398e-05, "loss": 0.7326, "step": 223550 }, { "epoch": 1.428261119558412, "grad_norm": 0.7313088774681091, "learning_rate": 1.887872319839556e-05, "loss": 1.1297, "step": 223560 }, { "epoch": 1.4283250067081508, "grad_norm": 1.8566665649414062, "learning_rate": 1.887479611831533e-05, "loss": 0.7582, "step": 223570 }, { "epoch": 1.4283888938578895, "grad_norm": 1.242580533027649, "learning_rate": 1.887086935169282e-05, "loss": 0.946, "step": 223580 }, { "epoch": 1.4284527810076282, "grad_norm": 0.9577218294143677, "learning_rate": 1.8866942898567596e-05, "loss": 0.9002, "step": 223590 }, { "epoch": 1.4285166681573669, "grad_norm": 0.7212193608283997, "learning_rate": 1.886301675897918e-05, "loss": 0.8361, "step": 223600 }, { "epoch": 1.4285805553071056, "grad_norm": 0.7417780160903931, "learning_rate": 1.885909093296714e-05, "loss": 0.8958, "step": 223610 }, { "epoch": 1.4286444424568443, "grad_norm": 1.352882981300354, "learning_rate": 1.8855165420570974e-05, "loss": 0.7512, "step": 223620 }, { "epoch": 1.428708329606583, "grad_norm": 0.7283663153648376, "learning_rate": 1.8851240221830258e-05, "loss": 0.7656, "step": 223630 }, { "epoch": 1.4287722167563217, "grad_norm": 0.8927812576293945, "learning_rate": 1.8847315336784477e-05, "loss": 0.9373, "step": 223640 }, { "epoch": 1.4288361039060604, "grad_norm": 1.8338420391082764, "learning_rate": 1.8843390765473184e-05, "loss": 0.7657, "step": 223650 }, { "epoch": 1.4288999910557991, "grad_norm": 0.784820556640625, "learning_rate": 1.8839466507935923e-05, "loss": 0.9875, "step": 223660 }, { "epoch": 1.4289638782055376, "grad_norm": 0.7442781329154968, "learning_rate": 1.8835542564212168e-05, "loss": 1.0937, "step": 223670 }, { "epoch": 1.4290277653552765, "grad_norm": 1.2108781337738037, "learning_rate": 1.883161893434148e-05, "loss": 0.791, "step": 223680 }, { "epoch": 1.429091652505015, "grad_norm": 1.1448874473571777, "learning_rate": 1.8827695618363334e-05, "loss": 1.29, "step": 223690 }, { "epoch": 1.429155539654754, "grad_norm": 0.8458133935928345, "learning_rate": 1.882377261631728e-05, "loss": 0.8695, "step": 223700 }, { "epoch": 1.4292194268044924, "grad_norm": 0.9109838604927063, "learning_rate": 1.8819849928242793e-05, "loss": 0.8017, "step": 223710 }, { "epoch": 1.4292833139542314, "grad_norm": 1.242732286453247, "learning_rate": 1.8815927554179408e-05, "loss": 0.7131, "step": 223720 }, { "epoch": 1.4293472011039698, "grad_norm": 0.6500130295753479, "learning_rate": 1.881200549416659e-05, "loss": 0.7161, "step": 223730 }, { "epoch": 1.4294110882537088, "grad_norm": 0.7395265102386475, "learning_rate": 1.8808083748243878e-05, "loss": 0.7819, "step": 223740 }, { "epoch": 1.4294749754034473, "grad_norm": 0.9619272947311401, "learning_rate": 1.880416231645073e-05, "loss": 0.8497, "step": 223750 }, { "epoch": 1.4295388625531862, "grad_norm": 0.80619215965271, "learning_rate": 1.8800241198826675e-05, "loss": 0.9684, "step": 223760 }, { "epoch": 1.4296027497029247, "grad_norm": 1.2067186832427979, "learning_rate": 1.879632039541116e-05, "loss": 0.8832, "step": 223770 }, { "epoch": 1.4296666368526634, "grad_norm": 0.5644568204879761, "learning_rate": 1.8792399906243712e-05, "loss": 0.9305, "step": 223780 }, { "epoch": 1.429730524002402, "grad_norm": 0.5826699733734131, "learning_rate": 1.8788479731363785e-05, "loss": 1.0084, "step": 223790 }, { "epoch": 1.4297944111521408, "grad_norm": 0.9067062735557556, "learning_rate": 1.878455987081088e-05, "loss": 0.9709, "step": 223800 }, { "epoch": 1.4298582983018795, "grad_norm": 1.0680228471755981, "learning_rate": 1.8781032265095477e-05, "loss": 0.7959, "step": 223810 }, { "epoch": 1.4299221854516182, "grad_norm": 0.8127917647361755, "learning_rate": 1.8777113001872633e-05, "loss": 0.8962, "step": 223820 }, { "epoch": 1.429986072601357, "grad_norm": 1.4489061832427979, "learning_rate": 1.8773194053091285e-05, "loss": 0.8774, "step": 223830 }, { "epoch": 1.4300499597510956, "grad_norm": 1.1648927927017212, "learning_rate": 1.876927541879088e-05, "loss": 1.0562, "step": 223840 }, { "epoch": 1.4301138469008343, "grad_norm": 1.0999516248703003, "learning_rate": 1.8765357099010898e-05, "loss": 0.6795, "step": 223850 }, { "epoch": 1.430177734050573, "grad_norm": 0.9541165828704834, "learning_rate": 1.876143909379078e-05, "loss": 0.8735, "step": 223860 }, { "epoch": 1.4302416212003117, "grad_norm": 0.7938796877861023, "learning_rate": 1.875752140317001e-05, "loss": 0.8024, "step": 223870 }, { "epoch": 1.4303055083500504, "grad_norm": 1.1307613849639893, "learning_rate": 1.875360402718801e-05, "loss": 0.8533, "step": 223880 }, { "epoch": 1.4303693954997891, "grad_norm": 1.4370204210281372, "learning_rate": 1.8749686965884273e-05, "loss": 0.81, "step": 223890 }, { "epoch": 1.4304332826495278, "grad_norm": 0.9484549164772034, "learning_rate": 1.8745770219298196e-05, "loss": 1.0642, "step": 223900 }, { "epoch": 1.4304971697992666, "grad_norm": 0.7258232831954956, "learning_rate": 1.8741853787469278e-05, "loss": 1.0068, "step": 223910 }, { "epoch": 1.4305610569490053, "grad_norm": 0.8766778111457825, "learning_rate": 1.8737937670436912e-05, "loss": 0.7022, "step": 223920 }, { "epoch": 1.430624944098744, "grad_norm": 0.6516861319541931, "learning_rate": 1.873402186824058e-05, "loss": 0.8096, "step": 223930 }, { "epoch": 1.4306888312484827, "grad_norm": 0.9304824471473694, "learning_rate": 1.8730106380919676e-05, "loss": 0.7141, "step": 223940 }, { "epoch": 1.4307527183982214, "grad_norm": 0.8284528255462646, "learning_rate": 1.8726191208513673e-05, "loss": 0.7919, "step": 223950 }, { "epoch": 1.43081660554796, "grad_norm": 1.0690491199493408, "learning_rate": 1.8722276351061963e-05, "loss": 0.8234, "step": 223960 }, { "epoch": 1.4308804926976988, "grad_norm": 1.2148635387420654, "learning_rate": 1.8718361808603984e-05, "loss": 0.7975, "step": 223970 }, { "epoch": 1.4309443798474375, "grad_norm": 0.83355712890625, "learning_rate": 1.871444758117919e-05, "loss": 1.0509, "step": 223980 }, { "epoch": 1.4310082669971762, "grad_norm": 0.8912057280540466, "learning_rate": 1.8710533668826953e-05, "loss": 0.9617, "step": 223990 }, { "epoch": 1.431072154146915, "grad_norm": 0.8748745918273926, "learning_rate": 1.8706620071586745e-05, "loss": 0.7828, "step": 224000 }, { "epoch": 1.4311360412966536, "grad_norm": 0.9734835028648376, "learning_rate": 1.870270678949791e-05, "loss": 0.9372, "step": 224010 }, { "epoch": 1.4311999284463923, "grad_norm": 1.132683277130127, "learning_rate": 1.8698793822599915e-05, "loss": 0.6881, "step": 224020 }, { "epoch": 1.431263815596131, "grad_norm": 1.0529520511627197, "learning_rate": 1.869488117093212e-05, "loss": 0.9852, "step": 224030 }, { "epoch": 1.4313277027458697, "grad_norm": 1.1331969499588013, "learning_rate": 1.8690968834533984e-05, "loss": 0.5732, "step": 224040 }, { "epoch": 1.4313915898956084, "grad_norm": 1.1039819717407227, "learning_rate": 1.8687056813444854e-05, "loss": 0.8991, "step": 224050 }, { "epoch": 1.4314554770453471, "grad_norm": 0.7155099511146545, "learning_rate": 1.8683145107704163e-05, "loss": 0.7931, "step": 224060 }, { "epoch": 1.4315193641950859, "grad_norm": 3.0403034687042236, "learning_rate": 1.8679233717351284e-05, "loss": 0.8228, "step": 224070 }, { "epoch": 1.4315832513448246, "grad_norm": 1.0688437223434448, "learning_rate": 1.8675322642425617e-05, "loss": 1.0601, "step": 224080 }, { "epoch": 1.4316471384945633, "grad_norm": 1.0098079442977905, "learning_rate": 1.8671411882966567e-05, "loss": 0.7916, "step": 224090 }, { "epoch": 1.431711025644302, "grad_norm": 0.9161421060562134, "learning_rate": 1.8667501439013487e-05, "loss": 0.7391, "step": 224100 }, { "epoch": 1.4317749127940407, "grad_norm": 0.9645119309425354, "learning_rate": 1.8663591310605786e-05, "loss": 1.0217, "step": 224110 }, { "epoch": 1.4318387999437794, "grad_norm": 1.7049353122711182, "learning_rate": 1.8659681497782823e-05, "loss": 0.7817, "step": 224120 }, { "epoch": 1.431902687093518, "grad_norm": 1.1278208494186401, "learning_rate": 1.8655772000583987e-05, "loss": 0.778, "step": 224130 }, { "epoch": 1.4319665742432566, "grad_norm": 1.1097828149795532, "learning_rate": 1.865186281904863e-05, "loss": 0.9847, "step": 224140 }, { "epoch": 1.4320304613929955, "grad_norm": 0.9624657034873962, "learning_rate": 1.864795395321615e-05, "loss": 0.6916, "step": 224150 }, { "epoch": 1.432094348542734, "grad_norm": 0.8190516829490662, "learning_rate": 1.8644045403125886e-05, "loss": 0.793, "step": 224160 }, { "epoch": 1.432158235692473, "grad_norm": 0.8392983675003052, "learning_rate": 1.8640137168817223e-05, "loss": 0.833, "step": 224170 }, { "epoch": 1.4322221228422114, "grad_norm": 1.075068712234497, "learning_rate": 1.8636229250329497e-05, "loss": 1.0602, "step": 224180 }, { "epoch": 1.4322860099919503, "grad_norm": 1.0051250457763672, "learning_rate": 1.863232164770209e-05, "loss": 0.7617, "step": 224190 }, { "epoch": 1.4323498971416888, "grad_norm": 1.2177984714508057, "learning_rate": 1.8628414360974323e-05, "loss": 0.7633, "step": 224200 }, { "epoch": 1.4324137842914277, "grad_norm": 0.8915864825248718, "learning_rate": 1.862450739018558e-05, "loss": 1.1079, "step": 224210 }, { "epoch": 1.4324776714411662, "grad_norm": 0.4424319565296173, "learning_rate": 1.8620600735375176e-05, "loss": 0.646, "step": 224220 }, { "epoch": 1.4325415585909052, "grad_norm": 1.0826150178909302, "learning_rate": 1.8616694396582484e-05, "loss": 0.9242, "step": 224230 }, { "epoch": 1.4326054457406436, "grad_norm": 0.7918615937232971, "learning_rate": 1.8612788373846817e-05, "loss": 1.115, "step": 224240 }, { "epoch": 1.4326693328903826, "grad_norm": 0.8647782206535339, "learning_rate": 1.860888266720754e-05, "loss": 0.8596, "step": 224250 }, { "epoch": 1.432733220040121, "grad_norm": 0.8612581491470337, "learning_rate": 1.8604977276703955e-05, "loss": 0.7004, "step": 224260 }, { "epoch": 1.4327971071898598, "grad_norm": 0.9314303398132324, "learning_rate": 1.8601072202375423e-05, "loss": 0.8431, "step": 224270 }, { "epoch": 1.4328609943395985, "grad_norm": 0.630257248878479, "learning_rate": 1.8597167444261247e-05, "loss": 0.7244, "step": 224280 }, { "epoch": 1.4329248814893372, "grad_norm": 0.8772875070571899, "learning_rate": 1.8593263002400758e-05, "loss": 0.7054, "step": 224290 }, { "epoch": 1.4329887686390759, "grad_norm": 0.9034510254859924, "learning_rate": 1.85893588768333e-05, "loss": 0.7146, "step": 224300 }, { "epoch": 1.4330526557888146, "grad_norm": 1.1122403144836426, "learning_rate": 1.8585455067598156e-05, "loss": 0.9885, "step": 224310 }, { "epoch": 1.4331165429385533, "grad_norm": 0.8106998205184937, "learning_rate": 1.8581551574734675e-05, "loss": 1.0353, "step": 224320 }, { "epoch": 1.433180430088292, "grad_norm": 0.9256942272186279, "learning_rate": 1.8577648398282127e-05, "loss": 0.9948, "step": 224330 }, { "epoch": 1.4332443172380307, "grad_norm": 1.1601678133010864, "learning_rate": 1.8573745538279864e-05, "loss": 0.9708, "step": 224340 }, { "epoch": 1.4333082043877694, "grad_norm": 0.8076596260070801, "learning_rate": 1.8569842994767156e-05, "loss": 0.9267, "step": 224350 }, { "epoch": 1.433372091537508, "grad_norm": 0.8912524580955505, "learning_rate": 1.8565940767783336e-05, "loss": 1.119, "step": 224360 }, { "epoch": 1.4334359786872468, "grad_norm": 1.3349778652191162, "learning_rate": 1.856203885736767e-05, "loss": 0.9859, "step": 224370 }, { "epoch": 1.4334998658369855, "grad_norm": 3.2930209636688232, "learning_rate": 1.8558137263559484e-05, "loss": 1.0891, "step": 224380 }, { "epoch": 1.4335637529867242, "grad_norm": 0.980534017086029, "learning_rate": 1.8554235986398045e-05, "loss": 0.8293, "step": 224390 }, { "epoch": 1.433627640136463, "grad_norm": 0.53654944896698, "learning_rate": 1.8550335025922667e-05, "loss": 0.896, "step": 224400 }, { "epoch": 1.4336915272862016, "grad_norm": 1.1580787897109985, "learning_rate": 1.854643438217261e-05, "loss": 1.0501, "step": 224410 }, { "epoch": 1.4337554144359403, "grad_norm": 0.7166798710823059, "learning_rate": 1.8542534055187182e-05, "loss": 0.7975, "step": 224420 }, { "epoch": 1.433819301585679, "grad_norm": 0.838466227054596, "learning_rate": 1.8538634045005637e-05, "loss": 0.8701, "step": 224430 }, { "epoch": 1.4338831887354178, "grad_norm": 1.2870148420333862, "learning_rate": 1.8534734351667284e-05, "loss": 0.8109, "step": 224440 }, { "epoch": 1.4339470758851565, "grad_norm": 2.708895206451416, "learning_rate": 1.853083497521136e-05, "loss": 0.7707, "step": 224450 }, { "epoch": 1.4340109630348952, "grad_norm": 0.9632241725921631, "learning_rate": 1.8526935915677168e-05, "loss": 0.7554, "step": 224460 }, { "epoch": 1.4340748501846339, "grad_norm": 1.179732084274292, "learning_rate": 1.8523037173103942e-05, "loss": 0.8122, "step": 224470 }, { "epoch": 1.4341387373343726, "grad_norm": 1.2000482082366943, "learning_rate": 1.8519138747530978e-05, "loss": 0.8552, "step": 224480 }, { "epoch": 1.4342026244841113, "grad_norm": 1.123895287513733, "learning_rate": 1.8515240638997523e-05, "loss": 0.9792, "step": 224490 }, { "epoch": 1.43426651163385, "grad_norm": 0.743156373500824, "learning_rate": 1.851134284754282e-05, "loss": 1.0077, "step": 224500 }, { "epoch": 1.4343303987835887, "grad_norm": 0.8791418671607971, "learning_rate": 1.8507445373206143e-05, "loss": 0.8268, "step": 224510 }, { "epoch": 1.4343942859333274, "grad_norm": 0.8102611303329468, "learning_rate": 1.850354821602673e-05, "loss": 0.8767, "step": 224520 }, { "epoch": 1.4344581730830661, "grad_norm": 1.0167714357376099, "learning_rate": 1.8499651376043846e-05, "loss": 0.9542, "step": 224530 }, { "epoch": 1.4345220602328048, "grad_norm": 1.4743082523345947, "learning_rate": 1.849575485329671e-05, "loss": 0.7113, "step": 224540 }, { "epoch": 1.4345859473825435, "grad_norm": 0.9514703154563904, "learning_rate": 1.8491858647824595e-05, "loss": 1.0415, "step": 224550 }, { "epoch": 1.4346498345322822, "grad_norm": 0.9601063132286072, "learning_rate": 1.8487962759666706e-05, "loss": 0.7705, "step": 224560 }, { "epoch": 1.434713721682021, "grad_norm": 0.943200409412384, "learning_rate": 1.8484067188862304e-05, "loss": 0.7712, "step": 224570 }, { "epoch": 1.4347776088317596, "grad_norm": 0.9697480201721191, "learning_rate": 1.8480171935450597e-05, "loss": 0.8612, "step": 224580 }, { "epoch": 1.4348414959814983, "grad_norm": 0.8412326574325562, "learning_rate": 1.847627699947085e-05, "loss": 0.9723, "step": 224590 }, { "epoch": 1.434905383131237, "grad_norm": 1.015438199043274, "learning_rate": 1.847238238096224e-05, "loss": 0.7827, "step": 224600 }, { "epoch": 1.4349692702809758, "grad_norm": 1.5689594745635986, "learning_rate": 1.8468488079964018e-05, "loss": 0.7896, "step": 224610 }, { "epoch": 1.4350331574307145, "grad_norm": 0.921557605266571, "learning_rate": 1.846459409651542e-05, "loss": 0.7594, "step": 224620 }, { "epoch": 1.435097044580453, "grad_norm": 1.0404754877090454, "learning_rate": 1.846070043065562e-05, "loss": 0.8123, "step": 224630 }, { "epoch": 1.4351609317301919, "grad_norm": 1.056254267692566, "learning_rate": 1.845680708242387e-05, "loss": 1.0377, "step": 224640 }, { "epoch": 1.4352248188799304, "grad_norm": 0.922844648361206, "learning_rate": 1.8452914051859344e-05, "loss": 0.9183, "step": 224650 }, { "epoch": 1.4352887060296693, "grad_norm": 0.7790452837944031, "learning_rate": 1.844902133900128e-05, "loss": 0.8256, "step": 224660 }, { "epoch": 1.4353525931794078, "grad_norm": 1.0011779069900513, "learning_rate": 1.8445128943888858e-05, "loss": 1.1991, "step": 224670 }, { "epoch": 1.4354164803291467, "grad_norm": 0.6932054162025452, "learning_rate": 1.8441236866561296e-05, "loss": 1.0215, "step": 224680 }, { "epoch": 1.4354803674788852, "grad_norm": 1.6760739088058472, "learning_rate": 1.843734510705777e-05, "loss": 1.1536, "step": 224690 }, { "epoch": 1.4355442546286241, "grad_norm": 0.8367016315460205, "learning_rate": 1.8433453665417493e-05, "loss": 0.9362, "step": 224700 }, { "epoch": 1.4356081417783626, "grad_norm": 0.8584269881248474, "learning_rate": 1.8429562541679633e-05, "loss": 0.8732, "step": 224710 }, { "epoch": 1.4356720289281015, "grad_norm": 0.7792062163352966, "learning_rate": 1.842567173588341e-05, "loss": 0.7278, "step": 224720 }, { "epoch": 1.43573591607784, "grad_norm": 0.6521714925765991, "learning_rate": 1.842178124806797e-05, "loss": 0.8583, "step": 224730 }, { "epoch": 1.435799803227579, "grad_norm": 0.612637460231781, "learning_rate": 1.841789107827253e-05, "loss": 0.5808, "step": 224740 }, { "epoch": 1.4358636903773174, "grad_norm": 1.4582802057266235, "learning_rate": 1.841400122653623e-05, "loss": 0.7482, "step": 224750 }, { "epoch": 1.4359275775270561, "grad_norm": 0.7831106781959534, "learning_rate": 1.8410111692898286e-05, "loss": 0.8334, "step": 224760 }, { "epoch": 1.4359914646767948, "grad_norm": 1.045304536819458, "learning_rate": 1.8406222477397822e-05, "loss": 0.8801, "step": 224770 }, { "epoch": 1.4360553518265335, "grad_norm": 0.7949215173721313, "learning_rate": 1.840233358007405e-05, "loss": 0.7311, "step": 224780 }, { "epoch": 1.4361192389762722, "grad_norm": 0.5786644816398621, "learning_rate": 1.83984450009661e-05, "loss": 0.8258, "step": 224790 }, { "epoch": 1.436183126126011, "grad_norm": 0.8888071775436401, "learning_rate": 1.8394556740113162e-05, "loss": 0.907, "step": 224800 }, { "epoch": 1.4362470132757497, "grad_norm": 1.349461555480957, "learning_rate": 1.8390668797554367e-05, "loss": 1.0114, "step": 224810 }, { "epoch": 1.4363109004254884, "grad_norm": 0.9334039688110352, "learning_rate": 1.8386781173328877e-05, "loss": 0.9106, "step": 224820 }, { "epoch": 1.436374787575227, "grad_norm": 0.9088615775108337, "learning_rate": 1.838289386747587e-05, "loss": 1.1062, "step": 224830 }, { "epoch": 1.4364386747249658, "grad_norm": 0.9782391786575317, "learning_rate": 1.837900688003446e-05, "loss": 0.9834, "step": 224840 }, { "epoch": 1.4365025618747045, "grad_norm": 0.988823652267456, "learning_rate": 1.8375120211043823e-05, "loss": 0.743, "step": 224850 }, { "epoch": 1.4365664490244432, "grad_norm": 1.2976255416870117, "learning_rate": 1.8371233860543063e-05, "loss": 0.829, "step": 224860 }, { "epoch": 1.436630336174182, "grad_norm": 0.9039852619171143, "learning_rate": 1.8367347828571364e-05, "loss": 0.9521, "step": 224870 }, { "epoch": 1.4366942233239206, "grad_norm": 1.0037953853607178, "learning_rate": 1.8363462115167818e-05, "loss": 0.8553, "step": 224880 }, { "epoch": 1.4367581104736593, "grad_norm": 1.209885835647583, "learning_rate": 1.8359576720371595e-05, "loss": 0.9151, "step": 224890 }, { "epoch": 1.436821997623398, "grad_norm": 0.9278016090393066, "learning_rate": 1.835569164422179e-05, "loss": 0.8871, "step": 224900 }, { "epoch": 1.4368858847731367, "grad_norm": 1.0095714330673218, "learning_rate": 1.8351806886757565e-05, "loss": 0.7992, "step": 224910 }, { "epoch": 1.4369497719228754, "grad_norm": 1.1271530389785767, "learning_rate": 1.8347922448018007e-05, "loss": 0.8998, "step": 224920 }, { "epoch": 1.4370136590726141, "grad_norm": 2.4450058937072754, "learning_rate": 1.8344038328042267e-05, "loss": 0.8528, "step": 224930 }, { "epoch": 1.4370775462223528, "grad_norm": 1.007347583770752, "learning_rate": 1.8340154526869437e-05, "loss": 0.7698, "step": 224940 }, { "epoch": 1.4371414333720915, "grad_norm": 1.0634715557098389, "learning_rate": 1.833627104453865e-05, "loss": 0.9392, "step": 224950 }, { "epoch": 1.4372053205218303, "grad_norm": 0.6562912464141846, "learning_rate": 1.8332387881088993e-05, "loss": 1.0421, "step": 224960 }, { "epoch": 1.437269207671569, "grad_norm": 0.7901965379714966, "learning_rate": 1.832850503655961e-05, "loss": 0.8792, "step": 224970 }, { "epoch": 1.4373330948213077, "grad_norm": 0.9408652186393738, "learning_rate": 1.832462251098957e-05, "loss": 0.9602, "step": 224980 }, { "epoch": 1.4373969819710464, "grad_norm": 0.5572311878204346, "learning_rate": 1.832074030441797e-05, "loss": 0.8207, "step": 224990 }, { "epoch": 1.437460869120785, "grad_norm": 0.9873363971710205, "learning_rate": 1.831685841688394e-05, "loss": 0.8795, "step": 225000 }, { "epoch": 1.4375247562705238, "grad_norm": 1.398949146270752, "learning_rate": 1.831297684842654e-05, "loss": 1.1101, "step": 225010 }, { "epoch": 1.4375886434202625, "grad_norm": 1.1199678182601929, "learning_rate": 1.8309095599084893e-05, "loss": 0.8379, "step": 225020 }, { "epoch": 1.4376525305700012, "grad_norm": 0.8787399530410767, "learning_rate": 1.8305214668898053e-05, "loss": 1.0063, "step": 225030 }, { "epoch": 1.43771641771974, "grad_norm": 0.870714545249939, "learning_rate": 1.8301334057905134e-05, "loss": 0.6885, "step": 225040 }, { "epoch": 1.4377803048694786, "grad_norm": 0.7601709365844727, "learning_rate": 1.8297453766145194e-05, "loss": 0.7629, "step": 225050 }, { "epoch": 1.4378441920192173, "grad_norm": 1.3224676847457886, "learning_rate": 1.8293573793657332e-05, "loss": 0.8404, "step": 225060 }, { "epoch": 1.437908079168956, "grad_norm": 1.1418801546096802, "learning_rate": 1.82896941404806e-05, "loss": 0.7309, "step": 225070 }, { "epoch": 1.4379719663186947, "grad_norm": 0.9552497863769531, "learning_rate": 1.8285814806654096e-05, "loss": 0.8668, "step": 225080 }, { "epoch": 1.4380358534684334, "grad_norm": 1.5807976722717285, "learning_rate": 1.8281935792216852e-05, "loss": 0.9506, "step": 225090 }, { "epoch": 1.4380997406181721, "grad_norm": 0.8164640665054321, "learning_rate": 1.827805709720798e-05, "loss": 0.7146, "step": 225100 }, { "epoch": 1.4381636277679108, "grad_norm": 0.911655843257904, "learning_rate": 1.8274178721666496e-05, "loss": 0.8573, "step": 225110 }, { "epoch": 1.4382275149176493, "grad_norm": 0.742917537689209, "learning_rate": 1.8270300665631497e-05, "loss": 0.9148, "step": 225120 }, { "epoch": 1.4382914020673883, "grad_norm": 0.615420937538147, "learning_rate": 1.8266422929142002e-05, "loss": 0.606, "step": 225130 }, { "epoch": 1.4383552892171267, "grad_norm": 0.9167844653129578, "learning_rate": 1.826254551223708e-05, "loss": 0.944, "step": 225140 }, { "epoch": 1.4384191763668657, "grad_norm": 0.9639812707901001, "learning_rate": 1.8258668414955797e-05, "loss": 0.6993, "step": 225150 }, { "epoch": 1.4384830635166042, "grad_norm": 1.29408597946167, "learning_rate": 1.825479163733717e-05, "loss": 1.037, "step": 225160 }, { "epoch": 1.438546950666343, "grad_norm": 0.8886136412620544, "learning_rate": 1.8250915179420273e-05, "loss": 0.841, "step": 225170 }, { "epoch": 1.4386108378160816, "grad_norm": 1.0680568218231201, "learning_rate": 1.8247039041244108e-05, "loss": 0.8608, "step": 225180 }, { "epoch": 1.4386747249658205, "grad_norm": 0.783477783203125, "learning_rate": 1.824316322284775e-05, "loss": 0.8877, "step": 225190 }, { "epoch": 1.438738612115559, "grad_norm": 0.8530393838882446, "learning_rate": 1.8239287724270187e-05, "loss": 0.9734, "step": 225200 }, { "epoch": 1.438802499265298, "grad_norm": 0.8309034109115601, "learning_rate": 1.82354125455505e-05, "loss": 0.9732, "step": 225210 }, { "epoch": 1.4388663864150364, "grad_norm": 0.9732787609100342, "learning_rate": 1.8231537686727667e-05, "loss": 1.0601, "step": 225220 }, { "epoch": 1.4389302735647753, "grad_norm": 1.3157035112380981, "learning_rate": 1.822766314784075e-05, "loss": 0.7097, "step": 225230 }, { "epoch": 1.4389941607145138, "grad_norm": 1.3295843601226807, "learning_rate": 1.8223788928928738e-05, "loss": 0.7674, "step": 225240 }, { "epoch": 1.4390580478642525, "grad_norm": 1.2309082746505737, "learning_rate": 1.8219915030030675e-05, "loss": 0.851, "step": 225250 }, { "epoch": 1.4391219350139912, "grad_norm": 1.1842759847640991, "learning_rate": 1.8216041451185545e-05, "loss": 0.765, "step": 225260 }, { "epoch": 1.43918582216373, "grad_norm": 1.7547402381896973, "learning_rate": 1.8212168192432395e-05, "loss": 0.9925, "step": 225270 }, { "epoch": 1.4392497093134686, "grad_norm": 0.7879651188850403, "learning_rate": 1.8208295253810186e-05, "loss": 0.9249, "step": 225280 }, { "epoch": 1.4393135964632073, "grad_norm": 0.8185400366783142, "learning_rate": 1.8204422635357975e-05, "loss": 0.8169, "step": 225290 }, { "epoch": 1.439377483612946, "grad_norm": 1.1932501792907715, "learning_rate": 1.8200550337114715e-05, "loss": 0.9108, "step": 225300 }, { "epoch": 1.4394413707626847, "grad_norm": 0.8779699206352234, "learning_rate": 1.819667835911944e-05, "loss": 0.9995, "step": 225310 }, { "epoch": 1.4395052579124235, "grad_norm": 0.8722737431526184, "learning_rate": 1.8192806701411107e-05, "loss": 0.9463, "step": 225320 }, { "epoch": 1.4395691450621622, "grad_norm": 1.0146985054016113, "learning_rate": 1.8188935364028747e-05, "loss": 0.7525, "step": 225330 }, { "epoch": 1.4396330322119009, "grad_norm": 0.8703792095184326, "learning_rate": 1.8185064347011317e-05, "loss": 0.8022, "step": 225340 }, { "epoch": 1.4396969193616396, "grad_norm": 0.5923279523849487, "learning_rate": 1.8181193650397805e-05, "loss": 1.019, "step": 225350 }, { "epoch": 1.4397608065113783, "grad_norm": 0.685735821723938, "learning_rate": 1.817732327422722e-05, "loss": 0.927, "step": 225360 }, { "epoch": 1.439824693661117, "grad_norm": 0.9039795398712158, "learning_rate": 1.8173453218538505e-05, "loss": 0.9359, "step": 225370 }, { "epoch": 1.4398885808108557, "grad_norm": 1.2408785820007324, "learning_rate": 1.816958348337066e-05, "loss": 0.8988, "step": 225380 }, { "epoch": 1.4399524679605944, "grad_norm": 1.1909972429275513, "learning_rate": 1.816571406876264e-05, "loss": 1.0245, "step": 225390 }, { "epoch": 1.440016355110333, "grad_norm": 1.1718319654464722, "learning_rate": 1.8161844974753427e-05, "loss": 0.7103, "step": 225400 }, { "epoch": 1.4400802422600718, "grad_norm": 1.1767466068267822, "learning_rate": 1.815797620138197e-05, "loss": 1.0089, "step": 225410 }, { "epoch": 1.4401441294098105, "grad_norm": 1.2119587659835815, "learning_rate": 1.8154107748687254e-05, "loss": 0.9051, "step": 225420 }, { "epoch": 1.4402080165595492, "grad_norm": 0.7241948843002319, "learning_rate": 1.815023961670821e-05, "loss": 0.7867, "step": 225430 }, { "epoch": 1.440271903709288, "grad_norm": 1.3505481481552124, "learning_rate": 1.8146371805483813e-05, "loss": 0.8559, "step": 225440 }, { "epoch": 1.4403357908590266, "grad_norm": 0.9351546764373779, "learning_rate": 1.8142504315053006e-05, "loss": 1.0054, "step": 225450 }, { "epoch": 1.4403996780087653, "grad_norm": 1.3706575632095337, "learning_rate": 1.813863714545475e-05, "loss": 1.1069, "step": 225460 }, { "epoch": 1.440463565158504, "grad_norm": 0.9399012923240662, "learning_rate": 1.8134770296727984e-05, "loss": 0.8457, "step": 225470 }, { "epoch": 1.4405274523082428, "grad_norm": 1.141176462173462, "learning_rate": 1.8130903768911628e-05, "loss": 0.6414, "step": 225480 }, { "epoch": 1.4405913394579815, "grad_norm": 2.1579673290252686, "learning_rate": 1.8127037562044662e-05, "loss": 0.9051, "step": 225490 }, { "epoch": 1.4406552266077202, "grad_norm": 1.083505392074585, "learning_rate": 1.8123171676165985e-05, "loss": 0.6264, "step": 225500 }, { "epoch": 1.4407191137574589, "grad_norm": 1.1727869510650635, "learning_rate": 1.8119306111314567e-05, "loss": 0.9338, "step": 225510 }, { "epoch": 1.4407830009071976, "grad_norm": 0.8360462188720703, "learning_rate": 1.8115440867529293e-05, "loss": 0.9365, "step": 225520 }, { "epoch": 1.4408468880569363, "grad_norm": 0.9862266182899475, "learning_rate": 1.8111575944849135e-05, "loss": 0.8103, "step": 225530 }, { "epoch": 1.440910775206675, "grad_norm": 0.784279465675354, "learning_rate": 1.8107711343312977e-05, "loss": 0.8937, "step": 225540 }, { "epoch": 1.4409746623564137, "grad_norm": 0.7874135971069336, "learning_rate": 1.8103847062959772e-05, "loss": 0.9192, "step": 225550 }, { "epoch": 1.4410385495061524, "grad_norm": 0.7712077498435974, "learning_rate": 1.8099983103828404e-05, "loss": 0.7963, "step": 225560 }, { "epoch": 1.441102436655891, "grad_norm": 1.632049560546875, "learning_rate": 1.809611946595782e-05, "loss": 1.0526, "step": 225570 }, { "epoch": 1.4411663238056298, "grad_norm": 1.2270588874816895, "learning_rate": 1.8092256149386904e-05, "loss": 0.9838, "step": 225580 }, { "epoch": 1.4412302109553685, "grad_norm": 0.9508424401283264, "learning_rate": 1.808839315415458e-05, "loss": 1.1952, "step": 225590 }, { "epoch": 1.4412940981051072, "grad_norm": 1.569566249847412, "learning_rate": 1.8084530480299734e-05, "loss": 0.6849, "step": 225600 }, { "epoch": 1.4413579852548457, "grad_norm": 1.066719651222229, "learning_rate": 1.8080668127861294e-05, "loss": 1.0987, "step": 225610 }, { "epoch": 1.4414218724045846, "grad_norm": 3.388587236404419, "learning_rate": 1.807680609687812e-05, "loss": 0.8551, "step": 225620 }, { "epoch": 1.4414857595543231, "grad_norm": 0.7674310207366943, "learning_rate": 1.8072944387389144e-05, "loss": 0.6982, "step": 225630 }, { "epoch": 1.441549646704062, "grad_norm": 0.7472887635231018, "learning_rate": 1.8069082999433223e-05, "loss": 0.8768, "step": 225640 }, { "epoch": 1.4416135338538005, "grad_norm": 0.6819210052490234, "learning_rate": 1.8065221933049277e-05, "loss": 0.6991, "step": 225650 }, { "epoch": 1.4416774210035395, "grad_norm": 1.1873059272766113, "learning_rate": 1.8061361188276155e-05, "loss": 0.9916, "step": 225660 }, { "epoch": 1.441741308153278, "grad_norm": 0.9788697957992554, "learning_rate": 1.8057500765152757e-05, "loss": 0.6409, "step": 225670 }, { "epoch": 1.4418051953030169, "grad_norm": 0.880154550075531, "learning_rate": 1.8053640663717974e-05, "loss": 1.0943, "step": 225680 }, { "epoch": 1.4418690824527554, "grad_norm": 0.5648699402809143, "learning_rate": 1.8049780884010658e-05, "loss": 0.8134, "step": 225690 }, { "epoch": 1.4419329696024943, "grad_norm": 1.1649391651153564, "learning_rate": 1.8045921426069702e-05, "loss": 0.9198, "step": 225700 }, { "epoch": 1.4419968567522328, "grad_norm": 0.8868089914321899, "learning_rate": 1.804206228993394e-05, "loss": 0.7574, "step": 225710 }, { "epoch": 1.4420607439019715, "grad_norm": 0.9655132293701172, "learning_rate": 1.8038203475642286e-05, "loss": 0.8014, "step": 225720 }, { "epoch": 1.4421246310517102, "grad_norm": 1.2397197484970093, "learning_rate": 1.8034344983233546e-05, "loss": 0.7869, "step": 225730 }, { "epoch": 1.4421885182014489, "grad_norm": 0.8126650452613831, "learning_rate": 1.8030486812746634e-05, "loss": 0.8521, "step": 225740 }, { "epoch": 1.4422524053511876, "grad_norm": 1.6487863063812256, "learning_rate": 1.802662896422035e-05, "loss": 0.7892, "step": 225750 }, { "epoch": 1.4423162925009263, "grad_norm": 0.9608396291732788, "learning_rate": 1.8022771437693596e-05, "loss": 0.8121, "step": 225760 }, { "epoch": 1.442380179650665, "grad_norm": 1.257879614830017, "learning_rate": 1.8018914233205182e-05, "loss": 1.0084, "step": 225770 }, { "epoch": 1.4424440668004037, "grad_norm": 0.9482038617134094, "learning_rate": 1.8015057350793984e-05, "loss": 0.8868, "step": 225780 }, { "epoch": 1.4425079539501424, "grad_norm": 0.8022263050079346, "learning_rate": 1.8011200790498812e-05, "loss": 0.7972, "step": 225790 }, { "epoch": 1.4425718410998811, "grad_norm": 0.7212386727333069, "learning_rate": 1.800734455235854e-05, "loss": 0.8484, "step": 225800 }, { "epoch": 1.4426357282496198, "grad_norm": 1.0760418176651, "learning_rate": 1.800348863641197e-05, "loss": 0.7978, "step": 225810 }, { "epoch": 1.4426996153993585, "grad_norm": 0.711896538734436, "learning_rate": 1.7999633042697962e-05, "loss": 0.6596, "step": 225820 }, { "epoch": 1.4427635025490972, "grad_norm": 1.0557421445846558, "learning_rate": 1.799577777125532e-05, "loss": 0.822, "step": 225830 }, { "epoch": 1.442827389698836, "grad_norm": 0.8317104578018188, "learning_rate": 1.7991922822122904e-05, "loss": 0.9163, "step": 225840 }, { "epoch": 1.4428912768485747, "grad_norm": 1.3670239448547363, "learning_rate": 1.7988068195339493e-05, "loss": 0.8233, "step": 225850 }, { "epoch": 1.4429551639983134, "grad_norm": 0.8564910292625427, "learning_rate": 1.7984213890943933e-05, "loss": 0.8689, "step": 225860 }, { "epoch": 1.443019051148052, "grad_norm": 1.9226771593093872, "learning_rate": 1.7980359908975053e-05, "loss": 0.7754, "step": 225870 }, { "epoch": 1.4430829382977908, "grad_norm": 0.7119132280349731, "learning_rate": 1.797650624947163e-05, "loss": 1.0531, "step": 225880 }, { "epoch": 1.4431468254475295, "grad_norm": 1.0629172325134277, "learning_rate": 1.7972652912472514e-05, "loss": 1.038, "step": 225890 }, { "epoch": 1.4432107125972682, "grad_norm": 0.6397799253463745, "learning_rate": 1.7968799898016474e-05, "loss": 0.8435, "step": 225900 }, { "epoch": 1.443274599747007, "grad_norm": 1.2140674591064453, "learning_rate": 1.7964947206142347e-05, "loss": 1.0597, "step": 225910 }, { "epoch": 1.4433384868967456, "grad_norm": 1.007383942604065, "learning_rate": 1.7961094836888898e-05, "loss": 0.9592, "step": 225920 }, { "epoch": 1.4434023740464843, "grad_norm": 0.9357951879501343, "learning_rate": 1.795724279029496e-05, "loss": 0.8951, "step": 225930 }, { "epoch": 1.443466261196223, "grad_norm": 1.1374599933624268, "learning_rate": 1.7953391066399304e-05, "loss": 1.0299, "step": 225940 }, { "epoch": 1.4435301483459617, "grad_norm": 0.6394752264022827, "learning_rate": 1.794953966524073e-05, "loss": 0.6978, "step": 225950 }, { "epoch": 1.4435940354957004, "grad_norm": 0.7873253226280212, "learning_rate": 1.7945688586857996e-05, "loss": 0.8146, "step": 225960 }, { "epoch": 1.4436579226454391, "grad_norm": 0.937562882900238, "learning_rate": 1.794183783128993e-05, "loss": 1.2297, "step": 225970 }, { "epoch": 1.4437218097951778, "grad_norm": 1.135293960571289, "learning_rate": 1.7937987398575275e-05, "loss": 0.7278, "step": 225980 }, { "epoch": 1.4437856969449165, "grad_norm": 2.8310680389404297, "learning_rate": 1.7934137288752823e-05, "loss": 0.8589, "step": 225990 }, { "epoch": 1.4438495840946552, "grad_norm": 0.6206763386726379, "learning_rate": 1.7930287501861364e-05, "loss": 0.7586, "step": 226000 }, { "epoch": 1.443913471244394, "grad_norm": 1.2204722166061401, "learning_rate": 1.7926438037939635e-05, "loss": 0.7127, "step": 226010 }, { "epoch": 1.4439773583941327, "grad_norm": 2.4062082767486572, "learning_rate": 1.792258889702645e-05, "loss": 0.8781, "step": 226020 }, { "epoch": 1.4440412455438714, "grad_norm": 1.2518004179000854, "learning_rate": 1.791874007916052e-05, "loss": 0.7874, "step": 226030 }, { "epoch": 1.44410513269361, "grad_norm": 1.1318409442901611, "learning_rate": 1.791489158438065e-05, "loss": 0.6976, "step": 226040 }, { "epoch": 1.4441690198433488, "grad_norm": 1.1307138204574585, "learning_rate": 1.7911043412725565e-05, "loss": 0.7689, "step": 226050 }, { "epoch": 1.4442329069930875, "grad_norm": 0.7476751804351807, "learning_rate": 1.7907195564234047e-05, "loss": 0.6618, "step": 226060 }, { "epoch": 1.4442967941428262, "grad_norm": 0.9509174227714539, "learning_rate": 1.7903348038944816e-05, "loss": 0.8388, "step": 226070 }, { "epoch": 1.444360681292565, "grad_norm": 1.105707049369812, "learning_rate": 1.789950083689666e-05, "loss": 0.9766, "step": 226080 }, { "epoch": 1.4444245684423036, "grad_norm": 0.9921726584434509, "learning_rate": 1.7895653958128285e-05, "loss": 1.2553, "step": 226090 }, { "epoch": 1.444488455592042, "grad_norm": 0.7300543189048767, "learning_rate": 1.7891807402678463e-05, "loss": 0.8451, "step": 226100 }, { "epoch": 1.444552342741781, "grad_norm": 1.0600464344024658, "learning_rate": 1.78879611705859e-05, "loss": 0.9788, "step": 226110 }, { "epoch": 1.4446162298915195, "grad_norm": 0.6954507827758789, "learning_rate": 1.7884115261889368e-05, "loss": 0.9408, "step": 226120 }, { "epoch": 1.4446801170412584, "grad_norm": 1.2967103719711304, "learning_rate": 1.7880269676627558e-05, "loss": 1.1109, "step": 226130 }, { "epoch": 1.444744004190997, "grad_norm": 1.16764235496521, "learning_rate": 1.7876424414839244e-05, "loss": 1.3395, "step": 226140 }, { "epoch": 1.4448078913407358, "grad_norm": 0.8881130218505859, "learning_rate": 1.78725794765631e-05, "loss": 0.7589, "step": 226150 }, { "epoch": 1.4448717784904743, "grad_norm": 0.9801353812217712, "learning_rate": 1.7868734861837898e-05, "loss": 1.0322, "step": 226160 }, { "epoch": 1.4449356656402133, "grad_norm": 0.7944816946983337, "learning_rate": 1.786489057070232e-05, "loss": 0.7409, "step": 226170 }, { "epoch": 1.4449995527899517, "grad_norm": 0.832903265953064, "learning_rate": 1.786104660319511e-05, "loss": 1.2924, "step": 226180 }, { "epoch": 1.4450634399396907, "grad_norm": 2.067941427230835, "learning_rate": 1.7857202959354945e-05, "loss": 1.0379, "step": 226190 }, { "epoch": 1.4451273270894291, "grad_norm": 1.9666894674301147, "learning_rate": 1.7853359639220558e-05, "loss": 0.8461, "step": 226200 }, { "epoch": 1.4451912142391679, "grad_norm": 5.305588245391846, "learning_rate": 1.784951664283066e-05, "loss": 0.9341, "step": 226210 }, { "epoch": 1.4452551013889066, "grad_norm": 0.7343180179595947, "learning_rate": 1.7845673970223932e-05, "loss": 0.8269, "step": 226220 }, { "epoch": 1.4453189885386453, "grad_norm": 1.2359371185302734, "learning_rate": 1.78418316214391e-05, "loss": 0.8709, "step": 226230 }, { "epoch": 1.445382875688384, "grad_norm": 1.012980341911316, "learning_rate": 1.7837989596514826e-05, "loss": 0.7815, "step": 226240 }, { "epoch": 1.4454467628381227, "grad_norm": 0.7704878449440002, "learning_rate": 1.783414789548984e-05, "loss": 1.1135, "step": 226250 }, { "epoch": 1.4455106499878614, "grad_norm": 1.0692133903503418, "learning_rate": 1.7830306518402796e-05, "loss": 0.7503, "step": 226260 }, { "epoch": 1.4455745371376, "grad_norm": 1.0675780773162842, "learning_rate": 1.7826465465292407e-05, "loss": 0.8263, "step": 226270 }, { "epoch": 1.4456384242873388, "grad_norm": 2.0007741451263428, "learning_rate": 1.7822624736197334e-05, "loss": 0.7149, "step": 226280 }, { "epoch": 1.4457023114370775, "grad_norm": 2.1513524055480957, "learning_rate": 1.7818784331156285e-05, "loss": 1.0759, "step": 226290 }, { "epoch": 1.4457661985868162, "grad_norm": 1.7059158086776733, "learning_rate": 1.78149442502079e-05, "loss": 0.8618, "step": 226300 }, { "epoch": 1.445830085736555, "grad_norm": 0.6934781074523926, "learning_rate": 1.7811104493390885e-05, "loss": 0.9491, "step": 226310 }, { "epoch": 1.4458939728862936, "grad_norm": 1.6286593675613403, "learning_rate": 1.7807265060743876e-05, "loss": 0.7483, "step": 226320 }, { "epoch": 1.4459578600360323, "grad_norm": 0.6673516035079956, "learning_rate": 1.7803425952305585e-05, "loss": 0.9391, "step": 226330 }, { "epoch": 1.446021747185771, "grad_norm": 0.9682520031929016, "learning_rate": 1.7799587168114623e-05, "loss": 0.7737, "step": 226340 }, { "epoch": 1.4460856343355097, "grad_norm": 0.745613157749176, "learning_rate": 1.7795748708209696e-05, "loss": 0.8105, "step": 226350 }, { "epoch": 1.4461495214852484, "grad_norm": 0.8082693815231323, "learning_rate": 1.7791910572629423e-05, "loss": 0.7544, "step": 226360 }, { "epoch": 1.4462134086349872, "grad_norm": 1.3045746088027954, "learning_rate": 1.778807276141249e-05, "loss": 0.8672, "step": 226370 }, { "epoch": 1.4462772957847259, "grad_norm": 0.8008405566215515, "learning_rate": 1.7784235274597515e-05, "loss": 1.0634, "step": 226380 }, { "epoch": 1.4463411829344646, "grad_norm": 1.4106078147888184, "learning_rate": 1.7780398112223163e-05, "loss": 0.9883, "step": 226390 }, { "epoch": 1.4464050700842033, "grad_norm": 1.0247710943222046, "learning_rate": 1.777656127432809e-05, "loss": 0.9253, "step": 226400 }, { "epoch": 1.446468957233942, "grad_norm": 0.7325378656387329, "learning_rate": 1.777272476095091e-05, "loss": 0.7098, "step": 226410 }, { "epoch": 1.4465328443836807, "grad_norm": 1.0278514623641968, "learning_rate": 1.7768888572130287e-05, "loss": 0.8852, "step": 226420 }, { "epoch": 1.4465967315334194, "grad_norm": 1.6052353382110596, "learning_rate": 1.776505270790484e-05, "loss": 0.6666, "step": 226430 }, { "epoch": 1.446660618683158, "grad_norm": 1.0923691987991333, "learning_rate": 1.77612171683132e-05, "loss": 0.8607, "step": 226440 }, { "epoch": 1.4467245058328968, "grad_norm": 2.1576809883117676, "learning_rate": 1.7757381953393975e-05, "loss": 0.8478, "step": 226450 }, { "epoch": 1.4467883929826355, "grad_norm": 0.8072786927223206, "learning_rate": 1.7753547063185823e-05, "loss": 0.6595, "step": 226460 }, { "epoch": 1.4468522801323742, "grad_norm": 3.7347018718719482, "learning_rate": 1.7749712497727334e-05, "loss": 0.8379, "step": 226470 }, { "epoch": 1.446916167282113, "grad_norm": 0.7923364043235779, "learning_rate": 1.7745878257057158e-05, "loss": 1.0198, "step": 226480 }, { "epoch": 1.4469800544318516, "grad_norm": 1.173071026802063, "learning_rate": 1.7742044341213874e-05, "loss": 0.79, "step": 226490 }, { "epoch": 1.4470439415815903, "grad_norm": 0.7277421951293945, "learning_rate": 1.773821075023613e-05, "loss": 1.0715, "step": 226500 }, { "epoch": 1.447107828731329, "grad_norm": 5.723413467407227, "learning_rate": 1.7734377484162496e-05, "loss": 0.9474, "step": 226510 }, { "epoch": 1.4471717158810677, "grad_norm": 1.176943302154541, "learning_rate": 1.7730544543031592e-05, "loss": 0.6639, "step": 226520 }, { "epoch": 1.4472356030308065, "grad_norm": 2.4388535022735596, "learning_rate": 1.772671192688204e-05, "loss": 1.013, "step": 226530 }, { "epoch": 1.4472994901805452, "grad_norm": 0.7879016995429993, "learning_rate": 1.7722879635752405e-05, "loss": 0.7353, "step": 226540 }, { "epoch": 1.4473633773302839, "grad_norm": 0.557325541973114, "learning_rate": 1.771904766968131e-05, "loss": 0.9431, "step": 226550 }, { "epoch": 1.4474272644800226, "grad_norm": 1.2949154376983643, "learning_rate": 1.7715216028707315e-05, "loss": 0.9866, "step": 226560 }, { "epoch": 1.447491151629761, "grad_norm": 0.670309841632843, "learning_rate": 1.7711384712869038e-05, "loss": 0.8569, "step": 226570 }, { "epoch": 1.4475550387795, "grad_norm": 1.074346899986267, "learning_rate": 1.770755372220504e-05, "loss": 0.6498, "step": 226580 }, { "epoch": 1.4476189259292385, "grad_norm": 0.6561318635940552, "learning_rate": 1.770372305675393e-05, "loss": 0.8314, "step": 226590 }, { "epoch": 1.4476828130789774, "grad_norm": 1.1744028329849243, "learning_rate": 1.7699892716554252e-05, "loss": 0.9351, "step": 226600 }, { "epoch": 1.4477467002287159, "grad_norm": 0.9757578372955322, "learning_rate": 1.769606270164461e-05, "loss": 0.7917, "step": 226610 }, { "epoch": 1.4478105873784548, "grad_norm": 1.1972298622131348, "learning_rate": 1.7692233012063552e-05, "loss": 1.0842, "step": 226620 }, { "epoch": 1.4478744745281933, "grad_norm": 0.9641197323799133, "learning_rate": 1.7688403647849673e-05, "loss": 0.7745, "step": 226630 }, { "epoch": 1.4479383616779322, "grad_norm": 0.8222277760505676, "learning_rate": 1.76845746090415e-05, "loss": 0.985, "step": 226640 }, { "epoch": 1.4480022488276707, "grad_norm": 1.6356098651885986, "learning_rate": 1.768074589567764e-05, "loss": 0.9724, "step": 226650 }, { "epoch": 1.4480661359774096, "grad_norm": 0.8448604941368103, "learning_rate": 1.7676917507796614e-05, "loss": 0.8945, "step": 226660 }, { "epoch": 1.4481300231271481, "grad_norm": 0.9055709838867188, "learning_rate": 1.7673089445437004e-05, "loss": 0.8706, "step": 226670 }, { "epoch": 1.448193910276887, "grad_norm": 0.8757638335227966, "learning_rate": 1.7669261708637336e-05, "loss": 1.1719, "step": 226680 }, { "epoch": 1.4482577974266255, "grad_norm": 0.6214796900749207, "learning_rate": 1.766543429743619e-05, "loss": 0.7492, "step": 226690 }, { "epoch": 1.4483216845763642, "grad_norm": 1.0025808811187744, "learning_rate": 1.766160721187207e-05, "loss": 0.8919, "step": 226700 }, { "epoch": 1.448385571726103, "grad_norm": 1.1682748794555664, "learning_rate": 1.765778045198355e-05, "loss": 1.2256, "step": 226710 }, { "epoch": 1.4484494588758416, "grad_norm": 0.6261559128761292, "learning_rate": 1.7653954017809178e-05, "loss": 0.8631, "step": 226720 }, { "epoch": 1.4485133460255804, "grad_norm": 0.8332759737968445, "learning_rate": 1.7650127909387453e-05, "loss": 0.6217, "step": 226730 }, { "epoch": 1.448577233175319, "grad_norm": 0.7914277911186218, "learning_rate": 1.764630212675695e-05, "loss": 0.736, "step": 226740 }, { "epoch": 1.4486411203250578, "grad_norm": 1.0079381465911865, "learning_rate": 1.764247666995615e-05, "loss": 0.7504, "step": 226750 }, { "epoch": 1.4487050074747965, "grad_norm": 0.8110386729240417, "learning_rate": 1.763865153902362e-05, "loss": 0.7769, "step": 226760 }, { "epoch": 1.4487688946245352, "grad_norm": 1.0138696432113647, "learning_rate": 1.763482673399785e-05, "loss": 0.9981, "step": 226770 }, { "epoch": 1.4488327817742739, "grad_norm": 0.9576422572135925, "learning_rate": 1.7631002254917388e-05, "loss": 0.9525, "step": 226780 }, { "epoch": 1.4488966689240126, "grad_norm": 1.2405056953430176, "learning_rate": 1.7627178101820725e-05, "loss": 1.0738, "step": 226790 }, { "epoch": 1.4489605560737513, "grad_norm": 1.3423398733139038, "learning_rate": 1.76233542747464e-05, "loss": 0.7747, "step": 226800 }, { "epoch": 1.44902444322349, "grad_norm": 1.0618234872817993, "learning_rate": 1.761953077373289e-05, "loss": 0.8785, "step": 226810 }, { "epoch": 1.4490883303732287, "grad_norm": 1.3146846294403076, "learning_rate": 1.7615707598818738e-05, "loss": 1.1244, "step": 226820 }, { "epoch": 1.4491522175229674, "grad_norm": 1.4140381813049316, "learning_rate": 1.7611884750042406e-05, "loss": 0.8067, "step": 226830 }, { "epoch": 1.4492161046727061, "grad_norm": 0.9436303377151489, "learning_rate": 1.7608062227442435e-05, "loss": 0.7848, "step": 226840 }, { "epoch": 1.4492799918224448, "grad_norm": 1.2600364685058594, "learning_rate": 1.760424003105728e-05, "loss": 0.7632, "step": 226850 }, { "epoch": 1.4493438789721835, "grad_norm": 0.9329853057861328, "learning_rate": 1.7600418160925473e-05, "loss": 0.8161, "step": 226860 }, { "epoch": 1.4494077661219222, "grad_norm": 0.8663994073867798, "learning_rate": 1.7596596617085463e-05, "loss": 1.1164, "step": 226870 }, { "epoch": 1.449471653271661, "grad_norm": 1.1688934564590454, "learning_rate": 1.7592775399575782e-05, "loss": 0.9746, "step": 226880 }, { "epoch": 1.4495355404213996, "grad_norm": 1.3450571298599243, "learning_rate": 1.758895450843487e-05, "loss": 0.658, "step": 226890 }, { "epoch": 1.4495994275711384, "grad_norm": 0.6766774654388428, "learning_rate": 1.7585133943701242e-05, "loss": 0.6398, "step": 226900 }, { "epoch": 1.449663314720877, "grad_norm": 0.8949316143989563, "learning_rate": 1.758131370541336e-05, "loss": 0.8675, "step": 226910 }, { "epoch": 1.4497272018706158, "grad_norm": 1.316635012626648, "learning_rate": 1.7577493793609675e-05, "loss": 0.8526, "step": 226920 }, { "epoch": 1.4497910890203545, "grad_norm": 0.887630045413971, "learning_rate": 1.7573674208328695e-05, "loss": 0.8046, "step": 226930 }, { "epoch": 1.4498549761700932, "grad_norm": 1.6671459674835205, "learning_rate": 1.756985494960885e-05, "loss": 0.8499, "step": 226940 }, { "epoch": 1.4499188633198319, "grad_norm": 0.9612981081008911, "learning_rate": 1.756603601748864e-05, "loss": 1.0565, "step": 226950 }, { "epoch": 1.4499827504695706, "grad_norm": 0.6422664523124695, "learning_rate": 1.7562217412006494e-05, "loss": 0.7408, "step": 226960 }, { "epoch": 1.4500466376193093, "grad_norm": 0.6129812002182007, "learning_rate": 1.7558399133200893e-05, "loss": 0.9403, "step": 226970 }, { "epoch": 1.450110524769048, "grad_norm": 1.100138783454895, "learning_rate": 1.7554581181110265e-05, "loss": 0.9612, "step": 226980 }, { "epoch": 1.4501744119187867, "grad_norm": 1.314050316810608, "learning_rate": 1.7550763555773086e-05, "loss": 0.9189, "step": 226990 }, { "epoch": 1.4502382990685254, "grad_norm": 1.7175153493881226, "learning_rate": 1.7546946257227774e-05, "loss": 1.1167, "step": 227000 }, { "epoch": 1.4503021862182641, "grad_norm": 0.9711244702339172, "learning_rate": 1.754312928551281e-05, "loss": 0.6749, "step": 227010 }, { "epoch": 1.4503660733680028, "grad_norm": 1.205511212348938, "learning_rate": 1.7539312640666593e-05, "loss": 0.7577, "step": 227020 }, { "epoch": 1.4504299605177415, "grad_norm": 1.204145908355713, "learning_rate": 1.75354963227276e-05, "loss": 0.7739, "step": 227030 }, { "epoch": 1.4504938476674802, "grad_norm": 0.8410221338272095, "learning_rate": 1.7531680331734225e-05, "loss": 0.7122, "step": 227040 }, { "epoch": 1.450557734817219, "grad_norm": 1.1859508752822876, "learning_rate": 1.752786466772492e-05, "loss": 0.937, "step": 227050 }, { "epoch": 1.4506216219669574, "grad_norm": 0.9122669696807861, "learning_rate": 1.752404933073813e-05, "loss": 1.0059, "step": 227060 }, { "epoch": 1.4506855091166964, "grad_norm": 0.9483950734138489, "learning_rate": 1.7520234320812233e-05, "loss": 0.7755, "step": 227070 }, { "epoch": 1.4507493962664348, "grad_norm": 1.1015524864196777, "learning_rate": 1.7516419637985703e-05, "loss": 0.9223, "step": 227080 }, { "epoch": 1.4508132834161738, "grad_norm": 0.5838099718093872, "learning_rate": 1.7512605282296907e-05, "loss": 0.9171, "step": 227090 }, { "epoch": 1.4508771705659123, "grad_norm": 1.1983695030212402, "learning_rate": 1.75087912537843e-05, "loss": 0.8051, "step": 227100 }, { "epoch": 1.4509410577156512, "grad_norm": 0.9621850252151489, "learning_rate": 1.7504977552486255e-05, "loss": 0.806, "step": 227110 }, { "epoch": 1.4510049448653897, "grad_norm": 0.7084076404571533, "learning_rate": 1.7501164178441215e-05, "loss": 0.8508, "step": 227120 }, { "epoch": 1.4510688320151286, "grad_norm": 0.736041784286499, "learning_rate": 1.7497351131687557e-05, "loss": 0.6109, "step": 227130 }, { "epoch": 1.451132719164867, "grad_norm": 0.7992726564407349, "learning_rate": 1.74935384122637e-05, "loss": 0.8049, "step": 227140 }, { "epoch": 1.451196606314606, "grad_norm": 0.921377420425415, "learning_rate": 1.7489726020208018e-05, "loss": 0.6698, "step": 227150 }, { "epoch": 1.4512604934643445, "grad_norm": 0.7148461937904358, "learning_rate": 1.7485913955558942e-05, "loss": 0.9502, "step": 227160 }, { "epoch": 1.4513243806140834, "grad_norm": 0.5825332403182983, "learning_rate": 1.7482102218354823e-05, "loss": 0.7701, "step": 227170 }, { "epoch": 1.451388267763822, "grad_norm": 1.0439517498016357, "learning_rate": 1.747829080863408e-05, "loss": 0.9723, "step": 227180 }, { "epoch": 1.4514521549135606, "grad_norm": 0.8032168745994568, "learning_rate": 1.7474479726435065e-05, "loss": 0.6788, "step": 227190 }, { "epoch": 1.4515160420632993, "grad_norm": 2.783485174179077, "learning_rate": 1.74706689717962e-05, "loss": 0.7815, "step": 227200 }, { "epoch": 1.451579929213038, "grad_norm": 0.992385983467102, "learning_rate": 1.7466858544755825e-05, "loss": 0.7934, "step": 227210 }, { "epoch": 1.4516438163627767, "grad_norm": 1.3935444355010986, "learning_rate": 1.7463048445352343e-05, "loss": 1.0874, "step": 227220 }, { "epoch": 1.4517077035125154, "grad_norm": 0.7856149673461914, "learning_rate": 1.7459238673624094e-05, "loss": 0.7179, "step": 227230 }, { "epoch": 1.4517715906622541, "grad_norm": 0.9185128808021545, "learning_rate": 1.7455429229609465e-05, "loss": 0.7735, "step": 227240 }, { "epoch": 1.4518354778119928, "grad_norm": 2.548475980758667, "learning_rate": 1.7451620113346834e-05, "loss": 1.1837, "step": 227250 }, { "epoch": 1.4518993649617316, "grad_norm": 1.0396366119384766, "learning_rate": 1.7447811324874536e-05, "loss": 1.1245, "step": 227260 }, { "epoch": 1.4519632521114703, "grad_norm": 0.7229011654853821, "learning_rate": 1.7444002864230945e-05, "loss": 0.7909, "step": 227270 }, { "epoch": 1.452027139261209, "grad_norm": 0.6902373433113098, "learning_rate": 1.74401947314544e-05, "loss": 0.7689, "step": 227280 }, { "epoch": 1.4520910264109477, "grad_norm": 1.0050629377365112, "learning_rate": 1.7436386926583283e-05, "loss": 0.8924, "step": 227290 }, { "epoch": 1.4521549135606864, "grad_norm": 1.3460990190505981, "learning_rate": 1.74325794496559e-05, "loss": 0.836, "step": 227300 }, { "epoch": 1.452218800710425, "grad_norm": 0.8463780879974365, "learning_rate": 1.7428772300710633e-05, "loss": 0.7391, "step": 227310 }, { "epoch": 1.4522826878601638, "grad_norm": 0.9536442756652832, "learning_rate": 1.742496547978579e-05, "loss": 1.1644, "step": 227320 }, { "epoch": 1.4523465750099025, "grad_norm": 0.8862224817276001, "learning_rate": 1.7421158986919744e-05, "loss": 1.1116, "step": 227330 }, { "epoch": 1.4524104621596412, "grad_norm": 0.8460167050361633, "learning_rate": 1.7417352822150794e-05, "loss": 0.783, "step": 227340 }, { "epoch": 1.45247434930938, "grad_norm": 0.5743412375450134, "learning_rate": 1.741354698551731e-05, "loss": 0.7744, "step": 227350 }, { "epoch": 1.4525382364591186, "grad_norm": 0.9879737496376038, "learning_rate": 1.7409741477057578e-05, "loss": 0.771, "step": 227360 }, { "epoch": 1.4526021236088573, "grad_norm": 1.0089986324310303, "learning_rate": 1.740593629680996e-05, "loss": 0.955, "step": 227370 }, { "epoch": 1.452666010758596, "grad_norm": 0.8803655505180359, "learning_rate": 1.7402131444812748e-05, "loss": 0.8349, "step": 227380 }, { "epoch": 1.4527298979083347, "grad_norm": 1.1196205615997314, "learning_rate": 1.739832692110429e-05, "loss": 0.81, "step": 227390 }, { "epoch": 1.4527937850580734, "grad_norm": 1.3624825477600098, "learning_rate": 1.7394522725722885e-05, "loss": 0.7161, "step": 227400 }, { "epoch": 1.4528576722078121, "grad_norm": 0.8905812501907349, "learning_rate": 1.739071885870682e-05, "loss": 0.8102, "step": 227410 }, { "epoch": 1.4529215593575509, "grad_norm": 0.9938466548919678, "learning_rate": 1.7386915320094454e-05, "loss": 0.7328, "step": 227420 }, { "epoch": 1.4529854465072896, "grad_norm": 0.9256237149238586, "learning_rate": 1.7383112109924042e-05, "loss": 0.8058, "step": 227430 }, { "epoch": 1.4530493336570283, "grad_norm": 1.2441478967666626, "learning_rate": 1.7379309228233925e-05, "loss": 0.9255, "step": 227440 }, { "epoch": 1.453113220806767, "grad_norm": 0.9369105696678162, "learning_rate": 1.737550667506237e-05, "loss": 0.6952, "step": 227450 }, { "epoch": 1.4531771079565057, "grad_norm": 1.246598720550537, "learning_rate": 1.7371704450447706e-05, "loss": 0.8575, "step": 227460 }, { "epoch": 1.4532409951062444, "grad_norm": 1.074830412864685, "learning_rate": 1.7367902554428183e-05, "loss": 0.8451, "step": 227470 }, { "epoch": 1.453304882255983, "grad_norm": 1.5546703338623047, "learning_rate": 1.7364100987042135e-05, "loss": 1.0819, "step": 227480 }, { "epoch": 1.4533687694057218, "grad_norm": 0.923184335231781, "learning_rate": 1.7360299748327806e-05, "loss": 0.7363, "step": 227490 }, { "epoch": 1.4534326565554605, "grad_norm": 0.8427280187606812, "learning_rate": 1.735649883832351e-05, "loss": 0.5411, "step": 227500 }, { "epoch": 1.4534965437051992, "grad_norm": 0.8301636576652527, "learning_rate": 1.7352698257067495e-05, "loss": 0.9946, "step": 227510 }, { "epoch": 1.453560430854938, "grad_norm": 0.5679813027381897, "learning_rate": 1.7348898004598075e-05, "loss": 0.7921, "step": 227520 }, { "epoch": 1.4536243180046766, "grad_norm": 0.7748306393623352, "learning_rate": 1.7345098080953475e-05, "loss": 1.2827, "step": 227530 }, { "epoch": 1.4536882051544153, "grad_norm": 2.134530782699585, "learning_rate": 1.734129848617201e-05, "loss": 0.7154, "step": 227540 }, { "epoch": 1.4537520923041538, "grad_norm": 1.778371810913086, "learning_rate": 1.7337499220291903e-05, "loss": 0.9307, "step": 227550 }, { "epoch": 1.4538159794538927, "grad_norm": 0.7461889982223511, "learning_rate": 1.733370028335144e-05, "loss": 0.6509, "step": 227560 }, { "epoch": 1.4538798666036312, "grad_norm": 0.9498295783996582, "learning_rate": 1.7329901675388887e-05, "loss": 1.3496, "step": 227570 }, { "epoch": 1.4539437537533701, "grad_norm": 0.7788515686988831, "learning_rate": 1.7326103396442473e-05, "loss": 0.757, "step": 227580 }, { "epoch": 1.4540076409031086, "grad_norm": 0.7038117051124573, "learning_rate": 1.732230544655048e-05, "loss": 1.0964, "step": 227590 }, { "epoch": 1.4540715280528476, "grad_norm": 0.7378969192504883, "learning_rate": 1.731850782575113e-05, "loss": 0.8474, "step": 227600 }, { "epoch": 1.454135415202586, "grad_norm": 1.4201772212982178, "learning_rate": 1.731471053408269e-05, "loss": 0.7944, "step": 227610 }, { "epoch": 1.454199302352325, "grad_norm": 1.4799860715866089, "learning_rate": 1.7310913571583375e-05, "loss": 0.8094, "step": 227620 }, { "epoch": 1.4542631895020635, "grad_norm": 1.1239525079727173, "learning_rate": 1.7307116938291463e-05, "loss": 1.1415, "step": 227630 }, { "epoch": 1.4543270766518024, "grad_norm": 1.0189942121505737, "learning_rate": 1.7303320634245148e-05, "loss": 0.766, "step": 227640 }, { "epoch": 1.4543909638015409, "grad_norm": 0.9115725755691528, "learning_rate": 1.7299524659482697e-05, "loss": 0.7498, "step": 227650 }, { "epoch": 1.4544548509512796, "grad_norm": 0.8899692893028259, "learning_rate": 1.7295729014042306e-05, "loss": 0.8486, "step": 227660 }, { "epoch": 1.4545187381010183, "grad_norm": 1.07687509059906, "learning_rate": 1.7291933697962233e-05, "loss": 1.0783, "step": 227670 }, { "epoch": 1.454582625250757, "grad_norm": 0.5755985975265503, "learning_rate": 1.7288138711280666e-05, "loss": 0.8134, "step": 227680 }, { "epoch": 1.4546465124004957, "grad_norm": 1.454787015914917, "learning_rate": 1.728434405403586e-05, "loss": 1.1284, "step": 227690 }, { "epoch": 1.4547103995502344, "grad_norm": 0.8672309517860413, "learning_rate": 1.7280549726265994e-05, "loss": 0.8704, "step": 227700 }, { "epoch": 1.454774286699973, "grad_norm": 3.574970245361328, "learning_rate": 1.7276755728009318e-05, "loss": 1.0213, "step": 227710 }, { "epoch": 1.4548381738497118, "grad_norm": 1.3924107551574707, "learning_rate": 1.7272962059304004e-05, "loss": 1.0223, "step": 227720 }, { "epoch": 1.4549020609994505, "grad_norm": 0.8274136781692505, "learning_rate": 1.7269168720188296e-05, "loss": 0.9841, "step": 227730 }, { "epoch": 1.4549659481491892, "grad_norm": 2.5468506813049316, "learning_rate": 1.726537571070035e-05, "loss": 0.6942, "step": 227740 }, { "epoch": 1.455029835298928, "grad_norm": 0.7780070304870605, "learning_rate": 1.7261583030878414e-05, "loss": 0.7812, "step": 227750 }, { "epoch": 1.4550937224486666, "grad_norm": 1.2256584167480469, "learning_rate": 1.725779068076064e-05, "loss": 0.8563, "step": 227760 }, { "epoch": 1.4551576095984053, "grad_norm": 1.1407594680786133, "learning_rate": 1.725399866038524e-05, "loss": 0.7595, "step": 227770 }, { "epoch": 1.455221496748144, "grad_norm": 1.1632260084152222, "learning_rate": 1.7250206969790416e-05, "loss": 0.7344, "step": 227780 }, { "epoch": 1.4552853838978828, "grad_norm": 0.7721037268638611, "learning_rate": 1.7246415609014327e-05, "loss": 0.7795, "step": 227790 }, { "epoch": 1.4553492710476215, "grad_norm": 0.8553909659385681, "learning_rate": 1.7242624578095184e-05, "loss": 0.7139, "step": 227800 }, { "epoch": 1.4554131581973602, "grad_norm": 0.8728143572807312, "learning_rate": 1.7238833877071136e-05, "loss": 0.8254, "step": 227810 }, { "epoch": 1.4554770453470989, "grad_norm": 0.7298803329467773, "learning_rate": 1.723504350598039e-05, "loss": 0.7893, "step": 227820 }, { "epoch": 1.4555409324968376, "grad_norm": 0.7812074422836304, "learning_rate": 1.7231253464861087e-05, "loss": 0.8362, "step": 227830 }, { "epoch": 1.4556048196465763, "grad_norm": 0.685131847858429, "learning_rate": 1.722746375375142e-05, "loss": 0.8985, "step": 227840 }, { "epoch": 1.455668706796315, "grad_norm": 0.671725332736969, "learning_rate": 1.7223674372689535e-05, "loss": 0.6595, "step": 227850 }, { "epoch": 1.4557325939460537, "grad_norm": 2.0617687702178955, "learning_rate": 1.721988532171362e-05, "loss": 1.1418, "step": 227860 }, { "epoch": 1.4557964810957924, "grad_norm": 1.009682536125183, "learning_rate": 1.7216096600861803e-05, "loss": 0.8784, "step": 227870 }, { "epoch": 1.4558603682455311, "grad_norm": 1.326684832572937, "learning_rate": 1.721230821017227e-05, "loss": 0.9218, "step": 227880 }, { "epoch": 1.4559242553952698, "grad_norm": 0.7491835355758667, "learning_rate": 1.720852014968316e-05, "loss": 0.6816, "step": 227890 }, { "epoch": 1.4559881425450085, "grad_norm": 1.153235912322998, "learning_rate": 1.72047324194326e-05, "loss": 0.6852, "step": 227900 }, { "epoch": 1.4560520296947472, "grad_norm": 0.826145589351654, "learning_rate": 1.7201323744592614e-05, "loss": 0.819, "step": 227910 }, { "epoch": 1.456115916844486, "grad_norm": 1.335146427154541, "learning_rate": 1.719753664190043e-05, "loss": 0.9684, "step": 227920 }, { "epoch": 1.4561798039942246, "grad_norm": 1.198949933052063, "learning_rate": 1.7193749869557446e-05, "loss": 0.6834, "step": 227930 }, { "epoch": 1.4562436911439633, "grad_norm": 1.216091275215149, "learning_rate": 1.7190342056928825e-05, "loss": 0.8983, "step": 227940 }, { "epoch": 1.456307578293702, "grad_norm": 0.6597400307655334, "learning_rate": 1.718655591235436e-05, "loss": 0.7476, "step": 227950 }, { "epoch": 1.4563714654434408, "grad_norm": 1.405436396598816, "learning_rate": 1.7182770098239642e-05, "loss": 0.5881, "step": 227960 }, { "epoch": 1.4564353525931795, "grad_norm": 0.6479114294052124, "learning_rate": 1.7178984614622833e-05, "loss": 0.9238, "step": 227970 }, { "epoch": 1.4564992397429182, "grad_norm": 0.8640186190605164, "learning_rate": 1.717519946154203e-05, "loss": 0.7785, "step": 227980 }, { "epoch": 1.4565631268926569, "grad_norm": 2.1302618980407715, "learning_rate": 1.717141463903539e-05, "loss": 0.8929, "step": 227990 }, { "epoch": 1.4566270140423956, "grad_norm": 0.9887571334838867, "learning_rate": 1.7167630147140977e-05, "loss": 0.8549, "step": 228000 }, { "epoch": 1.4566909011921343, "grad_norm": 0.932548463344574, "learning_rate": 1.7163845985896938e-05, "loss": 1.0528, "step": 228010 }, { "epoch": 1.456754788341873, "grad_norm": 0.897533655166626, "learning_rate": 1.7160062155341395e-05, "loss": 0.9261, "step": 228020 }, { "epoch": 1.4568186754916117, "grad_norm": 1.0525333881378174, "learning_rate": 1.7156278655512415e-05, "loss": 0.8245, "step": 228030 }, { "epoch": 1.4568825626413502, "grad_norm": 0.9059542417526245, "learning_rate": 1.7152495486448144e-05, "loss": 0.9365, "step": 228040 }, { "epoch": 1.4569464497910891, "grad_norm": 0.9503512978553772, "learning_rate": 1.7148712648186644e-05, "loss": 1.0488, "step": 228050 }, { "epoch": 1.4570103369408276, "grad_norm": 1.1666412353515625, "learning_rate": 1.7144930140766042e-05, "loss": 0.9226, "step": 228060 }, { "epoch": 1.4570742240905665, "grad_norm": 1.5587124824523926, "learning_rate": 1.7141147964224404e-05, "loss": 0.9057, "step": 228070 }, { "epoch": 1.457138111240305, "grad_norm": 4.082630634307861, "learning_rate": 1.7137366118599846e-05, "loss": 0.7744, "step": 228080 }, { "epoch": 1.457201998390044, "grad_norm": 0.8567389845848083, "learning_rate": 1.7133584603930423e-05, "loss": 1.1713, "step": 228090 }, { "epoch": 1.4572658855397824, "grad_norm": 0.7330909371376038, "learning_rate": 1.712980342025426e-05, "loss": 0.8745, "step": 228100 }, { "epoch": 1.4573297726895214, "grad_norm": 0.8207191824913025, "learning_rate": 1.712602256760939e-05, "loss": 0.8402, "step": 228110 }, { "epoch": 1.4573936598392598, "grad_norm": 0.7916305065155029, "learning_rate": 1.712224204603394e-05, "loss": 0.8344, "step": 228120 }, { "epoch": 1.4574575469889988, "grad_norm": 1.1310572624206543, "learning_rate": 1.7118461855565925e-05, "loss": 0.9267, "step": 228130 }, { "epoch": 1.4575214341387372, "grad_norm": 1.3407129049301147, "learning_rate": 1.711468199624347e-05, "loss": 0.6926, "step": 228140 }, { "epoch": 1.457585321288476, "grad_norm": 1.2362877130508423, "learning_rate": 1.71109024681046e-05, "loss": 0.8751, "step": 228150 }, { "epoch": 1.4576492084382147, "grad_norm": 1.0826531648635864, "learning_rate": 1.7107123271187414e-05, "loss": 0.8926, "step": 228160 }, { "epoch": 1.4577130955879534, "grad_norm": 0.696463942527771, "learning_rate": 1.7103344405529932e-05, "loss": 0.8336, "step": 228170 }, { "epoch": 1.457776982737692, "grad_norm": 0.6624709367752075, "learning_rate": 1.7099565871170248e-05, "loss": 0.8775, "step": 228180 }, { "epoch": 1.4578408698874308, "grad_norm": 0.9407052993774414, "learning_rate": 1.70957876681464e-05, "loss": 0.7396, "step": 228190 }, { "epoch": 1.4579047570371695, "grad_norm": 0.8339453935623169, "learning_rate": 1.7092009796496424e-05, "loss": 0.7492, "step": 228200 }, { "epoch": 1.4579686441869082, "grad_norm": 1.2756327390670776, "learning_rate": 1.708823225625839e-05, "loss": 0.8155, "step": 228210 }, { "epoch": 1.458032531336647, "grad_norm": 1.3156120777130127, "learning_rate": 1.7084455047470317e-05, "loss": 0.7071, "step": 228220 }, { "epoch": 1.4580964184863856, "grad_norm": 0.874346137046814, "learning_rate": 1.708067817017027e-05, "loss": 0.8141, "step": 228230 }, { "epoch": 1.4581603056361243, "grad_norm": 0.44407644867897034, "learning_rate": 1.7076901624396265e-05, "loss": 0.9211, "step": 228240 }, { "epoch": 1.458224192785863, "grad_norm": 1.41887629032135, "learning_rate": 1.7073125410186346e-05, "loss": 0.9911, "step": 228250 }, { "epoch": 1.4582880799356017, "grad_norm": 1.2208428382873535, "learning_rate": 1.7069349527578534e-05, "loss": 0.8122, "step": 228260 }, { "epoch": 1.4583519670853404, "grad_norm": 1.1697001457214355, "learning_rate": 1.7065573976610876e-05, "loss": 1.1614, "step": 228270 }, { "epoch": 1.4584158542350791, "grad_norm": 0.5886402130126953, "learning_rate": 1.706179875732136e-05, "loss": 0.728, "step": 228280 }, { "epoch": 1.4584797413848178, "grad_norm": 0.9518161416053772, "learning_rate": 1.7058023869748048e-05, "loss": 0.8771, "step": 228290 }, { "epoch": 1.4585436285345565, "grad_norm": 0.942564845085144, "learning_rate": 1.7054249313928917e-05, "loss": 1.1118, "step": 228300 }, { "epoch": 1.4586075156842953, "grad_norm": 1.543463945388794, "learning_rate": 1.7050475089902014e-05, "loss": 0.8808, "step": 228310 }, { "epoch": 1.458671402834034, "grad_norm": 2.35699200630188, "learning_rate": 1.7046701197705313e-05, "loss": 0.718, "step": 228320 }, { "epoch": 1.4587352899837727, "grad_norm": 0.9139242768287659, "learning_rate": 1.704292763737684e-05, "loss": 0.8443, "step": 228330 }, { "epoch": 1.4587991771335114, "grad_norm": 0.9385525584220886, "learning_rate": 1.703915440895461e-05, "loss": 0.9222, "step": 228340 }, { "epoch": 1.45886306428325, "grad_norm": 0.983020007610321, "learning_rate": 1.70353815124766e-05, "loss": 0.964, "step": 228350 }, { "epoch": 1.4589269514329888, "grad_norm": 0.8044835329055786, "learning_rate": 1.7031608947980833e-05, "loss": 0.8631, "step": 228360 }, { "epoch": 1.4589908385827275, "grad_norm": 1.0413472652435303, "learning_rate": 1.702783671550527e-05, "loss": 1.0598, "step": 228370 }, { "epoch": 1.4590547257324662, "grad_norm": 1.6564788818359375, "learning_rate": 1.702406481508793e-05, "loss": 0.8751, "step": 228380 }, { "epoch": 1.459118612882205, "grad_norm": 1.0101597309112549, "learning_rate": 1.702029324676677e-05, "loss": 0.8339, "step": 228390 }, { "epoch": 1.4591825000319436, "grad_norm": 1.1082416772842407, "learning_rate": 1.7016522010579806e-05, "loss": 0.9407, "step": 228400 }, { "epoch": 1.4592463871816823, "grad_norm": 0.9075333476066589, "learning_rate": 1.7012751106564978e-05, "loss": 0.8674, "step": 228410 }, { "epoch": 1.459310274331421, "grad_norm": 1.3690296411514282, "learning_rate": 1.700898053476031e-05, "loss": 1.0353, "step": 228420 }, { "epoch": 1.4593741614811597, "grad_norm": 0.9637429714202881, "learning_rate": 1.7005210295203727e-05, "loss": 0.9846, "step": 228430 }, { "epoch": 1.4594380486308984, "grad_norm": 0.9706971049308777, "learning_rate": 1.700144038793324e-05, "loss": 0.7751, "step": 228440 }, { "epoch": 1.4595019357806371, "grad_norm": 1.0732990503311157, "learning_rate": 1.6997670812986776e-05, "loss": 0.9152, "step": 228450 }, { "epoch": 1.4595658229303758, "grad_norm": 1.644718885421753, "learning_rate": 1.6993901570402337e-05, "loss": 1.0086, "step": 228460 }, { "epoch": 1.4596297100801146, "grad_norm": 1.098197340965271, "learning_rate": 1.6990132660217845e-05, "loss": 0.7548, "step": 228470 }, { "epoch": 1.4596935972298533, "grad_norm": 0.7047260403633118, "learning_rate": 1.6986364082471294e-05, "loss": 0.9532, "step": 228480 }, { "epoch": 1.459757484379592, "grad_norm": 0.8027589917182922, "learning_rate": 1.6982595837200598e-05, "loss": 0.8873, "step": 228490 }, { "epoch": 1.4598213715293307, "grad_norm": 3.7240960597991943, "learning_rate": 1.6978827924443747e-05, "loss": 0.9685, "step": 228500 }, { "epoch": 1.4598852586790692, "grad_norm": 1.015761375427246, "learning_rate": 1.6975060344238645e-05, "loss": 0.9572, "step": 228510 }, { "epoch": 1.459949145828808, "grad_norm": 0.8119301795959473, "learning_rate": 1.6971293096623276e-05, "loss": 0.7355, "step": 228520 }, { "epoch": 1.4600130329785466, "grad_norm": 1.2989156246185303, "learning_rate": 1.696752618163554e-05, "loss": 0.7846, "step": 228530 }, { "epoch": 1.4600769201282855, "grad_norm": 0.833233118057251, "learning_rate": 1.6963759599313394e-05, "loss": 1.2733, "step": 228540 }, { "epoch": 1.460140807278024, "grad_norm": 0.9700032472610474, "learning_rate": 1.6959993349694785e-05, "loss": 0.8575, "step": 228550 }, { "epoch": 1.460204694427763, "grad_norm": 0.8506675958633423, "learning_rate": 1.6956227432817613e-05, "loss": 1.049, "step": 228560 }, { "epoch": 1.4602685815775014, "grad_norm": 1.2259025573730469, "learning_rate": 1.695246184871983e-05, "loss": 0.7984, "step": 228570 }, { "epoch": 1.4603324687272403, "grad_norm": 1.0135867595672607, "learning_rate": 1.6948696597439333e-05, "loss": 1.1484, "step": 228580 }, { "epoch": 1.4603963558769788, "grad_norm": 0.8687092661857605, "learning_rate": 1.694493167901407e-05, "loss": 0.6181, "step": 228590 }, { "epoch": 1.4604602430267177, "grad_norm": 1.1422390937805176, "learning_rate": 1.6941167093481923e-05, "loss": 1.0024, "step": 228600 }, { "epoch": 1.4605241301764562, "grad_norm": 0.9797633290290833, "learning_rate": 1.6937402840880846e-05, "loss": 0.7148, "step": 228610 }, { "epoch": 1.4605880173261951, "grad_norm": 0.5749666094779968, "learning_rate": 1.69336389212487e-05, "loss": 0.8237, "step": 228620 }, { "epoch": 1.4606519044759336, "grad_norm": 0.5369682908058167, "learning_rate": 1.692987533462344e-05, "loss": 0.7458, "step": 228630 }, { "epoch": 1.4607157916256723, "grad_norm": 1.1534923315048218, "learning_rate": 1.6926112081042926e-05, "loss": 0.7847, "step": 228640 }, { "epoch": 1.460779678775411, "grad_norm": 0.7996623516082764, "learning_rate": 1.6922349160545096e-05, "loss": 0.8961, "step": 228650 }, { "epoch": 1.4608435659251497, "grad_norm": 1.0210667848587036, "learning_rate": 1.6918586573167804e-05, "loss": 0.7339, "step": 228660 }, { "epoch": 1.4609074530748885, "grad_norm": 2.753202199935913, "learning_rate": 1.691482431894899e-05, "loss": 0.6541, "step": 228670 }, { "epoch": 1.4609713402246272, "grad_norm": 0.8186949491500854, "learning_rate": 1.6911062397926515e-05, "loss": 0.6773, "step": 228680 }, { "epoch": 1.4610352273743659, "grad_norm": 0.6040871739387512, "learning_rate": 1.6907300810138245e-05, "loss": 0.8906, "step": 228690 }, { "epoch": 1.4610991145241046, "grad_norm": 0.9389637112617493, "learning_rate": 1.6903539555622106e-05, "loss": 0.9025, "step": 228700 }, { "epoch": 1.4611630016738433, "grad_norm": 0.7031495571136475, "learning_rate": 1.6899778634415934e-05, "loss": 0.7657, "step": 228710 }, { "epoch": 1.461226888823582, "grad_norm": 0.6498950123786926, "learning_rate": 1.6896018046557655e-05, "loss": 0.8931, "step": 228720 }, { "epoch": 1.4612907759733207, "grad_norm": 2.9347357749938965, "learning_rate": 1.6892257792085086e-05, "loss": 0.6996, "step": 228730 }, { "epoch": 1.4613546631230594, "grad_norm": 0.7390297651290894, "learning_rate": 1.6888497871036148e-05, "loss": 0.8342, "step": 228740 }, { "epoch": 1.461418550272798, "grad_norm": 0.6747456192970276, "learning_rate": 1.6884738283448658e-05, "loss": 0.6615, "step": 228750 }, { "epoch": 1.4614824374225368, "grad_norm": 1.3063819408416748, "learning_rate": 1.6880979029360523e-05, "loss": 0.9759, "step": 228760 }, { "epoch": 1.4615463245722755, "grad_norm": 1.4095808267593384, "learning_rate": 1.6877220108809567e-05, "loss": 0.8749, "step": 228770 }, { "epoch": 1.4616102117220142, "grad_norm": 0.8128808736801147, "learning_rate": 1.6873461521833672e-05, "loss": 0.9157, "step": 228780 }, { "epoch": 1.461674098871753, "grad_norm": 0.7370621562004089, "learning_rate": 1.686970326847066e-05, "loss": 0.8539, "step": 228790 }, { "epoch": 1.4617379860214916, "grad_norm": 1.128772497177124, "learning_rate": 1.6865945348758417e-05, "loss": 1.0237, "step": 228800 }, { "epoch": 1.4618018731712303, "grad_norm": 0.8914177417755127, "learning_rate": 1.6862187762734755e-05, "loss": 0.9627, "step": 228810 }, { "epoch": 1.461865760320969, "grad_norm": 0.8150872588157654, "learning_rate": 1.6858430510437544e-05, "loss": 0.738, "step": 228820 }, { "epoch": 1.4619296474707077, "grad_norm": 0.9016690850257874, "learning_rate": 1.6854673591904597e-05, "loss": 0.9736, "step": 228830 }, { "epoch": 1.4619935346204465, "grad_norm": 0.8050869703292847, "learning_rate": 1.685091700717377e-05, "loss": 0.9467, "step": 228840 }, { "epoch": 1.4620574217701852, "grad_norm": 1.197709560394287, "learning_rate": 1.6847160756282875e-05, "loss": 0.8094, "step": 228850 }, { "epoch": 1.4621213089199239, "grad_norm": 0.8978109359741211, "learning_rate": 1.6843404839269754e-05, "loss": 1.0452, "step": 228860 }, { "epoch": 1.4621851960696626, "grad_norm": 0.9194414019584656, "learning_rate": 1.6839649256172245e-05, "loss": 0.9372, "step": 228870 }, { "epoch": 1.4622490832194013, "grad_norm": 0.8657510280609131, "learning_rate": 1.683589400702814e-05, "loss": 0.8444, "step": 228880 }, { "epoch": 1.46231297036914, "grad_norm": 0.8180578351020813, "learning_rate": 1.6832139091875293e-05, "loss": 0.8992, "step": 228890 }, { "epoch": 1.4623768575188787, "grad_norm": 0.7911380529403687, "learning_rate": 1.6828384510751478e-05, "loss": 0.9684, "step": 228900 }, { "epoch": 1.4624407446686174, "grad_norm": 0.9842470288276672, "learning_rate": 1.6824630263694553e-05, "loss": 0.8405, "step": 228910 }, { "epoch": 1.462504631818356, "grad_norm": 1.0976924896240234, "learning_rate": 1.6820876350742277e-05, "loss": 1.0123, "step": 228920 }, { "epoch": 1.4625685189680948, "grad_norm": 0.8579769730567932, "learning_rate": 1.6817122771932498e-05, "loss": 0.728, "step": 228930 }, { "epoch": 1.4626324061178335, "grad_norm": 0.9228938221931458, "learning_rate": 1.6813369527302986e-05, "loss": 0.9207, "step": 228940 }, { "epoch": 1.4626962932675722, "grad_norm": 1.1510818004608154, "learning_rate": 1.6809616616891567e-05, "loss": 1.0057, "step": 228950 }, { "epoch": 1.462760180417311, "grad_norm": 0.5346214771270752, "learning_rate": 1.680586404073601e-05, "loss": 0.9359, "step": 228960 }, { "epoch": 1.4628240675670496, "grad_norm": 0.8829362392425537, "learning_rate": 1.6802111798874133e-05, "loss": 0.792, "step": 228970 }, { "epoch": 1.4628879547167883, "grad_norm": 1.4143726825714111, "learning_rate": 1.6798359891343697e-05, "loss": 0.8763, "step": 228980 }, { "epoch": 1.462951841866527, "grad_norm": 1.787758231163025, "learning_rate": 1.6794608318182508e-05, "loss": 0.9209, "step": 228990 }, { "epoch": 1.4630157290162655, "grad_norm": 0.9715995788574219, "learning_rate": 1.6790857079428334e-05, "loss": 0.6902, "step": 229000 }, { "epoch": 1.4630796161660045, "grad_norm": 0.8735285401344299, "learning_rate": 1.6787106175118973e-05, "loss": 1.0034, "step": 229010 }, { "epoch": 1.463143503315743, "grad_norm": 2.247833728790283, "learning_rate": 1.6783355605292166e-05, "loss": 0.9573, "step": 229020 }, { "epoch": 1.4632073904654819, "grad_norm": 1.173517107963562, "learning_rate": 1.6779605369985724e-05, "loss": 0.8683, "step": 229030 }, { "epoch": 1.4632712776152204, "grad_norm": 0.9730435609817505, "learning_rate": 1.6775855469237377e-05, "loss": 0.7438, "step": 229040 }, { "epoch": 1.4633351647649593, "grad_norm": 1.0451921224594116, "learning_rate": 1.6772105903084924e-05, "loss": 1.0182, "step": 229050 }, { "epoch": 1.4633990519146978, "grad_norm": 0.7566806077957153, "learning_rate": 1.6768356671566098e-05, "loss": 0.8164, "step": 229060 }, { "epoch": 1.4634629390644367, "grad_norm": 0.6601537466049194, "learning_rate": 1.6764607774718666e-05, "loss": 0.7758, "step": 229070 }, { "epoch": 1.4635268262141752, "grad_norm": 0.9205562472343445, "learning_rate": 1.6760859212580403e-05, "loss": 0.69, "step": 229080 }, { "epoch": 1.463590713363914, "grad_norm": 1.6375494003295898, "learning_rate": 1.6757110985189035e-05, "loss": 0.8699, "step": 229090 }, { "epoch": 1.4636546005136526, "grad_norm": 1.4572193622589111, "learning_rate": 1.675336309258233e-05, "loss": 0.7878, "step": 229100 }, { "epoch": 1.4637184876633915, "grad_norm": 0.8702135682106018, "learning_rate": 1.6749615534798003e-05, "loss": 0.6901, "step": 229110 }, { "epoch": 1.46378237481313, "grad_norm": 0.8251786828041077, "learning_rate": 1.674586831187383e-05, "loss": 0.7376, "step": 229120 }, { "epoch": 1.4638462619628687, "grad_norm": 0.8899903297424316, "learning_rate": 1.674212142384752e-05, "loss": 0.7067, "step": 229130 }, { "epoch": 1.4639101491126074, "grad_norm": 0.600141704082489, "learning_rate": 1.673837487075683e-05, "loss": 0.8646, "step": 229140 }, { "epoch": 1.4639740362623461, "grad_norm": 1.1960006952285767, "learning_rate": 1.673462865263948e-05, "loss": 0.8086, "step": 229150 }, { "epoch": 1.4640379234120848, "grad_norm": 2.2285172939300537, "learning_rate": 1.67308827695332e-05, "loss": 0.6467, "step": 229160 }, { "epoch": 1.4641018105618235, "grad_norm": 1.7123388051986694, "learning_rate": 1.6727137221475696e-05, "loss": 0.9564, "step": 229170 }, { "epoch": 1.4641656977115622, "grad_norm": 1.2680044174194336, "learning_rate": 1.6723392008504707e-05, "loss": 0.9674, "step": 229180 }, { "epoch": 1.464229584861301, "grad_norm": 2.0865049362182617, "learning_rate": 1.6719647130657966e-05, "loss": 0.8682, "step": 229190 }, { "epoch": 1.4642934720110397, "grad_norm": 0.9947324395179749, "learning_rate": 1.6715902587973154e-05, "loss": 0.6539, "step": 229200 }, { "epoch": 1.4643573591607784, "grad_norm": 0.7031934261322021, "learning_rate": 1.6712158380488007e-05, "loss": 0.9585, "step": 229210 }, { "epoch": 1.464421246310517, "grad_norm": 1.1023660898208618, "learning_rate": 1.670841450824021e-05, "loss": 0.8712, "step": 229220 }, { "epoch": 1.4644851334602558, "grad_norm": 0.5734823942184448, "learning_rate": 1.67046709712675e-05, "loss": 0.7178, "step": 229230 }, { "epoch": 1.4645490206099945, "grad_norm": 1.032142996788025, "learning_rate": 1.6700927769607544e-05, "loss": 0.7241, "step": 229240 }, { "epoch": 1.4646129077597332, "grad_norm": 1.1997605562210083, "learning_rate": 1.6697184903298062e-05, "loss": 0.7722, "step": 229250 }, { "epoch": 1.464676794909472, "grad_norm": 0.8793839812278748, "learning_rate": 1.6693442372376727e-05, "loss": 0.9902, "step": 229260 }, { "epoch": 1.4647406820592106, "grad_norm": 0.8298856019973755, "learning_rate": 1.6689700176881256e-05, "loss": 0.7513, "step": 229270 }, { "epoch": 1.4648045692089493, "grad_norm": 0.7414980530738831, "learning_rate": 1.6685958316849304e-05, "loss": 0.8875, "step": 229280 }, { "epoch": 1.464868456358688, "grad_norm": 1.0754121541976929, "learning_rate": 1.6682216792318595e-05, "loss": 0.8855, "step": 229290 }, { "epoch": 1.4649323435084267, "grad_norm": 0.7558190226554871, "learning_rate": 1.6678475603326767e-05, "loss": 0.7526, "step": 229300 }, { "epoch": 1.4649962306581654, "grad_norm": 2.532780170440674, "learning_rate": 1.667473474991153e-05, "loss": 1.0711, "step": 229310 }, { "epoch": 1.4650601178079041, "grad_norm": 0.7748706340789795, "learning_rate": 1.667099423211053e-05, "loss": 0.7864, "step": 229320 }, { "epoch": 1.4651240049576428, "grad_norm": 0.6396449208259583, "learning_rate": 1.6667254049961472e-05, "loss": 0.8154, "step": 229330 }, { "epoch": 1.4651878921073815, "grad_norm": 0.7421424388885498, "learning_rate": 1.6663514203501985e-05, "loss": 0.8693, "step": 229340 }, { "epoch": 1.4652517792571202, "grad_norm": 1.017627239227295, "learning_rate": 1.6659774692769763e-05, "loss": 0.8065, "step": 229350 }, { "epoch": 1.465315666406859, "grad_norm": 1.8426539897918701, "learning_rate": 1.6656035517802442e-05, "loss": 0.882, "step": 229360 }, { "epoch": 1.4653795535565977, "grad_norm": 0.957169234752655, "learning_rate": 1.6652296678637704e-05, "loss": 0.7843, "step": 229370 }, { "epoch": 1.4654434407063364, "grad_norm": 1.5271551609039307, "learning_rate": 1.6648558175313167e-05, "loss": 0.8364, "step": 229380 }, { "epoch": 1.465507327856075, "grad_norm": 0.784913957118988, "learning_rate": 1.664482000786651e-05, "loss": 0.7729, "step": 229390 }, { "epoch": 1.4655712150058138, "grad_norm": 0.7997502088546753, "learning_rate": 1.6641082176335383e-05, "loss": 0.9061, "step": 229400 }, { "epoch": 1.4656351021555525, "grad_norm": 0.7640141844749451, "learning_rate": 1.6637344680757406e-05, "loss": 0.8026, "step": 229410 }, { "epoch": 1.4656989893052912, "grad_norm": 1.3364169597625732, "learning_rate": 1.663360752117024e-05, "loss": 0.9229, "step": 229420 }, { "epoch": 1.46576287645503, "grad_norm": 1.1318371295928955, "learning_rate": 1.6629870697611503e-05, "loss": 0.9731, "step": 229430 }, { "epoch": 1.4658267636047686, "grad_norm": 0.7715478539466858, "learning_rate": 1.6626134210118848e-05, "loss": 0.8884, "step": 229440 }, { "epoch": 1.4658906507545073, "grad_norm": 2.0042197704315186, "learning_rate": 1.6622398058729883e-05, "loss": 1.0224, "step": 229450 }, { "epoch": 1.465954537904246, "grad_norm": 3.34346342086792, "learning_rate": 1.6618662243482263e-05, "loss": 0.908, "step": 229460 }, { "epoch": 1.4660184250539847, "grad_norm": 0.6757034659385681, "learning_rate": 1.6614926764413574e-05, "loss": 1.0681, "step": 229470 }, { "epoch": 1.4660823122037234, "grad_norm": 1.661514401435852, "learning_rate": 1.6611191621561467e-05, "loss": 1.1794, "step": 229480 }, { "epoch": 1.466146199353462, "grad_norm": 0.9512356519699097, "learning_rate": 1.6607456814963534e-05, "loss": 0.7131, "step": 229490 }, { "epoch": 1.4662100865032008, "grad_norm": 1.0155463218688965, "learning_rate": 1.6603722344657413e-05, "loss": 0.726, "step": 229500 }, { "epoch": 1.4662739736529393, "grad_norm": 1.3397324085235596, "learning_rate": 1.6599988210680683e-05, "loss": 0.9391, "step": 229510 }, { "epoch": 1.4663378608026783, "grad_norm": 1.2345083951950073, "learning_rate": 1.659625441307099e-05, "loss": 0.8885, "step": 229520 }, { "epoch": 1.4664017479524167, "grad_norm": 1.044854998588562, "learning_rate": 1.659252095186589e-05, "loss": 0.778, "step": 229530 }, { "epoch": 1.4664656351021557, "grad_norm": 0.9372422099113464, "learning_rate": 1.6588787827103025e-05, "loss": 0.7025, "step": 229540 }, { "epoch": 1.4665295222518941, "grad_norm": 1.096632719039917, "learning_rate": 1.658505503881996e-05, "loss": 0.9675, "step": 229550 }, { "epoch": 1.466593409401633, "grad_norm": 1.0477042198181152, "learning_rate": 1.6581322587054304e-05, "loss": 0.8676, "step": 229560 }, { "epoch": 1.4666572965513716, "grad_norm": 1.234499216079712, "learning_rate": 1.6577590471843628e-05, "loss": 0.9303, "step": 229570 }, { "epoch": 1.4667211837011105, "grad_norm": 0.9738841652870178, "learning_rate": 1.6573858693225536e-05, "loss": 0.7914, "step": 229580 }, { "epoch": 1.466785070850849, "grad_norm": 0.8118196725845337, "learning_rate": 1.6570127251237622e-05, "loss": 0.9868, "step": 229590 }, { "epoch": 1.466848958000588, "grad_norm": 0.6751111149787903, "learning_rate": 1.6566396145917424e-05, "loss": 0.801, "step": 229600 }, { "epoch": 1.4669128451503264, "grad_norm": 1.2858651876449585, "learning_rate": 1.656266537730256e-05, "loss": 0.932, "step": 229610 }, { "epoch": 1.466976732300065, "grad_norm": 0.7679579854011536, "learning_rate": 1.6558934945430564e-05, "loss": 1.07, "step": 229620 }, { "epoch": 1.4670406194498038, "grad_norm": 0.7404538989067078, "learning_rate": 1.6555204850339047e-05, "loss": 0.7723, "step": 229630 }, { "epoch": 1.4671045065995425, "grad_norm": 0.8464112281799316, "learning_rate": 1.6551475092065543e-05, "loss": 0.8583, "step": 229640 }, { "epoch": 1.4671683937492812, "grad_norm": 1.2769086360931396, "learning_rate": 1.654774567064763e-05, "loss": 0.8978, "step": 229650 }, { "epoch": 1.46723228089902, "grad_norm": 0.8352043032646179, "learning_rate": 1.6544016586122835e-05, "loss": 0.7327, "step": 229660 }, { "epoch": 1.4672961680487586, "grad_norm": 0.8383384943008423, "learning_rate": 1.6540287838528756e-05, "loss": 1.1571, "step": 229670 }, { "epoch": 1.4673600551984973, "grad_norm": 1.0359272956848145, "learning_rate": 1.653655942790291e-05, "loss": 0.8446, "step": 229680 }, { "epoch": 1.467423942348236, "grad_norm": 2.1989591121673584, "learning_rate": 1.6532831354282874e-05, "loss": 1.0813, "step": 229690 }, { "epoch": 1.4674878294979747, "grad_norm": 2.8567075729370117, "learning_rate": 1.6529103617706165e-05, "loss": 0.9508, "step": 229700 }, { "epoch": 1.4675517166477134, "grad_norm": 1.0511747598648071, "learning_rate": 1.652537621821034e-05, "loss": 0.7498, "step": 229710 }, { "epoch": 1.4676156037974522, "grad_norm": 1.0333011150360107, "learning_rate": 1.6521649155832953e-05, "loss": 0.8748, "step": 229720 }, { "epoch": 1.4676794909471909, "grad_norm": 1.0516149997711182, "learning_rate": 1.6517922430611503e-05, "loss": 0.9187, "step": 229730 }, { "epoch": 1.4677433780969296, "grad_norm": 0.6968880891799927, "learning_rate": 1.6514196042583556e-05, "loss": 0.8594, "step": 229740 }, { "epoch": 1.4678072652466683, "grad_norm": 1.0043814182281494, "learning_rate": 1.6510469991786608e-05, "loss": 0.6192, "step": 229750 }, { "epoch": 1.467871152396407, "grad_norm": 0.7312933802604675, "learning_rate": 1.6506744278258217e-05, "loss": 0.9998, "step": 229760 }, { "epoch": 1.4679350395461457, "grad_norm": 1.1748160123825073, "learning_rate": 1.6503018902035872e-05, "loss": 1.02, "step": 229770 }, { "epoch": 1.4679989266958844, "grad_norm": 0.9027350544929504, "learning_rate": 1.649929386315712e-05, "loss": 0.6065, "step": 229780 }, { "epoch": 1.468062813845623, "grad_norm": 0.8316593766212463, "learning_rate": 1.6495569161659454e-05, "loss": 1.1349, "step": 229790 }, { "epoch": 1.4681267009953618, "grad_norm": 0.7100276947021484, "learning_rate": 1.6491844797580396e-05, "loss": 0.8488, "step": 229800 }, { "epoch": 1.4681905881451005, "grad_norm": 0.6455823183059692, "learning_rate": 1.648812077095744e-05, "loss": 0.9165, "step": 229810 }, { "epoch": 1.4682544752948392, "grad_norm": 0.9278150796890259, "learning_rate": 1.6484397081828105e-05, "loss": 0.9549, "step": 229820 }, { "epoch": 1.468318362444578, "grad_norm": 0.804965615272522, "learning_rate": 1.6480673730229885e-05, "loss": 0.8013, "step": 229830 }, { "epoch": 1.4683822495943166, "grad_norm": 2.876382350921631, "learning_rate": 1.6476950716200284e-05, "loss": 1.0493, "step": 229840 }, { "epoch": 1.4684461367440553, "grad_norm": 0.7755618691444397, "learning_rate": 1.6473228039776782e-05, "loss": 0.9287, "step": 229850 }, { "epoch": 1.468510023893794, "grad_norm": 1.1979588270187378, "learning_rate": 1.646950570099689e-05, "loss": 1.0427, "step": 229860 }, { "epoch": 1.4685739110435327, "grad_norm": 0.9614741802215576, "learning_rate": 1.6465783699898074e-05, "loss": 0.8418, "step": 229870 }, { "epoch": 1.4686377981932714, "grad_norm": 0.9963449835777283, "learning_rate": 1.646206203651784e-05, "loss": 0.8061, "step": 229880 }, { "epoch": 1.4687016853430102, "grad_norm": 1.0735199451446533, "learning_rate": 1.6458340710893632e-05, "loss": 0.9624, "step": 229890 }, { "epoch": 1.4687655724927489, "grad_norm": 0.8822026252746582, "learning_rate": 1.6454619723062976e-05, "loss": 1.2394, "step": 229900 }, { "epoch": 1.4688294596424876, "grad_norm": 0.9153038859367371, "learning_rate": 1.6450899073063303e-05, "loss": 0.9479, "step": 229910 }, { "epoch": 1.4688933467922263, "grad_norm": 1.3811986446380615, "learning_rate": 1.6447178760932096e-05, "loss": 0.9423, "step": 229920 }, { "epoch": 1.468957233941965, "grad_norm": 1.9339368343353271, "learning_rate": 1.6443458786706845e-05, "loss": 0.8623, "step": 229930 }, { "epoch": 1.4690211210917037, "grad_norm": 1.1579439640045166, "learning_rate": 1.6439739150424982e-05, "loss": 0.8004, "step": 229940 }, { "epoch": 1.4690850082414424, "grad_norm": 0.6219136714935303, "learning_rate": 1.643601985212399e-05, "loss": 0.8937, "step": 229950 }, { "epoch": 1.469148895391181, "grad_norm": 0.7490658760070801, "learning_rate": 1.64323008918413e-05, "loss": 0.8881, "step": 229960 }, { "epoch": 1.4692127825409198, "grad_norm": 0.9120808839797974, "learning_rate": 1.64285822696144e-05, "loss": 1.4033, "step": 229970 }, { "epoch": 1.4692766696906583, "grad_norm": 1.0183281898498535, "learning_rate": 1.6424863985480697e-05, "loss": 0.9336, "step": 229980 }, { "epoch": 1.4693405568403972, "grad_norm": 1.4285860061645508, "learning_rate": 1.6421146039477685e-05, "loss": 1.0088, "step": 229990 }, { "epoch": 1.4694044439901357, "grad_norm": 0.916454553604126, "learning_rate": 1.641742843164276e-05, "loss": 1.2467, "step": 230000 }, { "epoch": 1.4694683311398746, "grad_norm": 0.772546648979187, "learning_rate": 1.64137111620134e-05, "loss": 0.8583, "step": 230010 }, { "epoch": 1.4695322182896131, "grad_norm": 0.8761088252067566, "learning_rate": 1.640999423062701e-05, "loss": 0.969, "step": 230020 }, { "epoch": 1.469596105439352, "grad_norm": 1.112427830696106, "learning_rate": 1.6406277637521055e-05, "loss": 1.0489, "step": 230030 }, { "epoch": 1.4696599925890905, "grad_norm": 0.8705644607543945, "learning_rate": 1.6402561382732933e-05, "loss": 0.9679, "step": 230040 }, { "epoch": 1.4697238797388295, "grad_norm": 1.6397324800491333, "learning_rate": 1.6398845466300094e-05, "loss": 1.2361, "step": 230050 }, { "epoch": 1.469787766888568, "grad_norm": 2.675877332687378, "learning_rate": 1.6395129888259942e-05, "loss": 0.8386, "step": 230060 }, { "epoch": 1.4698516540383069, "grad_norm": 1.344370722770691, "learning_rate": 1.6391414648649915e-05, "loss": 0.76, "step": 230070 }, { "epoch": 1.4699155411880453, "grad_norm": 1.809248447418213, "learning_rate": 1.6387699747507402e-05, "loss": 0.9025, "step": 230080 }, { "epoch": 1.469979428337784, "grad_norm": 0.7666621804237366, "learning_rate": 1.638398518486985e-05, "loss": 0.9313, "step": 230090 }, { "epoch": 1.4700433154875228, "grad_norm": 0.8414475917816162, "learning_rate": 1.638027096077463e-05, "loss": 0.844, "step": 230100 }, { "epoch": 1.4701072026372615, "grad_norm": 0.6243281960487366, "learning_rate": 1.637655707525917e-05, "loss": 0.8231, "step": 230110 }, { "epoch": 1.4701710897870002, "grad_norm": 0.6101037859916687, "learning_rate": 1.637284352836089e-05, "loss": 0.9168, "step": 230120 }, { "epoch": 1.4702349769367389, "grad_norm": 1.7307742834091187, "learning_rate": 1.636913032011715e-05, "loss": 0.9839, "step": 230130 }, { "epoch": 1.4702988640864776, "grad_norm": 0.975806713104248, "learning_rate": 1.6365417450565374e-05, "loss": 0.992, "step": 230140 }, { "epoch": 1.4703627512362163, "grad_norm": 1.1599698066711426, "learning_rate": 1.636170491974292e-05, "loss": 0.9875, "step": 230150 }, { "epoch": 1.470426638385955, "grad_norm": 0.5582944750785828, "learning_rate": 1.635799272768722e-05, "loss": 0.8191, "step": 230160 }, { "epoch": 1.4704905255356937, "grad_norm": 1.2626965045928955, "learning_rate": 1.6354280874435624e-05, "loss": 0.7218, "step": 230170 }, { "epoch": 1.4705544126854324, "grad_norm": 0.8063907623291016, "learning_rate": 1.6350569360025538e-05, "loss": 0.7508, "step": 230180 }, { "epoch": 1.4706182998351711, "grad_norm": 0.47269266843795776, "learning_rate": 1.634685818449432e-05, "loss": 0.9022, "step": 230190 }, { "epoch": 1.4706821869849098, "grad_norm": 1.0264742374420166, "learning_rate": 1.634314734787936e-05, "loss": 0.8665, "step": 230200 }, { "epoch": 1.4707460741346485, "grad_norm": 1.1556494235992432, "learning_rate": 1.6339436850218015e-05, "loss": 0.7588, "step": 230210 }, { "epoch": 1.4708099612843872, "grad_norm": 0.7982380986213684, "learning_rate": 1.6335726691547674e-05, "loss": 1.1942, "step": 230220 }, { "epoch": 1.470873848434126, "grad_norm": 0.853100597858429, "learning_rate": 1.6332016871905676e-05, "loss": 0.857, "step": 230230 }, { "epoch": 1.4709377355838646, "grad_norm": 0.8381783366203308, "learning_rate": 1.6328307391329394e-05, "loss": 1.019, "step": 230240 }, { "epoch": 1.4710016227336034, "grad_norm": 1.0367997884750366, "learning_rate": 1.6324598249856204e-05, "loss": 1.0161, "step": 230250 }, { "epoch": 1.471065509883342, "grad_norm": 1.1487911939620972, "learning_rate": 1.6320889447523425e-05, "loss": 0.8627, "step": 230260 }, { "epoch": 1.4711293970330808, "grad_norm": 1.105258822441101, "learning_rate": 1.6317180984368442e-05, "loss": 0.9296, "step": 230270 }, { "epoch": 1.4711932841828195, "grad_norm": 0.8666543960571289, "learning_rate": 1.631347286042857e-05, "loss": 0.9805, "step": 230280 }, { "epoch": 1.4712571713325582, "grad_norm": 1.2708656787872314, "learning_rate": 1.630976507574119e-05, "loss": 0.8902, "step": 230290 }, { "epoch": 1.4713210584822969, "grad_norm": 0.9094721078872681, "learning_rate": 1.6306057630343595e-05, "loss": 0.9106, "step": 230300 }, { "epoch": 1.4713849456320356, "grad_norm": 0.9870732426643372, "learning_rate": 1.6302350524273175e-05, "loss": 0.8808, "step": 230310 }, { "epoch": 1.4714488327817743, "grad_norm": 0.5650733113288879, "learning_rate": 1.629864375756722e-05, "loss": 1.1285, "step": 230320 }, { "epoch": 1.471512719931513, "grad_norm": 1.3587806224822998, "learning_rate": 1.6294937330263093e-05, "loss": 0.9673, "step": 230330 }, { "epoch": 1.4715766070812517, "grad_norm": 1.2703903913497925, "learning_rate": 1.629123124239809e-05, "loss": 0.7315, "step": 230340 }, { "epoch": 1.4716404942309904, "grad_norm": 0.8942323923110962, "learning_rate": 1.6287525494009565e-05, "loss": 0.8602, "step": 230350 }, { "epoch": 1.4717043813807291, "grad_norm": 0.8783580660820007, "learning_rate": 1.628382008513481e-05, "loss": 0.915, "step": 230360 }, { "epoch": 1.4717682685304678, "grad_norm": 0.8455802202224731, "learning_rate": 1.628011501581117e-05, "loss": 0.7608, "step": 230370 }, { "epoch": 1.4718321556802065, "grad_norm": 0.9563354253768921, "learning_rate": 1.627641028607593e-05, "loss": 0.799, "step": 230380 }, { "epoch": 1.4718960428299452, "grad_norm": 1.0895625352859497, "learning_rate": 1.6272705895966428e-05, "loss": 0.7895, "step": 230390 }, { "epoch": 1.471959929979684, "grad_norm": 1.0790303945541382, "learning_rate": 1.626900184551994e-05, "loss": 0.8456, "step": 230400 }, { "epoch": 1.4720238171294227, "grad_norm": 1.095496654510498, "learning_rate": 1.62652981347738e-05, "loss": 0.9899, "step": 230410 }, { "epoch": 1.4720877042791614, "grad_norm": 1.1831773519515991, "learning_rate": 1.6261594763765282e-05, "loss": 0.9879, "step": 230420 }, { "epoch": 1.4721515914289, "grad_norm": 0.7561997771263123, "learning_rate": 1.625789173253168e-05, "loss": 0.9086, "step": 230430 }, { "epoch": 1.4722154785786388, "grad_norm": 1.0318355560302734, "learning_rate": 1.6254189041110328e-05, "loss": 0.9287, "step": 230440 }, { "epoch": 1.4722793657283775, "grad_norm": 0.9705587029457092, "learning_rate": 1.6250486689538465e-05, "loss": 0.8275, "step": 230450 }, { "epoch": 1.4723432528781162, "grad_norm": 0.8411953449249268, "learning_rate": 1.6246784677853415e-05, "loss": 0.8982, "step": 230460 }, { "epoch": 1.4724071400278547, "grad_norm": 0.7869654297828674, "learning_rate": 1.624308300609243e-05, "loss": 0.8577, "step": 230470 }, { "epoch": 1.4724710271775936, "grad_norm": 1.2532565593719482, "learning_rate": 1.6239381674292813e-05, "loss": 1.009, "step": 230480 }, { "epoch": 1.472534914327332, "grad_norm": 1.5746049880981445, "learning_rate": 1.6235680682491823e-05, "loss": 0.9964, "step": 230490 }, { "epoch": 1.472598801477071, "grad_norm": 0.7447236180305481, "learning_rate": 1.6231980030726745e-05, "loss": 0.8126, "step": 230500 }, { "epoch": 1.4726626886268095, "grad_norm": 0.9821268916130066, "learning_rate": 1.6228279719034835e-05, "loss": 0.7112, "step": 230510 }, { "epoch": 1.4727265757765484, "grad_norm": 1.128045678138733, "learning_rate": 1.6224579747453372e-05, "loss": 0.9051, "step": 230520 }, { "epoch": 1.472790462926287, "grad_norm": 1.0752300024032593, "learning_rate": 1.6220880116019598e-05, "loss": 1.0858, "step": 230530 }, { "epoch": 1.4728543500760258, "grad_norm": 1.2056881189346313, "learning_rate": 1.6217180824770807e-05, "loss": 0.8552, "step": 230540 }, { "epoch": 1.4729182372257643, "grad_norm": 1.2044570446014404, "learning_rate": 1.6213481873744207e-05, "loss": 0.699, "step": 230550 }, { "epoch": 1.4729821243755032, "grad_norm": 0.8839747905731201, "learning_rate": 1.6209783262977095e-05, "loss": 0.8121, "step": 230560 }, { "epoch": 1.4730460115252417, "grad_norm": 1.1134631633758545, "learning_rate": 1.6206084992506675e-05, "loss": 0.6309, "step": 230570 }, { "epoch": 1.4731098986749804, "grad_norm": 2.019157648086548, "learning_rate": 1.6202387062370238e-05, "loss": 0.6585, "step": 230580 }, { "epoch": 1.4731737858247191, "grad_norm": 1.0488548278808594, "learning_rate": 1.619868947260499e-05, "loss": 1.065, "step": 230590 }, { "epoch": 1.4732376729744578, "grad_norm": 1.4154844284057617, "learning_rate": 1.619499222324819e-05, "loss": 0.6922, "step": 230600 }, { "epoch": 1.4733015601241966, "grad_norm": 0.7536170482635498, "learning_rate": 1.6191295314337062e-05, "loss": 0.7893, "step": 230610 }, { "epoch": 1.4733654472739353, "grad_norm": 0.8556029796600342, "learning_rate": 1.6187598745908826e-05, "loss": 0.7962, "step": 230620 }, { "epoch": 1.473429334423674, "grad_norm": 1.414725661277771, "learning_rate": 1.6183902518000744e-05, "loss": 0.7572, "step": 230630 }, { "epoch": 1.4734932215734127, "grad_norm": 1.4327750205993652, "learning_rate": 1.6180206630649996e-05, "loss": 0.8687, "step": 230640 }, { "epoch": 1.4735571087231514, "grad_norm": 0.8788250088691711, "learning_rate": 1.6176511083893843e-05, "loss": 0.8099, "step": 230650 }, { "epoch": 1.47362099587289, "grad_norm": 0.7542177438735962, "learning_rate": 1.6172815877769472e-05, "loss": 0.8901, "step": 230660 }, { "epoch": 1.4736848830226288, "grad_norm": 0.8953860402107239, "learning_rate": 1.616912101231412e-05, "loss": 1.1104, "step": 230670 }, { "epoch": 1.4737487701723675, "grad_norm": 0.8468190431594849, "learning_rate": 1.6165426487564972e-05, "loss": 1.037, "step": 230680 }, { "epoch": 1.4738126573221062, "grad_norm": 0.5810591578483582, "learning_rate": 1.6161732303559267e-05, "loss": 0.755, "step": 230690 }, { "epoch": 1.473876544471845, "grad_norm": 0.9378907084465027, "learning_rate": 1.615803846033418e-05, "loss": 0.9148, "step": 230700 }, { "epoch": 1.4739404316215836, "grad_norm": 0.8563784956932068, "learning_rate": 1.6154344957926937e-05, "loss": 1.0995, "step": 230710 }, { "epoch": 1.4740043187713223, "grad_norm": 0.8726912140846252, "learning_rate": 1.6150651796374706e-05, "loss": 0.922, "step": 230720 }, { "epoch": 1.474068205921061, "grad_norm": 1.8336883783340454, "learning_rate": 1.614695897571471e-05, "loss": 1.0036, "step": 230730 }, { "epoch": 1.4741320930707997, "grad_norm": 3.0733964443206787, "learning_rate": 1.6143266495984105e-05, "loss": 0.7163, "step": 230740 }, { "epoch": 1.4741959802205384, "grad_norm": 0.9470083713531494, "learning_rate": 1.6139574357220116e-05, "loss": 0.7829, "step": 230750 }, { "epoch": 1.4742598673702771, "grad_norm": 0.9975496530532837, "learning_rate": 1.613588255945989e-05, "loss": 0.7918, "step": 230760 }, { "epoch": 1.4743237545200159, "grad_norm": 1.0645257234573364, "learning_rate": 1.6132191102740624e-05, "loss": 1.0736, "step": 230770 }, { "epoch": 1.4743876416697546, "grad_norm": 0.9394400715827942, "learning_rate": 1.612849998709951e-05, "loss": 0.8007, "step": 230780 }, { "epoch": 1.4744515288194933, "grad_norm": 0.9448645710945129, "learning_rate": 1.612480921257369e-05, "loss": 0.85, "step": 230790 }, { "epoch": 1.474515415969232, "grad_norm": 1.0276811122894287, "learning_rate": 1.6121118779200356e-05, "loss": 0.8035, "step": 230800 }, { "epoch": 1.4745793031189707, "grad_norm": 0.8116633892059326, "learning_rate": 1.6117428687016656e-05, "loss": 0.6989, "step": 230810 }, { "epoch": 1.4746431902687094, "grad_norm": 0.8088856935501099, "learning_rate": 1.6113738936059774e-05, "loss": 0.9791, "step": 230820 }, { "epoch": 1.474707077418448, "grad_norm": 1.0470901727676392, "learning_rate": 1.6110049526366843e-05, "loss": 0.8447, "step": 230830 }, { "epoch": 1.4747709645681868, "grad_norm": 1.2791444063186646, "learning_rate": 1.610636045797505e-05, "loss": 0.6795, "step": 230840 }, { "epoch": 1.4748348517179255, "grad_norm": 0.9264209270477295, "learning_rate": 1.610267173092151e-05, "loss": 0.7939, "step": 230850 }, { "epoch": 1.4748987388676642, "grad_norm": 0.9389378428459167, "learning_rate": 1.6098983345243405e-05, "loss": 0.6904, "step": 230860 }, { "epoch": 1.474962626017403, "grad_norm": 1.025505781173706, "learning_rate": 1.609529530097785e-05, "loss": 0.8899, "step": 230870 }, { "epoch": 1.4750265131671416, "grad_norm": 0.7423310279846191, "learning_rate": 1.609160759816203e-05, "loss": 0.9437, "step": 230880 }, { "epoch": 1.4750904003168803, "grad_norm": 0.9326030611991882, "learning_rate": 1.608792023683303e-05, "loss": 0.8262, "step": 230890 }, { "epoch": 1.475154287466619, "grad_norm": 1.2312084436416626, "learning_rate": 1.6084233217028033e-05, "loss": 0.7731, "step": 230900 }, { "epoch": 1.4752181746163577, "grad_norm": 0.9910838603973389, "learning_rate": 1.6080546538784124e-05, "loss": 0.965, "step": 230910 }, { "epoch": 1.4752820617660964, "grad_norm": 0.8265828490257263, "learning_rate": 1.6076860202138483e-05, "loss": 0.8836, "step": 230920 }, { "epoch": 1.4753459489158351, "grad_norm": 0.7746514081954956, "learning_rate": 1.6073174207128185e-05, "loss": 0.9498, "step": 230930 }, { "epoch": 1.4754098360655736, "grad_norm": 1.3665982484817505, "learning_rate": 1.606948855379039e-05, "loss": 0.8485, "step": 230940 }, { "epoch": 1.4754737232153126, "grad_norm": 0.9096262454986572, "learning_rate": 1.6065803242162182e-05, "loss": 1.0628, "step": 230950 }, { "epoch": 1.475537610365051, "grad_norm": 1.2939201593399048, "learning_rate": 1.6062118272280695e-05, "loss": 0.9087, "step": 230960 }, { "epoch": 1.47560149751479, "grad_norm": 0.8000929951667786, "learning_rate": 1.6058433644183056e-05, "loss": 0.9918, "step": 230970 }, { "epoch": 1.4756653846645285, "grad_norm": 1.0761744976043701, "learning_rate": 1.6054749357906336e-05, "loss": 1.0643, "step": 230980 }, { "epoch": 1.4757292718142674, "grad_norm": 0.8242314457893372, "learning_rate": 1.6051065413487672e-05, "loss": 0.7486, "step": 230990 }, { "epoch": 1.4757931589640059, "grad_norm": 1.2581071853637695, "learning_rate": 1.604738181096413e-05, "loss": 0.9587, "step": 231000 }, { "epoch": 1.4758570461137448, "grad_norm": 1.2790287733078003, "learning_rate": 1.6043698550372842e-05, "loss": 0.8309, "step": 231010 }, { "epoch": 1.4759209332634833, "grad_norm": 1.0953774452209473, "learning_rate": 1.6040015631750877e-05, "loss": 0.8387, "step": 231020 }, { "epoch": 1.4759848204132222, "grad_norm": 0.9266490340232849, "learning_rate": 1.6036333055135344e-05, "loss": 0.6225, "step": 231030 }, { "epoch": 1.4760487075629607, "grad_norm": 0.7282007336616516, "learning_rate": 1.603265082056331e-05, "loss": 1.0751, "step": 231040 }, { "epoch": 1.4761125947126996, "grad_norm": 0.6650501489639282, "learning_rate": 1.602896892807188e-05, "loss": 1.058, "step": 231050 }, { "epoch": 1.476176481862438, "grad_norm": 1.3024554252624512, "learning_rate": 1.6025287377698105e-05, "loss": 0.8926, "step": 231060 }, { "epoch": 1.4762403690121768, "grad_norm": 0.6745061278343201, "learning_rate": 1.6021606169479098e-05, "loss": 0.8608, "step": 231070 }, { "epoch": 1.4763042561619155, "grad_norm": 0.7933433055877686, "learning_rate": 1.6017925303451898e-05, "loss": 1.0975, "step": 231080 }, { "epoch": 1.4763681433116542, "grad_norm": 0.8587223291397095, "learning_rate": 1.6014244779653598e-05, "loss": 0.8711, "step": 231090 }, { "epoch": 1.476432030461393, "grad_norm": 1.0614300966262817, "learning_rate": 1.6010564598121257e-05, "loss": 1.0366, "step": 231100 }, { "epoch": 1.4764959176111316, "grad_norm": 1.0521320104599, "learning_rate": 1.6006884758891922e-05, "loss": 1.0693, "step": 231110 }, { "epoch": 1.4765598047608703, "grad_norm": 1.2860974073410034, "learning_rate": 1.600320526200268e-05, "loss": 0.6898, "step": 231120 }, { "epoch": 1.476623691910609, "grad_norm": 4.338561534881592, "learning_rate": 1.5999526107490557e-05, "loss": 0.8247, "step": 231130 }, { "epoch": 1.4766875790603478, "grad_norm": 0.7639591693878174, "learning_rate": 1.5995847295392636e-05, "loss": 1.2375, "step": 231140 }, { "epoch": 1.4767514662100865, "grad_norm": 0.5624483227729797, "learning_rate": 1.5992168825745933e-05, "loss": 1.2762, "step": 231150 }, { "epoch": 1.4768153533598252, "grad_norm": 0.9101094603538513, "learning_rate": 1.5988490698587534e-05, "loss": 0.9293, "step": 231160 }, { "epoch": 1.4768792405095639, "grad_norm": 1.2439855337142944, "learning_rate": 1.5984812913954435e-05, "loss": 0.8092, "step": 231170 }, { "epoch": 1.4769431276593026, "grad_norm": 0.6541301012039185, "learning_rate": 1.5981135471883713e-05, "loss": 0.7527, "step": 231180 }, { "epoch": 1.4770070148090413, "grad_norm": 1.1231944561004639, "learning_rate": 1.5977458372412373e-05, "loss": 0.7811, "step": 231190 }, { "epoch": 1.47707090195878, "grad_norm": 1.1022666692733765, "learning_rate": 1.5973781615577475e-05, "loss": 0.9701, "step": 231200 }, { "epoch": 1.4771347891085187, "grad_norm": 0.6377559900283813, "learning_rate": 1.5970105201416024e-05, "loss": 0.9913, "step": 231210 }, { "epoch": 1.4771986762582574, "grad_norm": 1.0564450025558472, "learning_rate": 1.5966429129965066e-05, "loss": 1.0922, "step": 231220 }, { "epoch": 1.477262563407996, "grad_norm": 1.1828644275665283, "learning_rate": 1.5962753401261595e-05, "loss": 0.9356, "step": 231230 }, { "epoch": 1.4773264505577348, "grad_norm": 1.4484585523605347, "learning_rate": 1.5959078015342654e-05, "loss": 1.1591, "step": 231240 }, { "epoch": 1.4773903377074735, "grad_norm": 0.9290236830711365, "learning_rate": 1.5955402972245235e-05, "loss": 0.9023, "step": 231250 }, { "epoch": 1.4774542248572122, "grad_norm": 0.9976933002471924, "learning_rate": 1.5951728272006377e-05, "loss": 0.7068, "step": 231260 }, { "epoch": 1.477518112006951, "grad_norm": 1.0432369709014893, "learning_rate": 1.594805391466305e-05, "loss": 0.8532, "step": 231270 }, { "epoch": 1.4775819991566896, "grad_norm": 0.8269700407981873, "learning_rate": 1.5944379900252287e-05, "loss": 0.9943, "step": 231280 }, { "epoch": 1.4776458863064283, "grad_norm": 1.000536322593689, "learning_rate": 1.594070622881109e-05, "loss": 0.7274, "step": 231290 }, { "epoch": 1.477709773456167, "grad_norm": 0.7897246479988098, "learning_rate": 1.5937032900376437e-05, "loss": 0.7177, "step": 231300 }, { "epoch": 1.4777736606059058, "grad_norm": 1.0336436033248901, "learning_rate": 1.5933359914985346e-05, "loss": 0.8322, "step": 231310 }, { "epoch": 1.4778375477556445, "grad_norm": 0.9047965407371521, "learning_rate": 1.5929687272674775e-05, "loss": 1.0183, "step": 231320 }, { "epoch": 1.4779014349053832, "grad_norm": 1.1535193920135498, "learning_rate": 1.5926014973481747e-05, "loss": 0.5858, "step": 231330 }, { "epoch": 1.4779653220551219, "grad_norm": 0.9659336805343628, "learning_rate": 1.5922343017443203e-05, "loss": 0.9971, "step": 231340 }, { "epoch": 1.4780292092048606, "grad_norm": 1.0284655094146729, "learning_rate": 1.5918671404596168e-05, "loss": 1.1042, "step": 231350 }, { "epoch": 1.4780930963545993, "grad_norm": 0.987298309803009, "learning_rate": 1.5915000134977583e-05, "loss": 0.8724, "step": 231360 }, { "epoch": 1.478156983504338, "grad_norm": 1.0232453346252441, "learning_rate": 1.5911329208624443e-05, "loss": 0.9563, "step": 231370 }, { "epoch": 1.4782208706540767, "grad_norm": 1.4888365268707275, "learning_rate": 1.5907658625573695e-05, "loss": 1.0152, "step": 231380 }, { "epoch": 1.4782847578038154, "grad_norm": 0.9709943532943726, "learning_rate": 1.5903988385862338e-05, "loss": 0.7399, "step": 231390 }, { "epoch": 1.4783486449535541, "grad_norm": 0.8116827011108398, "learning_rate": 1.59003184895273e-05, "loss": 1.0779, "step": 231400 }, { "epoch": 1.4784125321032928, "grad_norm": 0.9209198951721191, "learning_rate": 1.5896648936605568e-05, "loss": 0.6134, "step": 231410 }, { "epoch": 1.4784764192530315, "grad_norm": 1.203497290611267, "learning_rate": 1.5892979727134066e-05, "loss": 0.625, "step": 231420 }, { "epoch": 1.47854030640277, "grad_norm": 1.94428551197052, "learning_rate": 1.5889310861149786e-05, "loss": 0.7796, "step": 231430 }, { "epoch": 1.478604193552509, "grad_norm": 1.7343497276306152, "learning_rate": 1.5885642338689638e-05, "loss": 0.6879, "step": 231440 }, { "epoch": 1.4786680807022474, "grad_norm": 0.8659371733665466, "learning_rate": 1.58819741597906e-05, "loss": 0.7654, "step": 231450 }, { "epoch": 1.4787319678519864, "grad_norm": 1.5597984790802002, "learning_rate": 1.5878306324489584e-05, "loss": 1.0089, "step": 231460 }, { "epoch": 1.4787958550017248, "grad_norm": 1.151652455329895, "learning_rate": 1.587463883282356e-05, "loss": 0.6786, "step": 231470 }, { "epoch": 1.4788597421514638, "grad_norm": 1.3175287246704102, "learning_rate": 1.5870971684829426e-05, "loss": 0.7708, "step": 231480 }, { "epoch": 1.4789236293012022, "grad_norm": 0.7284002304077148, "learning_rate": 1.5867304880544133e-05, "loss": 0.7838, "step": 231490 }, { "epoch": 1.4789875164509412, "grad_norm": 1.0089588165283203, "learning_rate": 1.586363842000463e-05, "loss": 0.864, "step": 231500 }, { "epoch": 1.4790514036006797, "grad_norm": 0.8042920827865601, "learning_rate": 1.58599723032478e-05, "loss": 1.109, "step": 231510 }, { "epoch": 1.4791152907504186, "grad_norm": 1.1528286933898926, "learning_rate": 1.58563065303106e-05, "loss": 0.851, "step": 231520 }, { "epoch": 1.479179177900157, "grad_norm": 1.242755651473999, "learning_rate": 1.5852641101229914e-05, "loss": 1.0138, "step": 231530 }, { "epoch": 1.479243065049896, "grad_norm": 0.9317694306373596, "learning_rate": 1.5848976016042693e-05, "loss": 0.8296, "step": 231540 }, { "epoch": 1.4793069521996345, "grad_norm": 0.633034348487854, "learning_rate": 1.5845311274785812e-05, "loss": 0.6141, "step": 231550 }, { "epoch": 1.4793708393493732, "grad_norm": 0.689298152923584, "learning_rate": 1.5841646877496213e-05, "loss": 0.964, "step": 231560 }, { "epoch": 1.479434726499112, "grad_norm": 0.7968207597732544, "learning_rate": 1.5837982824210763e-05, "loss": 0.7972, "step": 231570 }, { "epoch": 1.4794986136488506, "grad_norm": 1.0918278694152832, "learning_rate": 1.583431911496641e-05, "loss": 0.9565, "step": 231580 }, { "epoch": 1.4795625007985893, "grad_norm": 0.8419539928436279, "learning_rate": 1.583065574979999e-05, "loss": 0.9199, "step": 231590 }, { "epoch": 1.479626387948328, "grad_norm": 2.540273666381836, "learning_rate": 1.582699272874843e-05, "loss": 1.1954, "step": 231600 }, { "epoch": 1.4796902750980667, "grad_norm": 0.8567215204238892, "learning_rate": 1.582333005184863e-05, "loss": 1.0835, "step": 231610 }, { "epoch": 1.4797541622478054, "grad_norm": 0.7937307357788086, "learning_rate": 1.581966771913745e-05, "loss": 1.0136, "step": 231620 }, { "epoch": 1.4798180493975441, "grad_norm": 0.733269214630127, "learning_rate": 1.5816005730651802e-05, "loss": 0.7862, "step": 231630 }, { "epoch": 1.4798819365472828, "grad_norm": 0.793347179889679, "learning_rate": 1.5812344086428526e-05, "loss": 0.8984, "step": 231640 }, { "epoch": 1.4799458236970215, "grad_norm": 0.6982327103614807, "learning_rate": 1.5808682786504546e-05, "loss": 0.8062, "step": 231650 }, { "epoch": 1.4800097108467603, "grad_norm": 0.6589148044586182, "learning_rate": 1.5805021830916695e-05, "loss": 0.874, "step": 231660 }, { "epoch": 1.480073597996499, "grad_norm": 0.7669758200645447, "learning_rate": 1.5801361219701865e-05, "loss": 0.8848, "step": 231670 }, { "epoch": 1.4801374851462377, "grad_norm": 0.7774679660797119, "learning_rate": 1.5797700952896903e-05, "loss": 0.8524, "step": 231680 }, { "epoch": 1.4802013722959764, "grad_norm": 0.9099416136741638, "learning_rate": 1.57940410305387e-05, "loss": 0.9443, "step": 231690 }, { "epoch": 1.480265259445715, "grad_norm": 1.5125083923339844, "learning_rate": 1.579038145266408e-05, "loss": 0.9126, "step": 231700 }, { "epoch": 1.4803291465954538, "grad_norm": 1.3232501745224, "learning_rate": 1.5786722219309924e-05, "loss": 0.8163, "step": 231710 }, { "epoch": 1.4803930337451925, "grad_norm": 1.0830581188201904, "learning_rate": 1.5783063330513066e-05, "loss": 0.8484, "step": 231720 }, { "epoch": 1.4804569208949312, "grad_norm": 0.8370949625968933, "learning_rate": 1.577940478631037e-05, "loss": 0.8852, "step": 231730 }, { "epoch": 1.48052080804467, "grad_norm": 0.7332857251167297, "learning_rate": 1.577574658673866e-05, "loss": 0.9428, "step": 231740 }, { "epoch": 1.4805846951944086, "grad_norm": 1.0560225248336792, "learning_rate": 1.5772088731834804e-05, "loss": 0.9721, "step": 231750 }, { "epoch": 1.4806485823441473, "grad_norm": 0.9546782970428467, "learning_rate": 1.5768431221635615e-05, "loss": 0.9743, "step": 231760 }, { "epoch": 1.480712469493886, "grad_norm": 1.2282410860061646, "learning_rate": 1.5764774056177957e-05, "loss": 0.7588, "step": 231770 }, { "epoch": 1.4807763566436247, "grad_norm": 0.8199455738067627, "learning_rate": 1.576111723549862e-05, "loss": 0.8427, "step": 231780 }, { "epoch": 1.4808402437933634, "grad_norm": 0.8073819279670715, "learning_rate": 1.5757460759634468e-05, "loss": 0.9772, "step": 231790 }, { "epoch": 1.4809041309431021, "grad_norm": 1.3390814065933228, "learning_rate": 1.5753804628622292e-05, "loss": 0.9785, "step": 231800 }, { "epoch": 1.4809680180928408, "grad_norm": 1.1008869409561157, "learning_rate": 1.5750148842498934e-05, "loss": 0.8565, "step": 231810 }, { "epoch": 1.4810319052425795, "grad_norm": 1.158144474029541, "learning_rate": 1.5746493401301225e-05, "loss": 0.9149, "step": 231820 }, { "epoch": 1.4810957923923183, "grad_norm": 0.8972355127334595, "learning_rate": 1.5742838305065945e-05, "loss": 1.0507, "step": 231830 }, { "epoch": 1.481159679542057, "grad_norm": 1.1104916334152222, "learning_rate": 1.5739183553829935e-05, "loss": 0.9427, "step": 231840 }, { "epoch": 1.4812235666917957, "grad_norm": 1.1483949422836304, "learning_rate": 1.5735529147629967e-05, "loss": 0.642, "step": 231850 }, { "epoch": 1.4812874538415344, "grad_norm": 0.7706599831581116, "learning_rate": 1.573187508650289e-05, "loss": 0.7168, "step": 231860 }, { "epoch": 1.481351340991273, "grad_norm": 0.9245922565460205, "learning_rate": 1.5728221370485452e-05, "loss": 0.8697, "step": 231870 }, { "epoch": 1.4814152281410118, "grad_norm": 1.2615220546722412, "learning_rate": 1.5724567999614493e-05, "loss": 0.9605, "step": 231880 }, { "epoch": 1.4814791152907505, "grad_norm": 1.3631330728530884, "learning_rate": 1.5720914973926766e-05, "loss": 0.6942, "step": 231890 }, { "epoch": 1.4815430024404892, "grad_norm": 1.0511490106582642, "learning_rate": 1.57172622934591e-05, "loss": 0.916, "step": 231900 }, { "epoch": 1.481606889590228, "grad_norm": 1.2899891138076782, "learning_rate": 1.5713609958248247e-05, "loss": 0.8863, "step": 231910 }, { "epoch": 1.4816707767399664, "grad_norm": 0.9524277448654175, "learning_rate": 1.570995796833102e-05, "loss": 0.7068, "step": 231920 }, { "epoch": 1.4817346638897053, "grad_norm": 0.857600212097168, "learning_rate": 1.5706306323744163e-05, "loss": 0.9073, "step": 231930 }, { "epoch": 1.4817985510394438, "grad_norm": 1.0148301124572754, "learning_rate": 1.570265502452449e-05, "loss": 0.8351, "step": 231940 }, { "epoch": 1.4818624381891827, "grad_norm": 1.1091690063476562, "learning_rate": 1.569900407070873e-05, "loss": 0.7854, "step": 231950 }, { "epoch": 1.4819263253389212, "grad_norm": 1.0870037078857422, "learning_rate": 1.5695353462333697e-05, "loss": 0.6657, "step": 231960 }, { "epoch": 1.4819902124886601, "grad_norm": 1.797438383102417, "learning_rate": 1.569170319943611e-05, "loss": 0.7927, "step": 231970 }, { "epoch": 1.4820540996383986, "grad_norm": 1.677264928817749, "learning_rate": 1.5688053282052767e-05, "loss": 0.8669, "step": 231980 }, { "epoch": 1.4821179867881376, "grad_norm": 2.095792770385742, "learning_rate": 1.5684403710220402e-05, "loss": 0.7737, "step": 231990 }, { "epoch": 1.482181873937876, "grad_norm": 1.3605451583862305, "learning_rate": 1.568075448397579e-05, "loss": 0.7819, "step": 232000 }, { "epoch": 1.482245761087615, "grad_norm": 1.5517606735229492, "learning_rate": 1.5677105603355656e-05, "loss": 0.8138, "step": 232010 }, { "epoch": 1.4823096482373534, "grad_norm": 0.6776690483093262, "learning_rate": 1.5673457068396763e-05, "loss": 1.1062, "step": 232020 }, { "epoch": 1.4823735353870924, "grad_norm": 0.8474202156066895, "learning_rate": 1.566980887913587e-05, "loss": 0.9323, "step": 232030 }, { "epoch": 1.4824374225368309, "grad_norm": 0.6449154019355774, "learning_rate": 1.5666161035609684e-05, "loss": 0.7905, "step": 232040 }, { "epoch": 1.4825013096865696, "grad_norm": 1.335546612739563, "learning_rate": 1.5662513537854978e-05, "loss": 1.0527, "step": 232050 }, { "epoch": 1.4825651968363083, "grad_norm": 0.9183993935585022, "learning_rate": 1.565886638590845e-05, "loss": 0.9243, "step": 232060 }, { "epoch": 1.482629083986047, "grad_norm": 1.0847032070159912, "learning_rate": 1.565521957980688e-05, "loss": 0.7541, "step": 232070 }, { "epoch": 1.4826929711357857, "grad_norm": 0.827590823173523, "learning_rate": 1.5651573119586928e-05, "loss": 0.8019, "step": 232080 }, { "epoch": 1.4827568582855244, "grad_norm": 0.6743472218513489, "learning_rate": 1.564792700528536e-05, "loss": 1.0403, "step": 232090 }, { "epoch": 1.482820745435263, "grad_norm": 1.1407054662704468, "learning_rate": 1.564428123693888e-05, "loss": 0.8504, "step": 232100 }, { "epoch": 1.4828846325850018, "grad_norm": 3.8805408477783203, "learning_rate": 1.564063581458422e-05, "loss": 0.7055, "step": 232110 }, { "epoch": 1.4829485197347405, "grad_norm": 1.3978850841522217, "learning_rate": 1.5636990738258066e-05, "loss": 0.9231, "step": 232120 }, { "epoch": 1.4830124068844792, "grad_norm": 0.8537045121192932, "learning_rate": 1.5633346007997147e-05, "loss": 0.7011, "step": 232130 }, { "epoch": 1.483076294034218, "grad_norm": 1.7283480167388916, "learning_rate": 1.5629701623838176e-05, "loss": 1.0444, "step": 232140 }, { "epoch": 1.4831401811839566, "grad_norm": 0.9529137015342712, "learning_rate": 1.5626057585817837e-05, "loss": 0.7992, "step": 232150 }, { "epoch": 1.4832040683336953, "grad_norm": 1.050119161605835, "learning_rate": 1.56227782475784e-05, "loss": 0.9607, "step": 232160 }, { "epoch": 1.483267955483434, "grad_norm": 2.544686794281006, "learning_rate": 1.5619134867322572e-05, "loss": 0.9267, "step": 232170 }, { "epoch": 1.4833318426331727, "grad_norm": 0.8504775762557983, "learning_rate": 1.561549183331182e-05, "loss": 0.6988, "step": 232180 }, { "epoch": 1.4833957297829115, "grad_norm": 0.766586422920227, "learning_rate": 1.561184914558279e-05, "loss": 0.6566, "step": 232190 }, { "epoch": 1.4834596169326502, "grad_norm": 0.7473874092102051, "learning_rate": 1.5608206804172203e-05, "loss": 0.6506, "step": 232200 }, { "epoch": 1.4835235040823889, "grad_norm": 0.6732820868492126, "learning_rate": 1.5604564809116735e-05, "loss": 0.836, "step": 232210 }, { "epoch": 1.4835873912321276, "grad_norm": 1.0650951862335205, "learning_rate": 1.5600923160453053e-05, "loss": 0.7287, "step": 232220 }, { "epoch": 1.4836512783818663, "grad_norm": 0.8236666917800903, "learning_rate": 1.559728185821781e-05, "loss": 0.8019, "step": 232230 }, { "epoch": 1.483715165531605, "grad_norm": 0.8332436084747314, "learning_rate": 1.5593640902447725e-05, "loss": 0.8261, "step": 232240 }, { "epoch": 1.4837790526813437, "grad_norm": 0.6860572695732117, "learning_rate": 1.5590000293179423e-05, "loss": 0.6497, "step": 232250 }, { "epoch": 1.4838429398310824, "grad_norm": 2.124331474304199, "learning_rate": 1.558636003044959e-05, "loss": 0.7933, "step": 232260 }, { "epoch": 1.483906826980821, "grad_norm": 1.946094274520874, "learning_rate": 1.5582720114294896e-05, "loss": 0.741, "step": 232270 }, { "epoch": 1.4839707141305598, "grad_norm": 0.8453720808029175, "learning_rate": 1.5579080544751974e-05, "loss": 0.7517, "step": 232280 }, { "epoch": 1.4840346012802985, "grad_norm": 0.9035594463348389, "learning_rate": 1.5575441321857503e-05, "loss": 0.7278, "step": 232290 }, { "epoch": 1.4840984884300372, "grad_norm": 0.765318751335144, "learning_rate": 1.5571802445648104e-05, "loss": 0.638, "step": 232300 }, { "epoch": 1.484162375579776, "grad_norm": 1.1105114221572876, "learning_rate": 1.556816391616045e-05, "loss": 0.7516, "step": 232310 }, { "epoch": 1.4842262627295146, "grad_norm": 0.6948691010475159, "learning_rate": 1.5564525733431163e-05, "loss": 0.7438, "step": 232320 }, { "epoch": 1.4842901498792533, "grad_norm": 1.0976312160491943, "learning_rate": 1.5560887897496907e-05, "loss": 0.5559, "step": 232330 }, { "epoch": 1.484354037028992, "grad_norm": 1.7774726152420044, "learning_rate": 1.5557250408394287e-05, "loss": 0.8347, "step": 232340 }, { "epoch": 1.4844179241787308, "grad_norm": 1.5234465599060059, "learning_rate": 1.5553613266159973e-05, "loss": 1.2875, "step": 232350 }, { "epoch": 1.4844818113284695, "grad_norm": 2.0421388149261475, "learning_rate": 1.554997647083055e-05, "loss": 0.8815, "step": 232360 }, { "epoch": 1.4845456984782082, "grad_norm": 0.7968341112136841, "learning_rate": 1.554634002244269e-05, "loss": 0.8076, "step": 232370 }, { "epoch": 1.4846095856279469, "grad_norm": 0.6294237971305847, "learning_rate": 1.5542703921032975e-05, "loss": 0.6622, "step": 232380 }, { "epoch": 1.4846734727776856, "grad_norm": 0.8361502885818481, "learning_rate": 1.5539068166638053e-05, "loss": 0.7356, "step": 232390 }, { "epoch": 1.4847373599274243, "grad_norm": 0.931691586971283, "learning_rate": 1.553543275929452e-05, "loss": 0.8819, "step": 232400 }, { "epoch": 1.4848012470771628, "grad_norm": 0.998232901096344, "learning_rate": 1.553179769903901e-05, "loss": 0.7952, "step": 232410 }, { "epoch": 1.4848651342269017, "grad_norm": 0.9501769542694092, "learning_rate": 1.552816298590809e-05, "loss": 0.7659, "step": 232420 }, { "epoch": 1.4849290213766402, "grad_norm": 0.7583515048027039, "learning_rate": 1.5524528619938417e-05, "loss": 0.8152, "step": 232430 }, { "epoch": 1.484992908526379, "grad_norm": 0.9512587189674377, "learning_rate": 1.5520894601166546e-05, "loss": 0.8091, "step": 232440 }, { "epoch": 1.4850567956761176, "grad_norm": 1.126081109046936, "learning_rate": 1.5517260929629097e-05, "loss": 1.0074, "step": 232450 }, { "epoch": 1.4851206828258565, "grad_norm": 1.1219438314437866, "learning_rate": 1.551362760536268e-05, "loss": 1.0365, "step": 232460 }, { "epoch": 1.485184569975595, "grad_norm": 0.7321421504020691, "learning_rate": 1.5509994628403846e-05, "loss": 0.8553, "step": 232470 }, { "epoch": 1.485248457125334, "grad_norm": 1.2466015815734863, "learning_rate": 1.5506361998789225e-05, "loss": 0.8566, "step": 232480 }, { "epoch": 1.4853123442750724, "grad_norm": 0.8446059823036194, "learning_rate": 1.5502729716555364e-05, "loss": 0.603, "step": 232490 }, { "epoch": 1.4853762314248113, "grad_norm": 0.5884280800819397, "learning_rate": 1.549909778173887e-05, "loss": 0.8151, "step": 232500 }, { "epoch": 1.4854401185745498, "grad_norm": 3.8547708988189697, "learning_rate": 1.5495466194376295e-05, "loss": 0.7316, "step": 232510 }, { "epoch": 1.4855040057242885, "grad_norm": 2.5176239013671875, "learning_rate": 1.5491834954504246e-05, "loss": 0.8722, "step": 232520 }, { "epoch": 1.4855678928740272, "grad_norm": 0.652862012386322, "learning_rate": 1.5488204062159255e-05, "loss": 0.7634, "step": 232530 }, { "epoch": 1.485631780023766, "grad_norm": 0.9530776143074036, "learning_rate": 1.548457351737792e-05, "loss": 0.8714, "step": 232540 }, { "epoch": 1.4856956671735047, "grad_norm": 0.7320083379745483, "learning_rate": 1.5480943320196778e-05, "loss": 0.705, "step": 232550 }, { "epoch": 1.4857595543232434, "grad_norm": 0.5726275444030762, "learning_rate": 1.547731347065241e-05, "loss": 0.9791, "step": 232560 }, { "epoch": 1.485823441472982, "grad_norm": 1.0993123054504395, "learning_rate": 1.547368396878135e-05, "loss": 0.8455, "step": 232570 }, { "epoch": 1.4858873286227208, "grad_norm": 1.0467476844787598, "learning_rate": 1.5470054814620178e-05, "loss": 0.9816, "step": 232580 }, { "epoch": 1.4859512157724595, "grad_norm": 1.7535980939865112, "learning_rate": 1.546642600820541e-05, "loss": 0.7644, "step": 232590 }, { "epoch": 1.4860151029221982, "grad_norm": 0.85832279920578, "learning_rate": 1.5462797549573627e-05, "loss": 0.7564, "step": 232600 }, { "epoch": 1.486078990071937, "grad_norm": 0.8821678161621094, "learning_rate": 1.545916943876133e-05, "loss": 0.7779, "step": 232610 }, { "epoch": 1.4861428772216756, "grad_norm": 0.9968520998954773, "learning_rate": 1.5455541675805095e-05, "loss": 0.7113, "step": 232620 }, { "epoch": 1.4862067643714143, "grad_norm": 0.8889404535293579, "learning_rate": 1.545191426074143e-05, "loss": 0.8483, "step": 232630 }, { "epoch": 1.486270651521153, "grad_norm": 0.8896504044532776, "learning_rate": 1.5448287193606887e-05, "loss": 0.7159, "step": 232640 }, { "epoch": 1.4863345386708917, "grad_norm": 0.8445311784744263, "learning_rate": 1.5444660474437972e-05, "loss": 0.9071, "step": 232650 }, { "epoch": 1.4863984258206304, "grad_norm": 1.0877915620803833, "learning_rate": 1.544103410327122e-05, "loss": 0.9653, "step": 232660 }, { "epoch": 1.4864623129703691, "grad_norm": 0.7088839411735535, "learning_rate": 1.543740808014316e-05, "loss": 0.6883, "step": 232670 }, { "epoch": 1.4865262001201078, "grad_norm": 0.8006494641304016, "learning_rate": 1.5433782405090297e-05, "loss": 0.6404, "step": 232680 }, { "epoch": 1.4865900872698465, "grad_norm": 0.6382250189781189, "learning_rate": 1.543015707814916e-05, "loss": 0.9332, "step": 232690 }, { "epoch": 1.4866539744195852, "grad_norm": 5.511012554168701, "learning_rate": 1.542653209935625e-05, "loss": 0.9581, "step": 232700 }, { "epoch": 1.486717861569324, "grad_norm": 1.100386381149292, "learning_rate": 1.542290746874807e-05, "loss": 0.9676, "step": 232710 }, { "epoch": 1.4867817487190627, "grad_norm": 1.28281569480896, "learning_rate": 1.541928318636111e-05, "loss": 0.925, "step": 232720 }, { "epoch": 1.4868456358688014, "grad_norm": 0.6798058152198792, "learning_rate": 1.54156592522319e-05, "loss": 0.7567, "step": 232730 }, { "epoch": 1.48690952301854, "grad_norm": 0.7467005252838135, "learning_rate": 1.5412035666396906e-05, "loss": 0.7877, "step": 232740 }, { "epoch": 1.4869734101682788, "grad_norm": 1.0129773616790771, "learning_rate": 1.5408412428892655e-05, "loss": 0.9662, "step": 232750 }, { "epoch": 1.4870372973180175, "grad_norm": 0.9531970024108887, "learning_rate": 1.5404789539755593e-05, "loss": 1.0394, "step": 232760 }, { "epoch": 1.4871011844677562, "grad_norm": 1.0765858888626099, "learning_rate": 1.5401166999022254e-05, "loss": 0.8266, "step": 232770 }, { "epoch": 1.487165071617495, "grad_norm": 1.456829309463501, "learning_rate": 1.5397544806729076e-05, "loss": 0.9925, "step": 232780 }, { "epoch": 1.4872289587672336, "grad_norm": 1.1520856618881226, "learning_rate": 1.5393922962912555e-05, "loss": 0.8577, "step": 232790 }, { "epoch": 1.4872928459169723, "grad_norm": 0.9173868894577026, "learning_rate": 1.5390301467609187e-05, "loss": 0.7145, "step": 232800 }, { "epoch": 1.487356733066711, "grad_norm": 1.2395943403244019, "learning_rate": 1.5386680320855408e-05, "loss": 0.7307, "step": 232810 }, { "epoch": 1.4874206202164497, "grad_norm": 1.308870792388916, "learning_rate": 1.538305952268772e-05, "loss": 0.8319, "step": 232820 }, { "epoch": 1.4874845073661884, "grad_norm": 0.7490695118904114, "learning_rate": 1.5379439073142553e-05, "loss": 0.8522, "step": 232830 }, { "epoch": 1.4875483945159271, "grad_norm": 0.7452239990234375, "learning_rate": 1.53758189722564e-05, "loss": 0.8708, "step": 232840 }, { "epoch": 1.4876122816656658, "grad_norm": 0.8576825261116028, "learning_rate": 1.537219922006569e-05, "loss": 0.7481, "step": 232850 }, { "epoch": 1.4876761688154045, "grad_norm": 1.7907367944717407, "learning_rate": 1.536857981660691e-05, "loss": 0.9311, "step": 232860 }, { "epoch": 1.4877400559651432, "grad_norm": 0.8764051795005798, "learning_rate": 1.536496076191647e-05, "loss": 1.0075, "step": 232870 }, { "epoch": 1.487803943114882, "grad_norm": 1.157345175743103, "learning_rate": 1.536134205603086e-05, "loss": 0.8867, "step": 232880 }, { "epoch": 1.4878678302646207, "grad_norm": 0.9898677468299866, "learning_rate": 1.5357723698986482e-05, "loss": 1.0325, "step": 232890 }, { "epoch": 1.4879317174143591, "grad_norm": 1.3587260246276855, "learning_rate": 1.5354105690819814e-05, "loss": 0.9747, "step": 232900 }, { "epoch": 1.487995604564098, "grad_norm": 0.8809471726417542, "learning_rate": 1.5350488031567263e-05, "loss": 0.8676, "step": 232910 }, { "epoch": 1.4880594917138366, "grad_norm": 1.4196127653121948, "learning_rate": 1.5346870721265283e-05, "loss": 0.9157, "step": 232920 }, { "epoch": 1.4881233788635755, "grad_norm": 0.9943186640739441, "learning_rate": 1.5343253759950284e-05, "loss": 0.7639, "step": 232930 }, { "epoch": 1.488187266013314, "grad_norm": 0.735995888710022, "learning_rate": 1.533963714765871e-05, "loss": 0.7108, "step": 232940 }, { "epoch": 1.488251153163053, "grad_norm": 1.09149169921875, "learning_rate": 1.5336020884426967e-05, "loss": 1.0359, "step": 232950 }, { "epoch": 1.4883150403127914, "grad_norm": 0.6204273700714111, "learning_rate": 1.533240497029149e-05, "loss": 0.8711, "step": 232960 }, { "epoch": 1.4883789274625303, "grad_norm": 1.1760767698287964, "learning_rate": 1.5328789405288678e-05, "loss": 0.6275, "step": 232970 }, { "epoch": 1.4884428146122688, "grad_norm": 1.1136373281478882, "learning_rate": 1.532517418945495e-05, "loss": 1.0665, "step": 232980 }, { "epoch": 1.4885067017620077, "grad_norm": 0.851905882358551, "learning_rate": 1.5321559322826733e-05, "loss": 0.8263, "step": 232990 }, { "epoch": 1.4885705889117462, "grad_norm": 0.5786584615707397, "learning_rate": 1.53179448054404e-05, "loss": 1.2527, "step": 233000 }, { "epoch": 1.488634476061485, "grad_norm": 1.1659579277038574, "learning_rate": 1.5314330637332376e-05, "loss": 0.7822, "step": 233010 }, { "epoch": 1.4886983632112236, "grad_norm": 1.3709019422531128, "learning_rate": 1.531071681853904e-05, "loss": 0.9937, "step": 233020 }, { "epoch": 1.4887622503609623, "grad_norm": 1.0024594068527222, "learning_rate": 1.530710334909681e-05, "loss": 1.1988, "step": 233030 }, { "epoch": 1.488826137510701, "grad_norm": 0.6908731460571289, "learning_rate": 1.5303490229042045e-05, "loss": 0.8364, "step": 233040 }, { "epoch": 1.4888900246604397, "grad_norm": 0.9514659643173218, "learning_rate": 1.5299877458411168e-05, "loss": 0.7613, "step": 233050 }, { "epoch": 1.4889539118101784, "grad_norm": 1.1845754384994507, "learning_rate": 1.529626503724053e-05, "loss": 0.7988, "step": 233060 }, { "epoch": 1.4890177989599171, "grad_norm": 1.0755010843276978, "learning_rate": 1.529265296556654e-05, "loss": 0.9096, "step": 233070 }, { "epoch": 1.4890816861096559, "grad_norm": 0.8984807729721069, "learning_rate": 1.5289041243425544e-05, "loss": 0.8871, "step": 233080 }, { "epoch": 1.4891455732593946, "grad_norm": 0.7457914352416992, "learning_rate": 1.528542987085395e-05, "loss": 0.8331, "step": 233090 }, { "epoch": 1.4892094604091333, "grad_norm": 1.1087687015533447, "learning_rate": 1.5281818847888097e-05, "loss": 1.04, "step": 233100 }, { "epoch": 1.489273347558872, "grad_norm": 1.566397786140442, "learning_rate": 1.5278208174564374e-05, "loss": 0.8974, "step": 233110 }, { "epoch": 1.4893372347086107, "grad_norm": 1.3691843748092651, "learning_rate": 1.527459785091912e-05, "loss": 0.7804, "step": 233120 }, { "epoch": 1.4894011218583494, "grad_norm": 0.7337422370910645, "learning_rate": 1.5270987876988723e-05, "loss": 0.9106, "step": 233130 }, { "epoch": 1.489465009008088, "grad_norm": 0.871205747127533, "learning_rate": 1.5267378252809504e-05, "loss": 0.9425, "step": 233140 }, { "epoch": 1.4895288961578268, "grad_norm": 1.1365987062454224, "learning_rate": 1.5263768978417858e-05, "loss": 0.8911, "step": 233150 }, { "epoch": 1.4895927833075655, "grad_norm": 0.6923983097076416, "learning_rate": 1.5260160053850086e-05, "loss": 0.7918, "step": 233160 }, { "epoch": 1.4896566704573042, "grad_norm": 0.7337884902954102, "learning_rate": 1.5256551479142572e-05, "loss": 1.2812, "step": 233170 }, { "epoch": 1.489720557607043, "grad_norm": 1.0790367126464844, "learning_rate": 1.5252943254331648e-05, "loss": 0.9928, "step": 233180 }, { "epoch": 1.4897844447567816, "grad_norm": 1.5988062620162964, "learning_rate": 1.5249335379453627e-05, "loss": 0.795, "step": 233190 }, { "epoch": 1.4898483319065203, "grad_norm": 1.0071760416030884, "learning_rate": 1.5245727854544879e-05, "loss": 1.0201, "step": 233200 }, { "epoch": 1.489912219056259, "grad_norm": 1.4104787111282349, "learning_rate": 1.5242120679641697e-05, "loss": 0.7409, "step": 233210 }, { "epoch": 1.4899761062059977, "grad_norm": 0.8305241465568542, "learning_rate": 1.5238513854780451e-05, "loss": 1.3258, "step": 233220 }, { "epoch": 1.4900399933557364, "grad_norm": 0.5620672106742859, "learning_rate": 1.5234907379997426e-05, "loss": 0.8582, "step": 233230 }, { "epoch": 1.4901038805054752, "grad_norm": 0.7753502130508423, "learning_rate": 1.5231301255328978e-05, "loss": 0.6606, "step": 233240 }, { "epoch": 1.4901677676552139, "grad_norm": 0.7875840067863464, "learning_rate": 1.5227695480811388e-05, "loss": 0.7296, "step": 233250 }, { "epoch": 1.4902316548049526, "grad_norm": 1.440796136856079, "learning_rate": 1.5224090056481e-05, "loss": 0.7251, "step": 233260 }, { "epoch": 1.4902955419546913, "grad_norm": 0.8261032104492188, "learning_rate": 1.52204849823741e-05, "loss": 0.771, "step": 233270 }, { "epoch": 1.49035942910443, "grad_norm": 0.8172363638877869, "learning_rate": 1.5216880258527017e-05, "loss": 0.9547, "step": 233280 }, { "epoch": 1.4904233162541687, "grad_norm": 0.6734633445739746, "learning_rate": 1.5213275884976024e-05, "loss": 0.8702, "step": 233290 }, { "epoch": 1.4904872034039074, "grad_norm": 1.0001932382583618, "learning_rate": 1.5209671861757441e-05, "loss": 0.815, "step": 233300 }, { "epoch": 1.490551090553646, "grad_norm": 1.8237123489379883, "learning_rate": 1.5206068188907574e-05, "loss": 0.8879, "step": 233310 }, { "epoch": 1.4906149777033848, "grad_norm": 0.9669250845909119, "learning_rate": 1.5202464866462691e-05, "loss": 0.8936, "step": 233320 }, { "epoch": 1.4906788648531235, "grad_norm": 1.2896833419799805, "learning_rate": 1.5198861894459099e-05, "loss": 0.843, "step": 233330 }, { "epoch": 1.4907427520028622, "grad_norm": 0.9315965175628662, "learning_rate": 1.519525927293306e-05, "loss": 0.8093, "step": 233340 }, { "epoch": 1.490806639152601, "grad_norm": 2.279897928237915, "learning_rate": 1.5191657001920889e-05, "loss": 1.1767, "step": 233350 }, { "epoch": 1.4908705263023396, "grad_norm": 1.2854963541030884, "learning_rate": 1.518805508145883e-05, "loss": 0.711, "step": 233360 }, { "epoch": 1.4909344134520781, "grad_norm": 0.6539024710655212, "learning_rate": 1.518445351158319e-05, "loss": 0.7295, "step": 233370 }, { "epoch": 1.490998300601817, "grad_norm": 2.2135396003723145, "learning_rate": 1.5180852292330206e-05, "loss": 0.9475, "step": 233380 }, { "epoch": 1.4910621877515555, "grad_norm": 0.8237118721008301, "learning_rate": 1.5177251423736178e-05, "loss": 0.7877, "step": 233390 }, { "epoch": 1.4911260749012945, "grad_norm": 1.0154304504394531, "learning_rate": 1.5173650905837339e-05, "loss": 0.8719, "step": 233400 }, { "epoch": 1.491189962051033, "grad_norm": 1.093376874923706, "learning_rate": 1.5170050738669978e-05, "loss": 0.7655, "step": 233410 }, { "epoch": 1.4912538492007719, "grad_norm": 1.0671299695968628, "learning_rate": 1.516645092227032e-05, "loss": 0.7285, "step": 233420 }, { "epoch": 1.4913177363505103, "grad_norm": 1.1775280237197876, "learning_rate": 1.5162851456674659e-05, "loss": 0.9614, "step": 233430 }, { "epoch": 1.4913816235002493, "grad_norm": 1.378699779510498, "learning_rate": 1.5159252341919206e-05, "loss": 0.9049, "step": 233440 }, { "epoch": 1.4914455106499878, "grad_norm": 1.5878591537475586, "learning_rate": 1.515565357804023e-05, "loss": 0.7096, "step": 233450 }, { "epoch": 1.4915093977997267, "grad_norm": 0.9215556383132935, "learning_rate": 1.515241499057849e-05, "loss": 1.1814, "step": 233460 }, { "epoch": 1.4915732849494652, "grad_norm": 0.6913306713104248, "learning_rate": 1.5148816893464646e-05, "loss": 0.7497, "step": 233470 }, { "epoch": 1.491637172099204, "grad_norm": 1.190250039100647, "learning_rate": 1.514521914733238e-05, "loss": 0.7633, "step": 233480 }, { "epoch": 1.4917010592489426, "grad_norm": 0.6808974146842957, "learning_rate": 1.5141621752217893e-05, "loss": 0.8693, "step": 233490 }, { "epoch": 1.4917649463986813, "grad_norm": 1.5304328203201294, "learning_rate": 1.513802470815745e-05, "loss": 0.7401, "step": 233500 }, { "epoch": 1.49182883354842, "grad_norm": 1.0490107536315918, "learning_rate": 1.5134428015187242e-05, "loss": 0.9076, "step": 233510 }, { "epoch": 1.4918927206981587, "grad_norm": 0.7744476199150085, "learning_rate": 1.5130831673343526e-05, "loss": 0.7956, "step": 233520 }, { "epoch": 1.4919566078478974, "grad_norm": 1.0777429342269897, "learning_rate": 1.5127235682662477e-05, "loss": 0.757, "step": 233530 }, { "epoch": 1.4920204949976361, "grad_norm": 0.9591853618621826, "learning_rate": 1.5123640043180359e-05, "loss": 0.8078, "step": 233540 }, { "epoch": 1.4920843821473748, "grad_norm": 1.0310931205749512, "learning_rate": 1.5120044754933338e-05, "loss": 1.0422, "step": 233550 }, { "epoch": 1.4921482692971135, "grad_norm": 1.1196818351745605, "learning_rate": 1.5116449817957656e-05, "loss": 0.7267, "step": 233560 }, { "epoch": 1.4922121564468522, "grad_norm": 1.0456836223602295, "learning_rate": 1.5112855232289491e-05, "loss": 0.7895, "step": 233570 }, { "epoch": 1.492276043596591, "grad_norm": 1.1032707691192627, "learning_rate": 1.5109260997965069e-05, "loss": 0.8093, "step": 233580 }, { "epoch": 1.4923399307463296, "grad_norm": 1.1063132286071777, "learning_rate": 1.510566711502056e-05, "loss": 0.7527, "step": 233590 }, { "epoch": 1.4924038178960684, "grad_norm": 1.1770296096801758, "learning_rate": 1.5102073583492183e-05, "loss": 0.7738, "step": 233600 }, { "epoch": 1.492467705045807, "grad_norm": 0.682502031326294, "learning_rate": 1.5098480403416104e-05, "loss": 1.0723, "step": 233610 }, { "epoch": 1.4925315921955458, "grad_norm": 1.0063148736953735, "learning_rate": 1.5094887574828536e-05, "loss": 0.8741, "step": 233620 }, { "epoch": 1.4925954793452845, "grad_norm": 1.2028182744979858, "learning_rate": 1.5091295097765629e-05, "loss": 0.9136, "step": 233630 }, { "epoch": 1.4926593664950232, "grad_norm": 0.7433778643608093, "learning_rate": 1.5087702972263584e-05, "loss": 1.0283, "step": 233640 }, { "epoch": 1.4927232536447619, "grad_norm": 1.0931978225708008, "learning_rate": 1.5084111198358586e-05, "loss": 0.8848, "step": 233650 }, { "epoch": 1.4927871407945006, "grad_norm": 0.8081746697425842, "learning_rate": 1.5080519776086782e-05, "loss": 0.888, "step": 233660 }, { "epoch": 1.4928510279442393, "grad_norm": 0.9746555685997009, "learning_rate": 1.5076928705484366e-05, "loss": 0.9802, "step": 233670 }, { "epoch": 1.492914915093978, "grad_norm": 0.5713024139404297, "learning_rate": 1.5073337986587476e-05, "loss": 0.831, "step": 233680 }, { "epoch": 1.4929788022437167, "grad_norm": 1.1723284721374512, "learning_rate": 1.5069747619432307e-05, "loss": 0.8193, "step": 233690 }, { "epoch": 1.4930426893934554, "grad_norm": 0.9624750018119812, "learning_rate": 1.506615760405498e-05, "loss": 0.9194, "step": 233700 }, { "epoch": 1.4931065765431941, "grad_norm": 0.8232372403144836, "learning_rate": 1.5062567940491685e-05, "loss": 0.9009, "step": 233710 }, { "epoch": 1.4931704636929328, "grad_norm": 1.230167269706726, "learning_rate": 1.5058978628778541e-05, "loss": 0.9485, "step": 233720 }, { "epoch": 1.4932343508426715, "grad_norm": 1.1337039470672607, "learning_rate": 1.5055389668951725e-05, "loss": 0.7884, "step": 233730 }, { "epoch": 1.4932982379924102, "grad_norm": 0.8492588996887207, "learning_rate": 1.5051801061047355e-05, "loss": 0.7516, "step": 233740 }, { "epoch": 1.493362125142149, "grad_norm": 0.8926807641983032, "learning_rate": 1.5048212805101591e-05, "loss": 1.144, "step": 233750 }, { "epoch": 1.4934260122918877, "grad_norm": 0.6456555724143982, "learning_rate": 1.504462490115055e-05, "loss": 0.8384, "step": 233760 }, { "epoch": 1.4934898994416264, "grad_norm": 1.1378792524337769, "learning_rate": 1.5041037349230392e-05, "loss": 0.7143, "step": 233770 }, { "epoch": 1.493553786591365, "grad_norm": 0.6030178070068359, "learning_rate": 1.5037450149377214e-05, "loss": 0.7133, "step": 233780 }, { "epoch": 1.4936176737411038, "grad_norm": 0.9475358128547668, "learning_rate": 1.5033863301627183e-05, "loss": 0.8603, "step": 233790 }, { "epoch": 1.4936815608908425, "grad_norm": 0.6618078351020813, "learning_rate": 1.5030276806016375e-05, "loss": 0.8643, "step": 233800 }, { "epoch": 1.4937454480405812, "grad_norm": 0.6175677180290222, "learning_rate": 1.502669066258095e-05, "loss": 0.7618, "step": 233810 }, { "epoch": 1.4938093351903199, "grad_norm": 0.7343020439147949, "learning_rate": 1.5023104871357007e-05, "loss": 0.7322, "step": 233820 }, { "epoch": 1.4938732223400586, "grad_norm": 1.1418992280960083, "learning_rate": 1.501951943238064e-05, "loss": 0.8652, "step": 233830 }, { "epoch": 1.4939371094897973, "grad_norm": 0.8838281035423279, "learning_rate": 1.5015934345687992e-05, "loss": 0.9093, "step": 233840 }, { "epoch": 1.494000996639536, "grad_norm": 1.2746473550796509, "learning_rate": 1.5012349611315136e-05, "loss": 0.7186, "step": 233850 }, { "epoch": 1.4940648837892745, "grad_norm": 0.8988287448883057, "learning_rate": 1.5008765229298206e-05, "loss": 0.6573, "step": 233860 }, { "epoch": 1.4941287709390134, "grad_norm": 0.8221251964569092, "learning_rate": 1.5005181199673263e-05, "loss": 1.0266, "step": 233870 }, { "epoch": 1.494192658088752, "grad_norm": 0.8387921452522278, "learning_rate": 1.5001597522476435e-05, "loss": 0.8873, "step": 233880 }, { "epoch": 1.4942565452384908, "grad_norm": 1.367504358291626, "learning_rate": 1.4998014197743781e-05, "loss": 0.9954, "step": 233890 }, { "epoch": 1.4943204323882293, "grad_norm": 1.0656965970993042, "learning_rate": 1.4994431225511424e-05, "loss": 1.0383, "step": 233900 }, { "epoch": 1.4943843195379682, "grad_norm": 1.9154045581817627, "learning_rate": 1.4990848605815411e-05, "loss": 1.1103, "step": 233910 }, { "epoch": 1.4944482066877067, "grad_norm": 2.2200756072998047, "learning_rate": 1.4987266338691858e-05, "loss": 0.6776, "step": 233920 }, { "epoch": 1.4945120938374457, "grad_norm": 1.3810445070266724, "learning_rate": 1.4983684424176803e-05, "loss": 0.96, "step": 233930 }, { "epoch": 1.4945759809871841, "grad_norm": 0.8132758736610413, "learning_rate": 1.498010286230636e-05, "loss": 0.7585, "step": 233940 }, { "epoch": 1.494639868136923, "grad_norm": 0.9625273942947388, "learning_rate": 1.497652165311656e-05, "loss": 0.9365, "step": 233950 }, { "epoch": 1.4947037552866616, "grad_norm": 1.0185012817382812, "learning_rate": 1.4972940796643487e-05, "loss": 0.7643, "step": 233960 }, { "epoch": 1.4947676424364005, "grad_norm": 0.7479864954948425, "learning_rate": 1.4969360292923217e-05, "loss": 0.6614, "step": 233970 }, { "epoch": 1.494831529586139, "grad_norm": 2.9963157176971436, "learning_rate": 1.496578014199178e-05, "loss": 0.8242, "step": 233980 }, { "epoch": 1.4948954167358777, "grad_norm": 1.1559045314788818, "learning_rate": 1.4962200343885264e-05, "loss": 0.8894, "step": 233990 }, { "epoch": 1.4949593038856164, "grad_norm": 1.065392255783081, "learning_rate": 1.495862089863968e-05, "loss": 0.9941, "step": 234000 }, { "epoch": 1.495023191035355, "grad_norm": 0.8827859163284302, "learning_rate": 1.495504180629112e-05, "loss": 1.0541, "step": 234010 }, { "epoch": 1.4950870781850938, "grad_norm": 0.9255498647689819, "learning_rate": 1.4951463066875594e-05, "loss": 0.8875, "step": 234020 }, { "epoch": 1.4951509653348325, "grad_norm": 0.7709739804267883, "learning_rate": 1.4947884680429164e-05, "loss": 0.8926, "step": 234030 }, { "epoch": 1.4952148524845712, "grad_norm": 0.9358537197113037, "learning_rate": 1.494430664698785e-05, "loss": 0.8256, "step": 234040 }, { "epoch": 1.49527873963431, "grad_norm": 0.5185632109642029, "learning_rate": 1.4940728966587708e-05, "loss": 0.7478, "step": 234050 }, { "epoch": 1.4953426267840486, "grad_norm": 0.9753923416137695, "learning_rate": 1.493715163926474e-05, "loss": 0.8866, "step": 234060 }, { "epoch": 1.4954065139337873, "grad_norm": 1.034305453300476, "learning_rate": 1.4933574665055006e-05, "loss": 1.0687, "step": 234070 }, { "epoch": 1.495470401083526, "grad_norm": 0.6250910758972168, "learning_rate": 1.4929998043994497e-05, "loss": 0.9843, "step": 234080 }, { "epoch": 1.4955342882332647, "grad_norm": 0.7841780781745911, "learning_rate": 1.4926421776119265e-05, "loss": 0.9004, "step": 234090 }, { "epoch": 1.4955981753830034, "grad_norm": 1.2987459897994995, "learning_rate": 1.492284586146529e-05, "loss": 0.9964, "step": 234100 }, { "epoch": 1.4956620625327421, "grad_norm": 0.9914219379425049, "learning_rate": 1.4919270300068615e-05, "loss": 0.8802, "step": 234110 }, { "epoch": 1.4957259496824808, "grad_norm": 0.6687591075897217, "learning_rate": 1.4915695091965232e-05, "loss": 0.9669, "step": 234120 }, { "epoch": 1.4957898368322196, "grad_norm": 0.5962859392166138, "learning_rate": 1.4912120237191157e-05, "loss": 0.8703, "step": 234130 }, { "epoch": 1.4958537239819583, "grad_norm": 1.0896154642105103, "learning_rate": 1.4908545735782376e-05, "loss": 1.0735, "step": 234140 }, { "epoch": 1.495917611131697, "grad_norm": 0.8879744410514832, "learning_rate": 1.4904971587774896e-05, "loss": 1.0103, "step": 234150 }, { "epoch": 1.4959814982814357, "grad_norm": 0.7168521285057068, "learning_rate": 1.490139779320473e-05, "loss": 0.8318, "step": 234160 }, { "epoch": 1.4960453854311744, "grad_norm": 0.949070930480957, "learning_rate": 1.4897824352107836e-05, "loss": 0.7894, "step": 234170 }, { "epoch": 1.496109272580913, "grad_norm": 1.325273036956787, "learning_rate": 1.4894251264520238e-05, "loss": 0.8188, "step": 234180 }, { "epoch": 1.4961731597306518, "grad_norm": 0.9096631407737732, "learning_rate": 1.4890678530477881e-05, "loss": 0.9897, "step": 234190 }, { "epoch": 1.4962370468803905, "grad_norm": 0.8928118348121643, "learning_rate": 1.4887106150016784e-05, "loss": 0.9482, "step": 234200 }, { "epoch": 1.4963009340301292, "grad_norm": 0.6268375515937805, "learning_rate": 1.4883534123172887e-05, "loss": 0.9183, "step": 234210 }, { "epoch": 1.496364821179868, "grad_norm": 1.260741114616394, "learning_rate": 1.4879962449982199e-05, "loss": 0.995, "step": 234220 }, { "epoch": 1.4964287083296066, "grad_norm": 2.3379745483398438, "learning_rate": 1.4876391130480654e-05, "loss": 0.8229, "step": 234230 }, { "epoch": 1.4964925954793453, "grad_norm": 1.0594456195831299, "learning_rate": 1.4872820164704255e-05, "loss": 1.0574, "step": 234240 }, { "epoch": 1.496556482629084, "grad_norm": 1.1462024450302124, "learning_rate": 1.486924955268893e-05, "loss": 0.6305, "step": 234250 }, { "epoch": 1.4966203697788227, "grad_norm": 1.3557465076446533, "learning_rate": 1.4865679294470669e-05, "loss": 1.038, "step": 234260 }, { "epoch": 1.4966842569285614, "grad_norm": 0.7691382765769958, "learning_rate": 1.4862109390085399e-05, "loss": 0.7011, "step": 234270 }, { "epoch": 1.4967481440783001, "grad_norm": 0.6914074420928955, "learning_rate": 1.4858539839569096e-05, "loss": 0.9041, "step": 234280 }, { "epoch": 1.4968120312280389, "grad_norm": 1.4908119440078735, "learning_rate": 1.4854970642957688e-05, "loss": 0.6648, "step": 234290 }, { "epoch": 1.4968759183777776, "grad_norm": 0.6637123227119446, "learning_rate": 1.4851401800287146e-05, "loss": 0.8933, "step": 234300 }, { "epoch": 1.4969398055275163, "grad_norm": 1.5094033479690552, "learning_rate": 1.4847833311593395e-05, "loss": 0.738, "step": 234310 }, { "epoch": 1.497003692677255, "grad_norm": 1.5913399457931519, "learning_rate": 1.4844265176912359e-05, "loss": 0.9591, "step": 234320 }, { "epoch": 1.4970675798269937, "grad_norm": 0.9007583260536194, "learning_rate": 1.484069739628e-05, "loss": 0.802, "step": 234330 }, { "epoch": 1.4971314669767324, "grad_norm": 1.1307207345962524, "learning_rate": 1.4837129969732222e-05, "loss": 0.8646, "step": 234340 }, { "epoch": 1.4971953541264709, "grad_norm": 1.7793842554092407, "learning_rate": 1.4833562897304975e-05, "loss": 1.0184, "step": 234350 }, { "epoch": 1.4972592412762098, "grad_norm": 0.8876829743385315, "learning_rate": 1.4829996179034167e-05, "loss": 0.812, "step": 234360 }, { "epoch": 1.4973231284259483, "grad_norm": 0.8620350956916809, "learning_rate": 1.4826429814955734e-05, "loss": 0.8405, "step": 234370 }, { "epoch": 1.4973870155756872, "grad_norm": 1.2996463775634766, "learning_rate": 1.4822863805105569e-05, "loss": 0.9884, "step": 234380 }, { "epoch": 1.4974509027254257, "grad_norm": 0.757505476474762, "learning_rate": 1.4819298149519611e-05, "loss": 1.1306, "step": 234390 }, { "epoch": 1.4975147898751646, "grad_norm": 0.8766262531280518, "learning_rate": 1.4815732848233744e-05, "loss": 1.0607, "step": 234400 }, { "epoch": 1.497578677024903, "grad_norm": 1.9996721744537354, "learning_rate": 1.4812167901283896e-05, "loss": 0.7488, "step": 234410 }, { "epoch": 1.497642564174642, "grad_norm": 1.6408275365829468, "learning_rate": 1.4808603308705949e-05, "loss": 1.0059, "step": 234420 }, { "epoch": 1.4977064513243805, "grad_norm": 1.429452896118164, "learning_rate": 1.480503907053582e-05, "loss": 0.9959, "step": 234430 }, { "epoch": 1.4977703384741194, "grad_norm": 0.7910223603248596, "learning_rate": 1.4801475186809388e-05, "loss": 0.9692, "step": 234440 }, { "epoch": 1.497834225623858, "grad_norm": 1.11632239818573, "learning_rate": 1.4797911657562562e-05, "loss": 1.0808, "step": 234450 }, { "epoch": 1.4978981127735969, "grad_norm": 0.9424408674240112, "learning_rate": 1.4794348482831206e-05, "loss": 0.8386, "step": 234460 }, { "epoch": 1.4979619999233353, "grad_norm": 0.742712676525116, "learning_rate": 1.4790785662651235e-05, "loss": 0.7571, "step": 234470 }, { "epoch": 1.498025887073074, "grad_norm": 0.9132349491119385, "learning_rate": 1.4787223197058498e-05, "loss": 0.6435, "step": 234480 }, { "epoch": 1.4980897742228128, "grad_norm": 0.6201077103614807, "learning_rate": 1.4783661086088885e-05, "loss": 0.9236, "step": 234490 }, { "epoch": 1.4981536613725515, "grad_norm": 1.0537291765213013, "learning_rate": 1.4780099329778285e-05, "loss": 1.0377, "step": 234500 }, { "epoch": 1.4982175485222902, "grad_norm": 1.24350106716156, "learning_rate": 1.4776537928162537e-05, "loss": 0.8583, "step": 234510 }, { "epoch": 1.4982814356720289, "grad_norm": 0.6610399484634399, "learning_rate": 1.4772976881277544e-05, "loss": 0.9588, "step": 234520 }, { "epoch": 1.4983453228217676, "grad_norm": 0.6641136407852173, "learning_rate": 1.4769416189159135e-05, "loss": 0.8946, "step": 234530 }, { "epoch": 1.4984092099715063, "grad_norm": 1.025057315826416, "learning_rate": 1.4765855851843202e-05, "loss": 0.8817, "step": 234540 }, { "epoch": 1.498473097121245, "grad_norm": 0.7867838144302368, "learning_rate": 1.4762295869365561e-05, "loss": 0.6613, "step": 234550 }, { "epoch": 1.4985369842709837, "grad_norm": 1.0991227626800537, "learning_rate": 1.4758736241762106e-05, "loss": 0.6598, "step": 234560 }, { "epoch": 1.4986008714207224, "grad_norm": 0.8666796088218689, "learning_rate": 1.475517696906864e-05, "loss": 0.7919, "step": 234570 }, { "epoch": 1.498664758570461, "grad_norm": 1.0286558866500854, "learning_rate": 1.475161805132106e-05, "loss": 0.6867, "step": 234580 }, { "epoch": 1.4987286457201998, "grad_norm": 1.2615046501159668, "learning_rate": 1.474805948855516e-05, "loss": 0.7035, "step": 234590 }, { "epoch": 1.4987925328699385, "grad_norm": 0.8936821222305298, "learning_rate": 1.4744501280806811e-05, "loss": 0.986, "step": 234600 }, { "epoch": 1.4988564200196772, "grad_norm": 1.254328727722168, "learning_rate": 1.4740943428111825e-05, "loss": 0.7812, "step": 234610 }, { "epoch": 1.498920307169416, "grad_norm": 0.8320007920265198, "learning_rate": 1.4737385930506053e-05, "loss": 0.9266, "step": 234620 }, { "epoch": 1.4989841943191546, "grad_norm": 1.5553789138793945, "learning_rate": 1.4733828788025294e-05, "loss": 0.7904, "step": 234630 }, { "epoch": 1.4990480814688933, "grad_norm": 1.5870753526687622, "learning_rate": 1.4730272000705408e-05, "loss": 0.7808, "step": 234640 }, { "epoch": 1.499111968618632, "grad_norm": 0.8670278191566467, "learning_rate": 1.4726715568582184e-05, "loss": 0.8087, "step": 234650 }, { "epoch": 1.4991758557683708, "grad_norm": 0.8606697916984558, "learning_rate": 1.4723159491691458e-05, "loss": 0.7755, "step": 234660 }, { "epoch": 1.4992397429181095, "grad_norm": 0.7828750014305115, "learning_rate": 1.4719603770069023e-05, "loss": 1.1492, "step": 234670 }, { "epoch": 1.4993036300678482, "grad_norm": 0.6629054546356201, "learning_rate": 1.4716048403750698e-05, "loss": 0.9579, "step": 234680 }, { "epoch": 1.4993675172175869, "grad_norm": 1.172922968864441, "learning_rate": 1.471249339277231e-05, "loss": 0.9958, "step": 234690 }, { "epoch": 1.4994314043673256, "grad_norm": 0.7931080460548401, "learning_rate": 1.470893873716962e-05, "loss": 0.6613, "step": 234700 }, { "epoch": 1.4994952915170643, "grad_norm": 0.687065064907074, "learning_rate": 1.4705384436978464e-05, "loss": 0.6888, "step": 234710 }, { "epoch": 1.499559178666803, "grad_norm": 0.707865297794342, "learning_rate": 1.4701830492234609e-05, "loss": 1.2256, "step": 234720 }, { "epoch": 1.4996230658165417, "grad_norm": 0.6391942501068115, "learning_rate": 1.4698276902973873e-05, "loss": 0.6766, "step": 234730 }, { "epoch": 1.4996869529662804, "grad_norm": 0.8546810746192932, "learning_rate": 1.4694723669232014e-05, "loss": 1.0149, "step": 234740 }, { "epoch": 1.4997508401160191, "grad_norm": 0.8669940233230591, "learning_rate": 1.4691170791044844e-05, "loss": 0.7914, "step": 234750 }, { "epoch": 1.4998147272657578, "grad_norm": 0.6280136108398438, "learning_rate": 1.4687618268448116e-05, "loss": 0.9951, "step": 234760 }, { "epoch": 1.4998786144154965, "grad_norm": 0.7606225609779358, "learning_rate": 1.468406610147764e-05, "loss": 1.1205, "step": 234770 }, { "epoch": 1.4999425015652352, "grad_norm": 0.9498753547668457, "learning_rate": 1.4680514290169157e-05, "loss": 1.101, "step": 234780 }, { "epoch": 1.500006388714974, "grad_norm": 0.9360266923904419, "learning_rate": 1.4676962834558472e-05, "loss": 0.7562, "step": 234790 }, { "epoch": 1.5000702758647124, "grad_norm": 1.285915493965149, "learning_rate": 1.4673411734681309e-05, "loss": 0.8137, "step": 234800 }, { "epoch": 1.5001341630144513, "grad_norm": 1.4864213466644287, "learning_rate": 1.4669860990573448e-05, "loss": 0.9683, "step": 234810 }, { "epoch": 1.5001980501641898, "grad_norm": 0.9704619646072388, "learning_rate": 1.4666310602270666e-05, "loss": 0.7457, "step": 234820 }, { "epoch": 1.5002619373139288, "grad_norm": 1.6511520147323608, "learning_rate": 1.4662760569808686e-05, "loss": 0.855, "step": 234830 }, { "epoch": 1.5003258244636672, "grad_norm": 1.3069212436676025, "learning_rate": 1.4659210893223301e-05, "loss": 0.7079, "step": 234840 }, { "epoch": 1.5003897116134062, "grad_norm": 2.1083788871765137, "learning_rate": 1.4655661572550217e-05, "loss": 0.933, "step": 234850 }, { "epoch": 1.5004535987631447, "grad_norm": 0.7109601497650146, "learning_rate": 1.4652112607825213e-05, "loss": 0.923, "step": 234860 }, { "epoch": 1.5005174859128836, "grad_norm": 0.9529529809951782, "learning_rate": 1.4648563999084002e-05, "loss": 0.6489, "step": 234870 }, { "epoch": 1.500581373062622, "grad_norm": 0.627356767654419, "learning_rate": 1.464501574636235e-05, "loss": 0.8803, "step": 234880 }, { "epoch": 1.500645260212361, "grad_norm": 1.132530927658081, "learning_rate": 1.4641467849695961e-05, "loss": 1.0272, "step": 234890 }, { "epoch": 1.5007091473620995, "grad_norm": 1.142256259918213, "learning_rate": 1.463792030912059e-05, "loss": 0.8087, "step": 234900 }, { "epoch": 1.5007730345118384, "grad_norm": 1.1030948162078857, "learning_rate": 1.4634373124671947e-05, "loss": 1.4046, "step": 234910 }, { "epoch": 1.500836921661577, "grad_norm": 0.9625082612037659, "learning_rate": 1.4630826296385775e-05, "loss": 0.9084, "step": 234920 }, { "epoch": 1.5009008088113158, "grad_norm": 0.7878156304359436, "learning_rate": 1.4627279824297762e-05, "loss": 0.7928, "step": 234930 }, { "epoch": 1.5009646959610543, "grad_norm": 5.58283805847168, "learning_rate": 1.462373370844366e-05, "loss": 1.0296, "step": 234940 }, { "epoch": 1.5010285831107932, "grad_norm": 0.9159521460533142, "learning_rate": 1.4620187948859149e-05, "loss": 1.1049, "step": 234950 }, { "epoch": 1.5010924702605317, "grad_norm": 0.9695411324501038, "learning_rate": 1.4616642545579967e-05, "loss": 0.7801, "step": 234960 }, { "epoch": 1.5011563574102706, "grad_norm": 1.1427416801452637, "learning_rate": 1.4613097498641792e-05, "loss": 0.9557, "step": 234970 }, { "epoch": 1.5012202445600091, "grad_norm": 0.6965705752372742, "learning_rate": 1.4609552808080357e-05, "loss": 0.6617, "step": 234980 }, { "epoch": 1.501284131709748, "grad_norm": 2.3865509033203125, "learning_rate": 1.4606008473931326e-05, "loss": 1.207, "step": 234990 }, { "epoch": 1.5013480188594865, "grad_norm": 0.8042628169059753, "learning_rate": 1.4602464496230406e-05, "loss": 1.0717, "step": 235000 }, { "epoch": 1.5014119060092253, "grad_norm": 0.7267552018165588, "learning_rate": 1.4598920875013312e-05, "loss": 0.9661, "step": 235010 }, { "epoch": 1.501475793158964, "grad_norm": 0.8529950380325317, "learning_rate": 1.4595377610315692e-05, "loss": 0.8902, "step": 235020 }, { "epoch": 1.5015396803087027, "grad_norm": 0.893867552280426, "learning_rate": 1.4591834702173262e-05, "loss": 0.7361, "step": 235030 }, { "epoch": 1.5016035674584414, "grad_norm": 1.094534158706665, "learning_rate": 1.458829215062168e-05, "loss": 1.2228, "step": 235040 }, { "epoch": 1.50166745460818, "grad_norm": 0.9267030954360962, "learning_rate": 1.4584749955696648e-05, "loss": 0.9459, "step": 235050 }, { "epoch": 1.5017313417579188, "grad_norm": 0.6565288305282593, "learning_rate": 1.4581208117433804e-05, "loss": 1.0574, "step": 235060 }, { "epoch": 1.5017952289076575, "grad_norm": 1.656847357749939, "learning_rate": 1.4577666635868848e-05, "loss": 1.1281, "step": 235070 }, { "epoch": 1.5018591160573962, "grad_norm": 1.3042356967926025, "learning_rate": 1.4574125511037423e-05, "loss": 0.8161, "step": 235080 }, { "epoch": 1.501923003207135, "grad_norm": 1.1575617790222168, "learning_rate": 1.4570584742975213e-05, "loss": 0.7767, "step": 235090 }, { "epoch": 1.5019868903568736, "grad_norm": 1.0763665437698364, "learning_rate": 1.456704433171785e-05, "loss": 1.3651, "step": 235100 }, { "epoch": 1.5020507775066123, "grad_norm": 2.460211992263794, "learning_rate": 1.456350427730102e-05, "loss": 0.7748, "step": 235110 }, { "epoch": 1.502114664656351, "grad_norm": 1.2127439975738525, "learning_rate": 1.4559964579760348e-05, "loss": 0.9433, "step": 235120 }, { "epoch": 1.5021785518060897, "grad_norm": 1.1584923267364502, "learning_rate": 1.4556425239131504e-05, "loss": 1.0207, "step": 235130 }, { "epoch": 1.5022424389558284, "grad_norm": 0.915944516658783, "learning_rate": 1.455288625545011e-05, "loss": 0.7177, "step": 235140 }, { "epoch": 1.5023063261055671, "grad_norm": 0.9569301009178162, "learning_rate": 1.454934762875183e-05, "loss": 0.699, "step": 235150 }, { "epoch": 1.5023702132553058, "grad_norm": 0.8162155151367188, "learning_rate": 1.4545809359072271e-05, "loss": 1.0925, "step": 235160 }, { "epoch": 1.5024341004050445, "grad_norm": 0.8980828523635864, "learning_rate": 1.4542271446447103e-05, "loss": 0.7741, "step": 235170 }, { "epoch": 1.5024979875547833, "grad_norm": 0.6045962572097778, "learning_rate": 1.4538733890911916e-05, "loss": 0.8077, "step": 235180 }, { "epoch": 1.502561874704522, "grad_norm": 0.6471248865127563, "learning_rate": 1.4535196692502379e-05, "loss": 0.8041, "step": 235190 }, { "epoch": 1.5026257618542607, "grad_norm": 0.9747743606567383, "learning_rate": 1.4531659851254076e-05, "loss": 0.8377, "step": 235200 }, { "epoch": 1.5026896490039994, "grad_norm": 0.8339213132858276, "learning_rate": 1.452812336720264e-05, "loss": 0.9018, "step": 235210 }, { "epoch": 1.502753536153738, "grad_norm": 1.8665075302124023, "learning_rate": 1.4524587240383703e-05, "loss": 0.9894, "step": 235220 }, { "epoch": 1.5028174233034768, "grad_norm": 1.8768938779830933, "learning_rate": 1.4521051470832852e-05, "loss": 0.9992, "step": 235230 }, { "epoch": 1.5028813104532155, "grad_norm": 2.487178087234497, "learning_rate": 1.4517516058585723e-05, "loss": 0.8355, "step": 235240 }, { "epoch": 1.5029451976029542, "grad_norm": 1.0076284408569336, "learning_rate": 1.4513981003677885e-05, "loss": 0.9543, "step": 235250 }, { "epoch": 1.503009084752693, "grad_norm": 0.7256926894187927, "learning_rate": 1.4510446306144977e-05, "loss": 0.8898, "step": 235260 }, { "epoch": 1.5030729719024314, "grad_norm": 1.6876107454299927, "learning_rate": 1.4506911966022574e-05, "loss": 0.8502, "step": 235270 }, { "epoch": 1.5031368590521703, "grad_norm": 0.8799774646759033, "learning_rate": 1.4503377983346272e-05, "loss": 0.7909, "step": 235280 }, { "epoch": 1.5032007462019088, "grad_norm": 0.8718019127845764, "learning_rate": 1.449984435815165e-05, "loss": 0.957, "step": 235290 }, { "epoch": 1.5032646333516477, "grad_norm": 0.7004748582839966, "learning_rate": 1.4496311090474324e-05, "loss": 0.8612, "step": 235300 }, { "epoch": 1.5033285205013862, "grad_norm": 1.0339982509613037, "learning_rate": 1.4492778180349841e-05, "loss": 0.7531, "step": 235310 }, { "epoch": 1.5033924076511251, "grad_norm": 1.2925528287887573, "learning_rate": 1.4489245627813819e-05, "loss": 0.7904, "step": 235320 }, { "epoch": 1.5034562948008636, "grad_norm": 1.0401469469070435, "learning_rate": 1.4485713432901798e-05, "loss": 0.903, "step": 235330 }, { "epoch": 1.5035201819506026, "grad_norm": 0.8769915103912354, "learning_rate": 1.4482181595649369e-05, "loss": 1.0074, "step": 235340 }, { "epoch": 1.503584069100341, "grad_norm": 1.0945196151733398, "learning_rate": 1.4478650116092107e-05, "loss": 0.7037, "step": 235350 }, { "epoch": 1.50364795625008, "grad_norm": 1.3890713453292847, "learning_rate": 1.4475118994265563e-05, "loss": 0.7796, "step": 235360 }, { "epoch": 1.5037118433998184, "grad_norm": 1.254223346710205, "learning_rate": 1.4471588230205313e-05, "loss": 0.9753, "step": 235370 }, { "epoch": 1.5037757305495574, "grad_norm": 2.11234974861145, "learning_rate": 1.446805782394689e-05, "loss": 0.7127, "step": 235380 }, { "epoch": 1.5038396176992959, "grad_norm": 0.6228627562522888, "learning_rate": 1.4464527775525883e-05, "loss": 1.0625, "step": 235390 }, { "epoch": 1.5039035048490348, "grad_norm": 0.8461014032363892, "learning_rate": 1.4460998084977812e-05, "loss": 1.0634, "step": 235400 }, { "epoch": 1.5039673919987733, "grad_norm": 0.9258694648742676, "learning_rate": 1.4457468752338244e-05, "loss": 0.8858, "step": 235410 }, { "epoch": 1.5040312791485122, "grad_norm": 1.5003474950790405, "learning_rate": 1.44539397776427e-05, "loss": 0.9238, "step": 235420 }, { "epoch": 1.5040951662982507, "grad_norm": 0.9997551441192627, "learning_rate": 1.4450411160926753e-05, "loss": 0.9678, "step": 235430 }, { "epoch": 1.5041590534479896, "grad_norm": 1.9879392385482788, "learning_rate": 1.44468829022259e-05, "loss": 1.0032, "step": 235440 }, { "epoch": 1.504222940597728, "grad_norm": 1.0907074213027954, "learning_rate": 1.4443355001575715e-05, "loss": 0.8554, "step": 235450 }, { "epoch": 1.504286827747467, "grad_norm": 1.0263760089874268, "learning_rate": 1.4439827459011685e-05, "loss": 0.7579, "step": 235460 }, { "epoch": 1.5043507148972055, "grad_norm": 1.8271600008010864, "learning_rate": 1.4436300274569375e-05, "loss": 1.0961, "step": 235470 }, { "epoch": 1.5044146020469444, "grad_norm": 0.9254994988441467, "learning_rate": 1.4432773448284276e-05, "loss": 0.6641, "step": 235480 }, { "epoch": 1.504478489196683, "grad_norm": 1.155799388885498, "learning_rate": 1.4429246980191929e-05, "loss": 0.9395, "step": 235490 }, { "epoch": 1.5045423763464216, "grad_norm": 1.6232599020004272, "learning_rate": 1.4425720870327825e-05, "loss": 0.7697, "step": 235500 }, { "epoch": 1.5046062634961603, "grad_norm": 1.015397548675537, "learning_rate": 1.4422195118727506e-05, "loss": 0.7536, "step": 235510 }, { "epoch": 1.504670150645899, "grad_norm": 0.7980430722236633, "learning_rate": 1.4418669725426436e-05, "loss": 1.2109, "step": 235520 }, { "epoch": 1.5047340377956377, "grad_norm": 0.7753633856773376, "learning_rate": 1.4415144690460153e-05, "loss": 0.8404, "step": 235530 }, { "epoch": 1.5047979249453765, "grad_norm": 0.9534075856208801, "learning_rate": 1.4411620013864163e-05, "loss": 0.6998, "step": 235540 }, { "epoch": 1.5048618120951152, "grad_norm": 0.9780369400978088, "learning_rate": 1.440809569567393e-05, "loss": 0.6091, "step": 235550 }, { "epoch": 1.5049256992448539, "grad_norm": 0.80521559715271, "learning_rate": 1.4404571735924983e-05, "loss": 0.87, "step": 235560 }, { "epoch": 1.5049895863945926, "grad_norm": 1.0219656229019165, "learning_rate": 1.4401048134652773e-05, "loss": 1.181, "step": 235570 }, { "epoch": 1.5050534735443313, "grad_norm": 1.8196736574172974, "learning_rate": 1.4397524891892821e-05, "loss": 0.9589, "step": 235580 }, { "epoch": 1.50511736069407, "grad_norm": 0.778160810470581, "learning_rate": 1.4394002007680585e-05, "loss": 0.6646, "step": 235590 }, { "epoch": 1.5051812478438087, "grad_norm": 0.9555881023406982, "learning_rate": 1.4390479482051561e-05, "loss": 0.8511, "step": 235600 }, { "epoch": 1.5052451349935474, "grad_norm": 1.0950413942337036, "learning_rate": 1.4386957315041205e-05, "loss": 1.0269, "step": 235610 }, { "epoch": 1.505309022143286, "grad_norm": 1.0835603475570679, "learning_rate": 1.4383435506685012e-05, "loss": 0.7831, "step": 235620 }, { "epoch": 1.5053729092930248, "grad_norm": 0.7866032123565674, "learning_rate": 1.4379914057018417e-05, "loss": 0.8731, "step": 235630 }, { "epoch": 1.5054367964427635, "grad_norm": 1.1362463235855103, "learning_rate": 1.4376392966076924e-05, "loss": 0.8806, "step": 235640 }, { "epoch": 1.5055006835925022, "grad_norm": 1.0019675493240356, "learning_rate": 1.4372872233895957e-05, "loss": 0.8582, "step": 235650 }, { "epoch": 1.505564570742241, "grad_norm": 1.379349946975708, "learning_rate": 1.4369351860511e-05, "loss": 0.6788, "step": 235660 }, { "epoch": 1.5056284578919796, "grad_norm": 0.7315894365310669, "learning_rate": 1.4365831845957483e-05, "loss": 0.7359, "step": 235670 }, { "epoch": 1.5056923450417183, "grad_norm": 1.0244824886322021, "learning_rate": 1.4362312190270877e-05, "loss": 0.9086, "step": 235680 }, { "epoch": 1.505756232191457, "grad_norm": 1.2244248390197754, "learning_rate": 1.4358792893486611e-05, "loss": 0.7557, "step": 235690 }, { "epoch": 1.5058201193411958, "grad_norm": 1.191369652748108, "learning_rate": 1.4355273955640141e-05, "loss": 0.8936, "step": 235700 }, { "epoch": 1.5058840064909345, "grad_norm": 1.3286489248275757, "learning_rate": 1.435175537676689e-05, "loss": 0.7653, "step": 235710 }, { "epoch": 1.5059478936406732, "grad_norm": 0.8277062773704529, "learning_rate": 1.4348237156902317e-05, "loss": 0.9562, "step": 235720 }, { "epoch": 1.5060117807904119, "grad_norm": 0.920819103717804, "learning_rate": 1.434471929608182e-05, "loss": 0.8078, "step": 235730 }, { "epoch": 1.5060756679401504, "grad_norm": 1.1261929273605347, "learning_rate": 1.4341201794340852e-05, "loss": 1.0394, "step": 235740 }, { "epoch": 1.5061395550898893, "grad_norm": 0.8690299987792969, "learning_rate": 1.4337684651714844e-05, "loss": 1.2097, "step": 235750 }, { "epoch": 1.5062034422396278, "grad_norm": 1.0420963764190674, "learning_rate": 1.4334167868239202e-05, "loss": 1.0992, "step": 235760 }, { "epoch": 1.5062673293893667, "grad_norm": 0.7404338121414185, "learning_rate": 1.433065144394935e-05, "loss": 0.7894, "step": 235770 }, { "epoch": 1.5063312165391052, "grad_norm": 0.958733320236206, "learning_rate": 1.4327135378880674e-05, "loss": 0.8995, "step": 235780 }, { "epoch": 1.506395103688844, "grad_norm": 0.6937169432640076, "learning_rate": 1.4323619673068628e-05, "loss": 1.1497, "step": 235790 }, { "epoch": 1.5064589908385826, "grad_norm": 1.0881526470184326, "learning_rate": 1.4320104326548578e-05, "loss": 0.7022, "step": 235800 }, { "epoch": 1.5065228779883215, "grad_norm": 1.025079369544983, "learning_rate": 1.4316589339355957e-05, "loss": 0.9019, "step": 235810 }, { "epoch": 1.50658676513806, "grad_norm": 0.9831896424293518, "learning_rate": 1.4313074711526142e-05, "loss": 0.9154, "step": 235820 }, { "epoch": 1.506650652287799, "grad_norm": 0.7834585905075073, "learning_rate": 1.4309560443094549e-05, "loss": 0.8514, "step": 235830 }, { "epoch": 1.5067145394375374, "grad_norm": 0.7178348302841187, "learning_rate": 1.4306046534096546e-05, "loss": 0.803, "step": 235840 }, { "epoch": 1.5067784265872763, "grad_norm": 1.5619884729385376, "learning_rate": 1.4302532984567535e-05, "loss": 1.0766, "step": 235850 }, { "epoch": 1.5068423137370148, "grad_norm": 0.7582671642303467, "learning_rate": 1.4299019794542912e-05, "loss": 0.8792, "step": 235860 }, { "epoch": 1.5069062008867538, "grad_norm": 0.9229720234870911, "learning_rate": 1.4295506964058037e-05, "loss": 0.8011, "step": 235870 }, { "epoch": 1.5069700880364922, "grad_norm": 1.0600378513336182, "learning_rate": 1.4291994493148303e-05, "loss": 0.7071, "step": 235880 }, { "epoch": 1.5070339751862312, "grad_norm": 1.0364567041397095, "learning_rate": 1.428848238184906e-05, "loss": 0.7985, "step": 235890 }, { "epoch": 1.5070978623359697, "grad_norm": 0.8426311016082764, "learning_rate": 1.4284970630195715e-05, "loss": 0.9932, "step": 235900 }, { "epoch": 1.5071617494857086, "grad_norm": 0.9839722514152527, "learning_rate": 1.4281459238223598e-05, "loss": 0.7332, "step": 235910 }, { "epoch": 1.507225636635447, "grad_norm": 0.6878924369812012, "learning_rate": 1.42779482059681e-05, "loss": 1.1979, "step": 235920 }, { "epoch": 1.507289523785186, "grad_norm": 1.0222363471984863, "learning_rate": 1.4274437533464552e-05, "loss": 0.752, "step": 235930 }, { "epoch": 1.5073534109349245, "grad_norm": 0.9550470113754272, "learning_rate": 1.4270927220748348e-05, "loss": 0.8941, "step": 235940 }, { "epoch": 1.5074172980846634, "grad_norm": 1.4349061250686646, "learning_rate": 1.4267417267854793e-05, "loss": 0.6205, "step": 235950 }, { "epoch": 1.5074811852344019, "grad_norm": 0.6272745728492737, "learning_rate": 1.4263907674819277e-05, "loss": 0.598, "step": 235960 }, { "epoch": 1.5075450723841408, "grad_norm": 1.3375252485275269, "learning_rate": 1.4260398441677114e-05, "loss": 0.7406, "step": 235970 }, { "epoch": 1.5076089595338793, "grad_norm": 0.8655692338943481, "learning_rate": 1.4256889568463671e-05, "loss": 0.9343, "step": 235980 }, { "epoch": 1.507672846683618, "grad_norm": 0.8092108964920044, "learning_rate": 1.4253381055214254e-05, "loss": 0.8449, "step": 235990 }, { "epoch": 1.5077367338333567, "grad_norm": 0.7435670495033264, "learning_rate": 1.4249872901964234e-05, "loss": 1.1352, "step": 236000 }, { "epoch": 1.5078006209830954, "grad_norm": 1.159881591796875, "learning_rate": 1.4246365108748904e-05, "loss": 0.916, "step": 236010 }, { "epoch": 1.5078645081328341, "grad_norm": 0.8930804133415222, "learning_rate": 1.4242857675603627e-05, "loss": 0.7678, "step": 236020 }, { "epoch": 1.5079283952825728, "grad_norm": 0.9131158590316772, "learning_rate": 1.4239350602563688e-05, "loss": 0.9283, "step": 236030 }, { "epoch": 1.5079922824323115, "grad_norm": 2.3771884441375732, "learning_rate": 1.4235843889664447e-05, "loss": 0.8729, "step": 236040 }, { "epoch": 1.5080561695820502, "grad_norm": 0.689081609249115, "learning_rate": 1.4232337536941182e-05, "loss": 0.9161, "step": 236050 }, { "epoch": 1.508120056731789, "grad_norm": 0.6467447876930237, "learning_rate": 1.4228831544429222e-05, "loss": 0.8958, "step": 236060 }, { "epoch": 1.5081839438815277, "grad_norm": 0.8249161839485168, "learning_rate": 1.422532591216389e-05, "loss": 0.8604, "step": 236070 }, { "epoch": 1.5082478310312664, "grad_norm": 1.0231643915176392, "learning_rate": 1.4221820640180456e-05, "loss": 1.2338, "step": 236080 }, { "epoch": 1.508311718181005, "grad_norm": 0.7032090425491333, "learning_rate": 1.4218315728514253e-05, "loss": 1.2069, "step": 236090 }, { "epoch": 1.5083756053307438, "grad_norm": 0.7626880407333374, "learning_rate": 1.421481117720056e-05, "loss": 0.9404, "step": 236100 }, { "epoch": 1.5084394924804825, "grad_norm": 0.8990775346755981, "learning_rate": 1.4211306986274686e-05, "loss": 0.8909, "step": 236110 }, { "epoch": 1.5085033796302212, "grad_norm": 0.8762044906616211, "learning_rate": 1.4207803155771898e-05, "loss": 0.7445, "step": 236120 }, { "epoch": 1.50856726677996, "grad_norm": 0.8042936325073242, "learning_rate": 1.4204299685727518e-05, "loss": 0.824, "step": 236130 }, { "epoch": 1.5086311539296986, "grad_norm": 0.9430978298187256, "learning_rate": 1.4200796576176788e-05, "loss": 0.7238, "step": 236140 }, { "epoch": 1.5086950410794373, "grad_norm": 1.2746912240982056, "learning_rate": 1.4197293827155023e-05, "loss": 0.914, "step": 236150 }, { "epoch": 1.508758928229176, "grad_norm": 1.2489253282546997, "learning_rate": 1.4193791438697467e-05, "loss": 0.9566, "step": 236160 }, { "epoch": 1.5088228153789147, "grad_norm": 0.8797708749771118, "learning_rate": 1.4190289410839425e-05, "loss": 1.0164, "step": 236170 }, { "epoch": 1.5088867025286534, "grad_norm": 0.6316624879837036, "learning_rate": 1.418678774361613e-05, "loss": 0.8057, "step": 236180 }, { "epoch": 1.5089505896783921, "grad_norm": 0.9060586094856262, "learning_rate": 1.4183286437062882e-05, "loss": 0.8373, "step": 236190 }, { "epoch": 1.5090144768281308, "grad_norm": 0.741062581539154, "learning_rate": 1.417978549121491e-05, "loss": 0.8532, "step": 236200 }, { "epoch": 1.5090783639778695, "grad_norm": 1.327261209487915, "learning_rate": 1.4176284906107501e-05, "loss": 0.7526, "step": 236210 }, { "epoch": 1.5091422511276082, "grad_norm": 0.6677632927894592, "learning_rate": 1.417278468177588e-05, "loss": 0.8935, "step": 236220 }, { "epoch": 1.5092061382773467, "grad_norm": 0.711325466632843, "learning_rate": 1.416928481825533e-05, "loss": 1.1103, "step": 236230 }, { "epoch": 1.5092700254270857, "grad_norm": 1.0160562992095947, "learning_rate": 1.4165785315581066e-05, "loss": 0.7888, "step": 236240 }, { "epoch": 1.5093339125768241, "grad_norm": 0.8401907682418823, "learning_rate": 1.4162286173788359e-05, "loss": 0.7818, "step": 236250 }, { "epoch": 1.509397799726563, "grad_norm": 0.573164701461792, "learning_rate": 1.4158787392912431e-05, "loss": 0.716, "step": 236260 }, { "epoch": 1.5094616868763016, "grad_norm": 0.5766507983207703, "learning_rate": 1.4155288972988507e-05, "loss": 0.7058, "step": 236270 }, { "epoch": 1.5095255740260405, "grad_norm": 1.2178000211715698, "learning_rate": 1.4151790914051849e-05, "loss": 0.8116, "step": 236280 }, { "epoch": 1.509589461175779, "grad_norm": 0.9472524523735046, "learning_rate": 1.4148293216137654e-05, "loss": 0.725, "step": 236290 }, { "epoch": 1.509653348325518, "grad_norm": 1.013202428817749, "learning_rate": 1.414479587928118e-05, "loss": 0.9632, "step": 236300 }, { "epoch": 1.5097172354752564, "grad_norm": 0.6979582905769348, "learning_rate": 1.4141298903517608e-05, "loss": 0.9035, "step": 236310 }, { "epoch": 1.5097811226249953, "grad_norm": 0.641642689704895, "learning_rate": 1.4137802288882202e-05, "loss": 0.9259, "step": 236320 }, { "epoch": 1.5098450097747338, "grad_norm": 0.8102341294288635, "learning_rate": 1.4134306035410134e-05, "loss": 0.8065, "step": 236330 }, { "epoch": 1.5099088969244727, "grad_norm": 0.8578863143920898, "learning_rate": 1.4130810143136646e-05, "loss": 0.847, "step": 236340 }, { "epoch": 1.5099727840742112, "grad_norm": 1.7385120391845703, "learning_rate": 1.412731461209692e-05, "loss": 0.9918, "step": 236350 }, { "epoch": 1.5100366712239501, "grad_norm": 1.2686347961425781, "learning_rate": 1.412381944232618e-05, "loss": 0.9261, "step": 236360 }, { "epoch": 1.5101005583736886, "grad_norm": 0.8600685596466064, "learning_rate": 1.4120324633859605e-05, "loss": 0.8673, "step": 236370 }, { "epoch": 1.5101644455234275, "grad_norm": 1.0204668045043945, "learning_rate": 1.41168301867324e-05, "loss": 0.912, "step": 236380 }, { "epoch": 1.510228332673166, "grad_norm": 0.8734965920448303, "learning_rate": 1.4113336100979767e-05, "loss": 0.6381, "step": 236390 }, { "epoch": 1.510292219822905, "grad_norm": 0.6682913303375244, "learning_rate": 1.4109842376636878e-05, "loss": 0.9431, "step": 236400 }, { "epoch": 1.5103561069726434, "grad_norm": 2.5903491973876953, "learning_rate": 1.410634901373894e-05, "loss": 0.8283, "step": 236410 }, { "epoch": 1.5104199941223824, "grad_norm": 0.5716591477394104, "learning_rate": 1.4102856012321104e-05, "loss": 0.9301, "step": 236420 }, { "epoch": 1.5104838812721209, "grad_norm": 0.8407396674156189, "learning_rate": 1.4099363372418584e-05, "loss": 1.024, "step": 236430 }, { "epoch": 1.5105477684218598, "grad_norm": 1.072849988937378, "learning_rate": 1.409587109406651e-05, "loss": 1.2643, "step": 236440 }, { "epoch": 1.5106116555715983, "grad_norm": 0.9766477346420288, "learning_rate": 1.4092379177300091e-05, "loss": 0.8839, "step": 236450 }, { "epoch": 1.5106755427213372, "grad_norm": 0.8836285471916199, "learning_rate": 1.4088887622154468e-05, "loss": 1.0016, "step": 236460 }, { "epoch": 1.5107394298710757, "grad_norm": 0.9911049008369446, "learning_rate": 1.4085396428664826e-05, "loss": 0.8989, "step": 236470 }, { "epoch": 1.5108033170208144, "grad_norm": 1.1944478750228882, "learning_rate": 1.4081905596866296e-05, "loss": 0.9794, "step": 236480 }, { "epoch": 1.510867204170553, "grad_norm": 0.7414306998252869, "learning_rate": 1.407841512679407e-05, "loss": 0.6418, "step": 236490 }, { "epoch": 1.5109310913202918, "grad_norm": 1.1465985774993896, "learning_rate": 1.4074925018483265e-05, "loss": 0.754, "step": 236500 }, { "epoch": 1.5109949784700305, "grad_norm": 1.3809295892715454, "learning_rate": 1.4071435271969058e-05, "loss": 0.7055, "step": 236510 }, { "epoch": 1.5110588656197692, "grad_norm": 0.7024053931236267, "learning_rate": 1.406794588728656e-05, "loss": 0.8718, "step": 236520 }, { "epoch": 1.511122752769508, "grad_norm": 0.8922861218452454, "learning_rate": 1.4064456864470954e-05, "loss": 0.7903, "step": 236530 }, { "epoch": 1.5111866399192466, "grad_norm": 0.7403420805931091, "learning_rate": 1.406096820355734e-05, "loss": 0.8861, "step": 236540 }, { "epoch": 1.5112505270689853, "grad_norm": 0.8601128458976746, "learning_rate": 1.4057479904580884e-05, "loss": 1.1446, "step": 236550 }, { "epoch": 1.511314414218724, "grad_norm": 0.9789170622825623, "learning_rate": 1.4053991967576684e-05, "loss": 0.6798, "step": 236560 }, { "epoch": 1.5113783013684627, "grad_norm": 1.0461435317993164, "learning_rate": 1.4050504392579894e-05, "loss": 0.7695, "step": 236570 }, { "epoch": 1.5114421885182014, "grad_norm": 1.5159739255905151, "learning_rate": 1.4047017179625616e-05, "loss": 0.92, "step": 236580 }, { "epoch": 1.5115060756679402, "grad_norm": 0.7177718877792358, "learning_rate": 1.4043530328748976e-05, "loss": 0.7973, "step": 236590 }, { "epoch": 1.5115699628176789, "grad_norm": 1.0641429424285889, "learning_rate": 1.4040043839985107e-05, "loss": 0.9135, "step": 236600 }, { "epoch": 1.5116338499674176, "grad_norm": 1.2808974981307983, "learning_rate": 1.4036557713369091e-05, "loss": 0.7015, "step": 236610 }, { "epoch": 1.5116977371171563, "grad_norm": 0.9831389784812927, "learning_rate": 1.4033071948936071e-05, "loss": 0.9069, "step": 236620 }, { "epoch": 1.511761624266895, "grad_norm": 1.1371793746948242, "learning_rate": 1.4029586546721113e-05, "loss": 0.8939, "step": 236630 }, { "epoch": 1.5118255114166337, "grad_norm": 0.7853311896324158, "learning_rate": 1.4026101506759354e-05, "loss": 0.9674, "step": 236640 }, { "epoch": 1.5118893985663724, "grad_norm": 0.6516915559768677, "learning_rate": 1.4022616829085861e-05, "loss": 0.8483, "step": 236650 }, { "epoch": 1.511953285716111, "grad_norm": 0.8449056148529053, "learning_rate": 1.401913251373575e-05, "loss": 0.893, "step": 236660 }, { "epoch": 1.5120171728658498, "grad_norm": 1.0003093481063843, "learning_rate": 1.4015648560744093e-05, "loss": 0.7645, "step": 236670 }, { "epoch": 1.5120810600155885, "grad_norm": 1.1056313514709473, "learning_rate": 1.4012164970146002e-05, "loss": 0.8626, "step": 236680 }, { "epoch": 1.5121449471653272, "grad_norm": 0.949406623840332, "learning_rate": 1.4008681741976526e-05, "loss": 0.9225, "step": 236690 }, { "epoch": 1.512208834315066, "grad_norm": 0.9006887078285217, "learning_rate": 1.4005198876270775e-05, "loss": 0.9581, "step": 236700 }, { "epoch": 1.5122727214648046, "grad_norm": 0.7306432127952576, "learning_rate": 1.40017163730638e-05, "loss": 0.9412, "step": 236710 }, { "epoch": 1.512336608614543, "grad_norm": 1.1052353382110596, "learning_rate": 1.3998234232390695e-05, "loss": 1.0588, "step": 236720 }, { "epoch": 1.512400495764282, "grad_norm": 1.119249701499939, "learning_rate": 1.3994752454286525e-05, "loss": 0.7952, "step": 236730 }, { "epoch": 1.5124643829140205, "grad_norm": 0.7595828771591187, "learning_rate": 1.3991271038786325e-05, "loss": 0.8514, "step": 236740 }, { "epoch": 1.5125282700637595, "grad_norm": 1.1814230680465698, "learning_rate": 1.3987789985925193e-05, "loss": 0.8241, "step": 236750 }, { "epoch": 1.512592157213498, "grad_norm": 0.7136185765266418, "learning_rate": 1.3984309295738157e-05, "loss": 0.8798, "step": 236760 }, { "epoch": 1.5126560443632369, "grad_norm": 1.1095455884933472, "learning_rate": 1.3980828968260296e-05, "loss": 0.8871, "step": 236770 }, { "epoch": 1.5127199315129753, "grad_norm": 1.1192913055419922, "learning_rate": 1.397734900352664e-05, "loss": 0.9155, "step": 236780 }, { "epoch": 1.5127838186627143, "grad_norm": 0.9863184094429016, "learning_rate": 1.3973869401572254e-05, "loss": 0.907, "step": 236790 }, { "epoch": 1.5128477058124528, "grad_norm": 0.8944969773292542, "learning_rate": 1.3970390162432156e-05, "loss": 1.1686, "step": 236800 }, { "epoch": 1.5129115929621917, "grad_norm": 0.6087459325790405, "learning_rate": 1.3966911286141416e-05, "loss": 0.9757, "step": 236810 }, { "epoch": 1.5129754801119302, "grad_norm": 1.1121567487716675, "learning_rate": 1.3963432772735036e-05, "loss": 0.8356, "step": 236820 }, { "epoch": 1.513039367261669, "grad_norm": 2.9160475730895996, "learning_rate": 1.3959954622248078e-05, "loss": 0.9628, "step": 236830 }, { "epoch": 1.5131032544114076, "grad_norm": 0.8892576694488525, "learning_rate": 1.3956476834715544e-05, "loss": 0.8443, "step": 236840 }, { "epoch": 1.5131671415611465, "grad_norm": 0.9479966759681702, "learning_rate": 1.3952999410172485e-05, "loss": 0.7571, "step": 236850 }, { "epoch": 1.513231028710885, "grad_norm": 1.493237853050232, "learning_rate": 1.3949522348653887e-05, "loss": 0.9724, "step": 236860 }, { "epoch": 1.513294915860624, "grad_norm": 0.968614399433136, "learning_rate": 1.3946045650194806e-05, "loss": 0.8699, "step": 236870 }, { "epoch": 1.5133588030103624, "grad_norm": 0.9419978857040405, "learning_rate": 1.3942569314830218e-05, "loss": 1.1052, "step": 236880 }, { "epoch": 1.5134226901601013, "grad_norm": 1.1808056831359863, "learning_rate": 1.3939093342595172e-05, "loss": 1.2063, "step": 236890 }, { "epoch": 1.5134865773098398, "grad_norm": 1.5204524993896484, "learning_rate": 1.3935617733524636e-05, "loss": 0.6257, "step": 236900 }, { "epoch": 1.5135504644595787, "grad_norm": 1.0782991647720337, "learning_rate": 1.3932142487653627e-05, "loss": 0.7599, "step": 236910 }, { "epoch": 1.5136143516093172, "grad_norm": 1.501810073852539, "learning_rate": 1.3928667605017165e-05, "loss": 1.0803, "step": 236920 }, { "epoch": 1.5136782387590562, "grad_norm": 0.753489077091217, "learning_rate": 1.3925193085650207e-05, "loss": 1.2847, "step": 236930 }, { "epoch": 1.5137421259087946, "grad_norm": 0.9196399450302124, "learning_rate": 1.3921718929587779e-05, "loss": 0.8163, "step": 236940 }, { "epoch": 1.5138060130585336, "grad_norm": 1.3756850957870483, "learning_rate": 1.3918245136864844e-05, "loss": 0.8701, "step": 236950 }, { "epoch": 1.513869900208272, "grad_norm": 1.2189220190048218, "learning_rate": 1.3914771707516406e-05, "loss": 0.7241, "step": 236960 }, { "epoch": 1.5139337873580108, "grad_norm": 0.7605637311935425, "learning_rate": 1.3911298641577425e-05, "loss": 0.7165, "step": 236970 }, { "epoch": 1.5139976745077495, "grad_norm": 1.6095051765441895, "learning_rate": 1.3907825939082897e-05, "loss": 0.928, "step": 236980 }, { "epoch": 1.5140615616574882, "grad_norm": 1.8448421955108643, "learning_rate": 1.390435360006777e-05, "loss": 0.8557, "step": 236990 }, { "epoch": 1.5141254488072269, "grad_norm": 1.3630905151367188, "learning_rate": 1.3900881624567053e-05, "loss": 1.0013, "step": 237000 }, { "epoch": 1.5141893359569656, "grad_norm": 0.9617922306060791, "learning_rate": 1.3897410012615664e-05, "loss": 0.8883, "step": 237010 }, { "epoch": 1.5142532231067043, "grad_norm": 1.3461328744888306, "learning_rate": 1.3893938764248609e-05, "loss": 1.0668, "step": 237020 }, { "epoch": 1.514317110256443, "grad_norm": 1.3913925886154175, "learning_rate": 1.3890467879500813e-05, "loss": 1.0365, "step": 237030 }, { "epoch": 1.5143809974061817, "grad_norm": 1.317252516746521, "learning_rate": 1.3886997358407256e-05, "loss": 0.8252, "step": 237040 }, { "epoch": 1.5144448845559204, "grad_norm": 1.155108094215393, "learning_rate": 1.388352720100286e-05, "loss": 0.9447, "step": 237050 }, { "epoch": 1.5145087717056591, "grad_norm": 0.9392795562744141, "learning_rate": 1.3880057407322612e-05, "loss": 0.8071, "step": 237060 }, { "epoch": 1.5145726588553978, "grad_norm": 0.9975323677062988, "learning_rate": 1.3876587977401418e-05, "loss": 0.9965, "step": 237070 }, { "epoch": 1.5146365460051365, "grad_norm": 0.9569724798202515, "learning_rate": 1.3873118911274247e-05, "loss": 0.5875, "step": 237080 }, { "epoch": 1.5147004331548752, "grad_norm": 0.8880897164344788, "learning_rate": 1.386965020897601e-05, "loss": 0.9679, "step": 237090 }, { "epoch": 1.514764320304614, "grad_norm": 1.2377578020095825, "learning_rate": 1.3866181870541667e-05, "loss": 1.1771, "step": 237100 }, { "epoch": 1.5148282074543526, "grad_norm": 1.317079782485962, "learning_rate": 1.3862713896006118e-05, "loss": 0.6383, "step": 237110 }, { "epoch": 1.5148920946040914, "grad_norm": 1.1456557512283325, "learning_rate": 1.3859246285404304e-05, "loss": 0.6171, "step": 237120 }, { "epoch": 1.51495598175383, "grad_norm": 0.9332743883132935, "learning_rate": 1.3855779038771156e-05, "loss": 1.0269, "step": 237130 }, { "epoch": 1.5150198689035688, "grad_norm": 1.2689080238342285, "learning_rate": 1.3852312156141573e-05, "loss": 0.7387, "step": 237140 }, { "epoch": 1.5150837560533075, "grad_norm": 1.0080506801605225, "learning_rate": 1.384884563755049e-05, "loss": 0.7163, "step": 237150 }, { "epoch": 1.5151476432030462, "grad_norm": 0.8763740062713623, "learning_rate": 1.3845379483032794e-05, "loss": 0.7504, "step": 237160 }, { "epoch": 1.5152115303527849, "grad_norm": 1.120248794555664, "learning_rate": 1.3841913692623421e-05, "loss": 0.8088, "step": 237170 }, { "epoch": 1.5152754175025236, "grad_norm": 1.2910860776901245, "learning_rate": 1.383844826635724e-05, "loss": 0.9404, "step": 237180 }, { "epoch": 1.5153393046522623, "grad_norm": 1.2270375490188599, "learning_rate": 1.3834983204269186e-05, "loss": 0.8614, "step": 237190 }, { "epoch": 1.515403191802001, "grad_norm": 1.0125607252120972, "learning_rate": 1.383151850639412e-05, "loss": 0.9424, "step": 237200 }, { "epoch": 1.5154670789517395, "grad_norm": 1.463198184967041, "learning_rate": 1.3828054172766974e-05, "loss": 0.927, "step": 237210 }, { "epoch": 1.5155309661014784, "grad_norm": 1.2470359802246094, "learning_rate": 1.382459020342261e-05, "loss": 0.9859, "step": 237220 }, { "epoch": 1.515594853251217, "grad_norm": 0.8071841597557068, "learning_rate": 1.3821126598395906e-05, "loss": 0.8442, "step": 237230 }, { "epoch": 1.5156587404009558, "grad_norm": 0.9477114677429199, "learning_rate": 1.3817663357721772e-05, "loss": 0.9196, "step": 237240 }, { "epoch": 1.5157226275506943, "grad_norm": 1.4145262241363525, "learning_rate": 1.3814200481435057e-05, "loss": 0.6977, "step": 237250 }, { "epoch": 1.5157865147004332, "grad_norm": 0.6105630397796631, "learning_rate": 1.3810737969570659e-05, "loss": 0.8531, "step": 237260 }, { "epoch": 1.5158504018501717, "grad_norm": 0.530202329158783, "learning_rate": 1.3807275822163429e-05, "loss": 1.0427, "step": 237270 }, { "epoch": 1.5159142889999107, "grad_norm": 1.427032709121704, "learning_rate": 1.3803814039248247e-05, "loss": 0.9795, "step": 237280 }, { "epoch": 1.5159781761496491, "grad_norm": 1.3998620510101318, "learning_rate": 1.380035262085997e-05, "loss": 1.2296, "step": 237290 }, { "epoch": 1.516042063299388, "grad_norm": 0.7057076096534729, "learning_rate": 1.3796891567033466e-05, "loss": 0.8736, "step": 237300 }, { "epoch": 1.5161059504491265, "grad_norm": 0.9350077509880066, "learning_rate": 1.379343087780357e-05, "loss": 0.5734, "step": 237310 }, { "epoch": 1.5161698375988655, "grad_norm": 1.0376969575881958, "learning_rate": 1.3789970553205161e-05, "loss": 0.6677, "step": 237320 }, { "epoch": 1.516233724748604, "grad_norm": 0.796563982963562, "learning_rate": 1.3786510593273067e-05, "loss": 0.7587, "step": 237330 }, { "epoch": 1.516297611898343, "grad_norm": 1.1124401092529297, "learning_rate": 1.3783050998042146e-05, "loss": 0.7973, "step": 237340 }, { "epoch": 1.5163614990480814, "grad_norm": 0.6725632548332214, "learning_rate": 1.3779591767547223e-05, "loss": 0.7605, "step": 237350 }, { "epoch": 1.5164253861978203, "grad_norm": 0.9825790524482727, "learning_rate": 1.3776132901823163e-05, "loss": 0.8773, "step": 237360 }, { "epoch": 1.5164892733475588, "grad_norm": 0.533623456954956, "learning_rate": 1.3772674400904766e-05, "loss": 0.7198, "step": 237370 }, { "epoch": 1.5165531604972977, "grad_norm": 0.9449812173843384, "learning_rate": 1.3769216264826895e-05, "loss": 0.6792, "step": 237380 }, { "epoch": 1.5166170476470362, "grad_norm": 0.7161667346954346, "learning_rate": 1.3765758493624348e-05, "loss": 0.7869, "step": 237390 }, { "epoch": 1.5166809347967751, "grad_norm": 1.3305622339248657, "learning_rate": 1.376230108733197e-05, "loss": 0.9165, "step": 237400 }, { "epoch": 1.5167448219465136, "grad_norm": 0.9296539425849915, "learning_rate": 1.3758844045984553e-05, "loss": 0.6234, "step": 237410 }, { "epoch": 1.5168087090962525, "grad_norm": 0.8305658102035522, "learning_rate": 1.3755387369616952e-05, "loss": 0.9068, "step": 237420 }, { "epoch": 1.516872596245991, "grad_norm": 0.8052715063095093, "learning_rate": 1.3751931058263933e-05, "loss": 0.8243, "step": 237430 }, { "epoch": 1.5169364833957297, "grad_norm": 0.8373987078666687, "learning_rate": 1.3748475111960334e-05, "loss": 0.9154, "step": 237440 }, { "epoch": 1.5170003705454684, "grad_norm": 1.054856538772583, "learning_rate": 1.3745019530740965e-05, "loss": 0.8902, "step": 237450 }, { "epoch": 1.5170642576952071, "grad_norm": 0.9032215476036072, "learning_rate": 1.3741564314640599e-05, "loss": 0.8574, "step": 237460 }, { "epoch": 1.5171281448449458, "grad_norm": 0.979597806930542, "learning_rate": 1.3738109463694065e-05, "loss": 1.0963, "step": 237470 }, { "epoch": 1.5171920319946846, "grad_norm": 1.0569349527359009, "learning_rate": 1.3734654977936123e-05, "loss": 1.2293, "step": 237480 }, { "epoch": 1.5172559191444233, "grad_norm": 0.7811325192451477, "learning_rate": 1.3731200857401594e-05, "loss": 0.8444, "step": 237490 }, { "epoch": 1.517319806294162, "grad_norm": 0.9610899090766907, "learning_rate": 1.3727747102125239e-05, "loss": 1.0613, "step": 237500 }, { "epoch": 1.5173836934439007, "grad_norm": 1.7076857089996338, "learning_rate": 1.3724293712141862e-05, "loss": 0.8392, "step": 237510 }, { "epoch": 1.5174475805936394, "grad_norm": 0.8330065608024597, "learning_rate": 1.3720840687486215e-05, "loss": 0.7585, "step": 237520 }, { "epoch": 1.517511467743378, "grad_norm": 1.0238244533538818, "learning_rate": 1.3717388028193101e-05, "loss": 0.9074, "step": 237530 }, { "epoch": 1.5175753548931168, "grad_norm": 0.796726405620575, "learning_rate": 1.3713935734297268e-05, "loss": 0.8588, "step": 237540 }, { "epoch": 1.5176392420428555, "grad_norm": 1.0146052837371826, "learning_rate": 1.3710483805833507e-05, "loss": 0.8888, "step": 237550 }, { "epoch": 1.5177031291925942, "grad_norm": 0.8111674189567566, "learning_rate": 1.3707032242836554e-05, "loss": 1.0352, "step": 237560 }, { "epoch": 1.517767016342333, "grad_norm": 1.2000855207443237, "learning_rate": 1.3703581045341196e-05, "loss": 0.9485, "step": 237570 }, { "epoch": 1.5178309034920716, "grad_norm": 0.7340511083602905, "learning_rate": 1.3700130213382173e-05, "loss": 0.9307, "step": 237580 }, { "epoch": 1.5178947906418103, "grad_norm": 1.05705988407135, "learning_rate": 1.3696679746994251e-05, "loss": 0.7477, "step": 237590 }, { "epoch": 1.517958677791549, "grad_norm": 1.2210100889205933, "learning_rate": 1.3693229646212153e-05, "loss": 0.8642, "step": 237600 }, { "epoch": 1.5180225649412877, "grad_norm": 0.6993927955627441, "learning_rate": 1.368977991107066e-05, "loss": 0.6987, "step": 237610 }, { "epoch": 1.5180864520910264, "grad_norm": 1.8930222988128662, "learning_rate": 1.3686330541604481e-05, "loss": 0.7734, "step": 237620 }, { "epoch": 1.5181503392407651, "grad_norm": 0.5977169275283813, "learning_rate": 1.3682881537848385e-05, "loss": 0.8858, "step": 237630 }, { "epoch": 1.5182142263905039, "grad_norm": 0.9742574095726013, "learning_rate": 1.3679432899837075e-05, "loss": 0.9346, "step": 237640 }, { "epoch": 1.5182781135402426, "grad_norm": 0.7708993554115295, "learning_rate": 1.3675984627605298e-05, "loss": 0.9882, "step": 237650 }, { "epoch": 1.5183420006899813, "grad_norm": 1.17482590675354, "learning_rate": 1.367253672118779e-05, "loss": 0.9899, "step": 237660 }, { "epoch": 1.51840588783972, "grad_norm": 2.454292058944702, "learning_rate": 1.3669089180619255e-05, "loss": 1.1206, "step": 237670 }, { "epoch": 1.5184697749894585, "grad_norm": 1.0289863348007202, "learning_rate": 1.3665642005934436e-05, "loss": 0.9492, "step": 237680 }, { "epoch": 1.5185336621391974, "grad_norm": 1.0246806144714355, "learning_rate": 1.3662195197168026e-05, "loss": 0.7844, "step": 237690 }, { "epoch": 1.5185975492889359, "grad_norm": 0.8510035276412964, "learning_rate": 1.3658748754354773e-05, "loss": 0.8301, "step": 237700 }, { "epoch": 1.5186614364386748, "grad_norm": 1.074318289756775, "learning_rate": 1.365530267752933e-05, "loss": 0.573, "step": 237710 }, { "epoch": 1.5187253235884133, "grad_norm": 1.842522382736206, "learning_rate": 1.3651856966726445e-05, "loss": 0.9324, "step": 237720 }, { "epoch": 1.5187892107381522, "grad_norm": 0.6383247375488281, "learning_rate": 1.3648411621980794e-05, "loss": 1.0601, "step": 237730 }, { "epoch": 1.5188530978878907, "grad_norm": 0.7060561180114746, "learning_rate": 1.3644966643327101e-05, "loss": 0.7332, "step": 237740 }, { "epoch": 1.5189169850376296, "grad_norm": 1.0963681936264038, "learning_rate": 1.3641522030800025e-05, "loss": 0.9709, "step": 237750 }, { "epoch": 1.518980872187368, "grad_norm": 0.8624010682106018, "learning_rate": 1.3638077784434283e-05, "loss": 0.8324, "step": 237760 }, { "epoch": 1.519044759337107, "grad_norm": 0.6828634738922119, "learning_rate": 1.3634633904264572e-05, "loss": 0.7604, "step": 237770 }, { "epoch": 1.5191086464868455, "grad_norm": 1.0846530199050903, "learning_rate": 1.3631190390325539e-05, "loss": 0.7711, "step": 237780 }, { "epoch": 1.5191725336365844, "grad_norm": 0.6594371795654297, "learning_rate": 1.3627747242651895e-05, "loss": 0.7386, "step": 237790 }, { "epoch": 1.519236420786323, "grad_norm": 1.0198477506637573, "learning_rate": 1.3624304461278292e-05, "loss": 0.8324, "step": 237800 }, { "epoch": 1.5193003079360619, "grad_norm": 0.8147205710411072, "learning_rate": 1.3620862046239425e-05, "loss": 1.0197, "step": 237810 }, { "epoch": 1.5193641950858003, "grad_norm": 0.9647092819213867, "learning_rate": 1.3617419997569936e-05, "loss": 0.8689, "step": 237820 }, { "epoch": 1.5194280822355393, "grad_norm": 1.2450505495071411, "learning_rate": 1.361397831530452e-05, "loss": 0.9359, "step": 237830 }, { "epoch": 1.5194919693852778, "grad_norm": 1.0000580549240112, "learning_rate": 1.3610536999477802e-05, "loss": 1.0524, "step": 237840 }, { "epoch": 1.5195558565350167, "grad_norm": 0.6471056342124939, "learning_rate": 1.3607096050124474e-05, "loss": 1.0834, "step": 237850 }, { "epoch": 1.5196197436847552, "grad_norm": 0.9813455939292908, "learning_rate": 1.3603655467279158e-05, "loss": 0.8514, "step": 237860 }, { "epoch": 1.519683630834494, "grad_norm": 0.793258011341095, "learning_rate": 1.3600215250976533e-05, "loss": 0.8319, "step": 237870 }, { "epoch": 1.5197475179842326, "grad_norm": 0.9160090684890747, "learning_rate": 1.3596775401251222e-05, "loss": 0.9845, "step": 237880 }, { "epoch": 1.5198114051339715, "grad_norm": 0.8141654133796692, "learning_rate": 1.3593335918137883e-05, "loss": 0.9324, "step": 237890 }, { "epoch": 1.51987529228371, "grad_norm": 1.104178786277771, "learning_rate": 1.3589896801671137e-05, "loss": 0.9644, "step": 237900 }, { "epoch": 1.519939179433449, "grad_norm": 0.8147308230400085, "learning_rate": 1.3586458051885641e-05, "loss": 0.8855, "step": 237910 }, { "epoch": 1.5200030665831874, "grad_norm": 0.9954264163970947, "learning_rate": 1.3583019668816004e-05, "loss": 0.7827, "step": 237920 }, { "epoch": 1.520066953732926, "grad_norm": 1.1882027387619019, "learning_rate": 1.3579581652496875e-05, "loss": 0.9866, "step": 237930 }, { "epoch": 1.5201308408826648, "grad_norm": 0.8633627891540527, "learning_rate": 1.3576144002962854e-05, "loss": 0.7234, "step": 237940 }, { "epoch": 1.5201947280324035, "grad_norm": 4.5558671951293945, "learning_rate": 1.3572706720248584e-05, "loss": 0.8573, "step": 237950 }, { "epoch": 1.5202586151821422, "grad_norm": 1.0978244543075562, "learning_rate": 1.3569269804388663e-05, "loss": 0.8994, "step": 237960 }, { "epoch": 1.520322502331881, "grad_norm": 1.1287128925323486, "learning_rate": 1.3566176893803823e-05, "loss": 0.9077, "step": 237970 }, { "epoch": 1.5203863894816196, "grad_norm": 1.094420313835144, "learning_rate": 1.3562740675062546e-05, "loss": 0.8999, "step": 237980 }, { "epoch": 1.5204502766313583, "grad_norm": 0.8271204233169556, "learning_rate": 1.3559304823275987e-05, "loss": 0.8423, "step": 237990 }, { "epoch": 1.520514163781097, "grad_norm": 1.0374590158462524, "learning_rate": 1.355586933847876e-05, "loss": 0.9159, "step": 238000 }, { "epoch": 1.5205780509308358, "grad_norm": 0.7326343655586243, "learning_rate": 1.3552434220705446e-05, "loss": 0.7634, "step": 238010 }, { "epoch": 1.5206419380805745, "grad_norm": 0.7431874871253967, "learning_rate": 1.3548999469990663e-05, "loss": 0.895, "step": 238020 }, { "epoch": 1.5207058252303132, "grad_norm": 1.8628556728363037, "learning_rate": 1.3545565086368977e-05, "loss": 0.9853, "step": 238030 }, { "epoch": 1.5207697123800519, "grad_norm": 1.555378794670105, "learning_rate": 1.3542131069875007e-05, "loss": 1.007, "step": 238040 }, { "epoch": 1.5208335995297906, "grad_norm": 1.2259501218795776, "learning_rate": 1.35386974205433e-05, "loss": 0.7222, "step": 238050 }, { "epoch": 1.5208974866795293, "grad_norm": 0.804809033870697, "learning_rate": 1.3535264138408466e-05, "loss": 0.8414, "step": 238060 }, { "epoch": 1.520961373829268, "grad_norm": 0.8649407625198364, "learning_rate": 1.3531831223505059e-05, "loss": 0.6925, "step": 238070 }, { "epoch": 1.5210252609790067, "grad_norm": 0.8720270991325378, "learning_rate": 1.3528398675867653e-05, "loss": 0.7738, "step": 238080 }, { "epoch": 1.5210891481287454, "grad_norm": 0.8932050466537476, "learning_rate": 1.352496649553085e-05, "loss": 0.9991, "step": 238090 }, { "epoch": 1.5211530352784841, "grad_norm": 1.077694296836853, "learning_rate": 1.3521534682529163e-05, "loss": 0.9286, "step": 238100 }, { "epoch": 1.5212169224282228, "grad_norm": 0.7399711012840271, "learning_rate": 1.3518103236897206e-05, "loss": 0.8307, "step": 238110 }, { "epoch": 1.5212808095779615, "grad_norm": 0.7662681937217712, "learning_rate": 1.351467215866949e-05, "loss": 0.6985, "step": 238120 }, { "epoch": 1.5213446967277002, "grad_norm": 0.9886029958724976, "learning_rate": 1.3511241447880606e-05, "loss": 0.9631, "step": 238130 }, { "epoch": 1.521408583877439, "grad_norm": 0.6486461758613586, "learning_rate": 1.3507811104565072e-05, "loss": 1.0866, "step": 238140 }, { "epoch": 1.5214724710271776, "grad_norm": 0.5925334095954895, "learning_rate": 1.3504381128757465e-05, "loss": 0.7503, "step": 238150 }, { "epoch": 1.5215363581769163, "grad_norm": 0.9701249599456787, "learning_rate": 1.35009515204923e-05, "loss": 0.8577, "step": 238160 }, { "epoch": 1.5216002453266548, "grad_norm": 1.4359204769134521, "learning_rate": 1.3497522279804142e-05, "loss": 0.9309, "step": 238170 }, { "epoch": 1.5216641324763938, "grad_norm": 0.606021523475647, "learning_rate": 1.3494093406727504e-05, "loss": 0.8931, "step": 238180 }, { "epoch": 1.5217280196261322, "grad_norm": 1.6420189142227173, "learning_rate": 1.3490664901296935e-05, "loss": 1.0485, "step": 238190 }, { "epoch": 1.5217919067758712, "grad_norm": 0.7900764346122742, "learning_rate": 1.3487236763546946e-05, "loss": 0.8114, "step": 238200 }, { "epoch": 1.5218557939256097, "grad_norm": 1.0666368007659912, "learning_rate": 1.3483808993512088e-05, "loss": 0.962, "step": 238210 }, { "epoch": 1.5219196810753486, "grad_norm": 0.7549812197685242, "learning_rate": 1.3480381591226843e-05, "loss": 0.9305, "step": 238220 }, { "epoch": 1.521983568225087, "grad_norm": 1.072159767150879, "learning_rate": 1.3476954556725768e-05, "loss": 0.7658, "step": 238230 }, { "epoch": 1.522047455374826, "grad_norm": 1.3268358707427979, "learning_rate": 1.3473527890043342e-05, "loss": 1.0262, "step": 238240 }, { "epoch": 1.5221113425245645, "grad_norm": 0.9297296404838562, "learning_rate": 1.3470101591214102e-05, "loss": 0.8517, "step": 238250 }, { "epoch": 1.5221752296743034, "grad_norm": 0.6360511183738708, "learning_rate": 1.3466675660272532e-05, "loss": 0.6929, "step": 238260 }, { "epoch": 1.522239116824042, "grad_norm": 1.2996023893356323, "learning_rate": 1.3463250097253155e-05, "loss": 0.7778, "step": 238270 }, { "epoch": 1.5223030039737808, "grad_norm": 0.7415247559547424, "learning_rate": 1.3459824902190444e-05, "loss": 0.7141, "step": 238280 }, { "epoch": 1.5223668911235193, "grad_norm": 1.4085605144500732, "learning_rate": 1.3456400075118907e-05, "loss": 0.7232, "step": 238290 }, { "epoch": 1.5224307782732582, "grad_norm": 1.129459261894226, "learning_rate": 1.3452975616073054e-05, "loss": 0.6778, "step": 238300 }, { "epoch": 1.5224946654229967, "grad_norm": 1.0583432912826538, "learning_rate": 1.3449551525087339e-05, "loss": 1.1301, "step": 238310 }, { "epoch": 1.5225585525727356, "grad_norm": 1.1752994060516357, "learning_rate": 1.3446127802196273e-05, "loss": 0.81, "step": 238320 }, { "epoch": 1.5226224397224741, "grad_norm": 2.07598614692688, "learning_rate": 1.3442704447434313e-05, "loss": 0.9167, "step": 238330 }, { "epoch": 1.522686326872213, "grad_norm": 0.9651932716369629, "learning_rate": 1.3439281460835973e-05, "loss": 0.68, "step": 238340 }, { "epoch": 1.5227502140219515, "grad_norm": 0.8868149518966675, "learning_rate": 1.3435858842435667e-05, "loss": 0.9032, "step": 238350 }, { "epoch": 1.5228141011716905, "grad_norm": 0.7346360683441162, "learning_rate": 1.343243659226791e-05, "loss": 0.8223, "step": 238360 }, { "epoch": 1.522877988321429, "grad_norm": 0.7869923114776611, "learning_rate": 1.3429014710367139e-05, "loss": 1.0532, "step": 238370 }, { "epoch": 1.5229418754711679, "grad_norm": 1.0384732484817505, "learning_rate": 1.3425593196767844e-05, "loss": 0.8243, "step": 238380 }, { "epoch": 1.5230057626209064, "grad_norm": 0.7875847816467285, "learning_rate": 1.3422172051504445e-05, "loss": 0.7631, "step": 238390 }, { "epoch": 1.5230696497706453, "grad_norm": 0.836666464805603, "learning_rate": 1.3418751274611424e-05, "loss": 0.9747, "step": 238400 }, { "epoch": 1.5231335369203838, "grad_norm": 1.0695717334747314, "learning_rate": 1.3415330866123244e-05, "loss": 1.1035, "step": 238410 }, { "epoch": 1.5231974240701225, "grad_norm": 0.823056161403656, "learning_rate": 1.3411910826074308e-05, "loss": 0.9714, "step": 238420 }, { "epoch": 1.5232613112198612, "grad_norm": 0.8672632575035095, "learning_rate": 1.3408491154499103e-05, "loss": 0.8351, "step": 238430 }, { "epoch": 1.5233251983696, "grad_norm": 0.9678645133972168, "learning_rate": 1.3405071851432034e-05, "loss": 0.9878, "step": 238440 }, { "epoch": 1.5233890855193386, "grad_norm": 0.7150598168373108, "learning_rate": 1.3401652916907564e-05, "loss": 1.0002, "step": 238450 }, { "epoch": 1.5234529726690773, "grad_norm": 1.2159661054611206, "learning_rate": 1.3398234350960098e-05, "loss": 0.9008, "step": 238460 }, { "epoch": 1.523516859818816, "grad_norm": 0.9079484939575195, "learning_rate": 1.339481615362409e-05, "loss": 1.0404, "step": 238470 }, { "epoch": 1.5235807469685547, "grad_norm": 0.8452890515327454, "learning_rate": 1.3391398324933934e-05, "loss": 1.0406, "step": 238480 }, { "epoch": 1.5236446341182934, "grad_norm": 0.7699106931686401, "learning_rate": 1.3387980864924087e-05, "loss": 0.9912, "step": 238490 }, { "epoch": 1.5237085212680321, "grad_norm": 1.040332555770874, "learning_rate": 1.3384563773628932e-05, "loss": 0.7277, "step": 238500 }, { "epoch": 1.5237724084177708, "grad_norm": 3.4283716678619385, "learning_rate": 1.338114705108291e-05, "loss": 0.7815, "step": 238510 }, { "epoch": 1.5238362955675095, "grad_norm": 0.5395203232765198, "learning_rate": 1.33777306973204e-05, "loss": 0.9668, "step": 238520 }, { "epoch": 1.5239001827172483, "grad_norm": 0.8401491045951843, "learning_rate": 1.3374314712375845e-05, "loss": 0.6885, "step": 238530 }, { "epoch": 1.523964069866987, "grad_norm": 1.065290927886963, "learning_rate": 1.3370899096283607e-05, "loss": 0.8173, "step": 238540 }, { "epoch": 1.5240279570167257, "grad_norm": 1.0944569110870361, "learning_rate": 1.3367483849078122e-05, "loss": 0.7403, "step": 238550 }, { "epoch": 1.5240918441664644, "grad_norm": 0.7220026850700378, "learning_rate": 1.3364068970793746e-05, "loss": 1.334, "step": 238560 }, { "epoch": 1.524155731316203, "grad_norm": 0.9048059582710266, "learning_rate": 1.3360654461464912e-05, "loss": 0.7798, "step": 238570 }, { "epoch": 1.5242196184659418, "grad_norm": 1.1797586679458618, "learning_rate": 1.3357240321125963e-05, "loss": 0.8042, "step": 238580 }, { "epoch": 1.5242835056156805, "grad_norm": 0.9054034948348999, "learning_rate": 1.335382654981132e-05, "loss": 0.7306, "step": 238590 }, { "epoch": 1.5243473927654192, "grad_norm": 0.8827263712882996, "learning_rate": 1.335041314755534e-05, "loss": 0.9916, "step": 238600 }, { "epoch": 1.524411279915158, "grad_norm": 1.2693238258361816, "learning_rate": 1.3347000114392399e-05, "loss": 0.6009, "step": 238610 }, { "epoch": 1.5244751670648966, "grad_norm": 0.9437078833580017, "learning_rate": 1.3343587450356893e-05, "loss": 0.9352, "step": 238620 }, { "epoch": 1.5245390542146353, "grad_norm": 1.0291279554367065, "learning_rate": 1.3340175155483158e-05, "loss": 0.8882, "step": 238630 }, { "epoch": 1.524602941364374, "grad_norm": 0.6132214665412903, "learning_rate": 1.3336763229805593e-05, "loss": 0.8061, "step": 238640 }, { "epoch": 1.5246668285141127, "grad_norm": 0.7776349186897278, "learning_rate": 1.3333351673358524e-05, "loss": 0.8665, "step": 238650 }, { "epoch": 1.5247307156638512, "grad_norm": 1.9750133752822876, "learning_rate": 1.332994048617634e-05, "loss": 0.7649, "step": 238660 }, { "epoch": 1.5247946028135901, "grad_norm": 0.7864769697189331, "learning_rate": 1.3326529668293364e-05, "loss": 0.8988, "step": 238670 }, { "epoch": 1.5248584899633286, "grad_norm": 1.1065561771392822, "learning_rate": 1.3323119219743974e-05, "loss": 0.936, "step": 238680 }, { "epoch": 1.5249223771130676, "grad_norm": 0.6974082589149475, "learning_rate": 1.3319709140562492e-05, "loss": 0.8567, "step": 238690 }, { "epoch": 1.524986264262806, "grad_norm": 0.8889352679252625, "learning_rate": 1.331629943078328e-05, "loss": 0.9785, "step": 238700 }, { "epoch": 1.525050151412545, "grad_norm": 1.8425053358078003, "learning_rate": 1.331289009044066e-05, "loss": 0.8213, "step": 238710 }, { "epoch": 1.5251140385622834, "grad_norm": 0.8023721575737, "learning_rate": 1.330948111956899e-05, "loss": 0.9311, "step": 238720 }, { "epoch": 1.5251779257120224, "grad_norm": 0.7577361464500427, "learning_rate": 1.3306072518202573e-05, "loss": 0.9213, "step": 238730 }, { "epoch": 1.5252418128617609, "grad_norm": 0.9880037307739258, "learning_rate": 1.330266428637576e-05, "loss": 1.0437, "step": 238740 }, { "epoch": 1.5253057000114998, "grad_norm": 0.6256246566772461, "learning_rate": 1.3299256424122857e-05, "loss": 0.6359, "step": 238750 }, { "epoch": 1.5253695871612383, "grad_norm": 0.5575620532035828, "learning_rate": 1.3295848931478206e-05, "loss": 0.7845, "step": 238760 }, { "epoch": 1.5254334743109772, "grad_norm": 0.7465316653251648, "learning_rate": 1.3292441808476091e-05, "loss": 0.8861, "step": 238770 }, { "epoch": 1.5254973614607157, "grad_norm": 2.4132463932037354, "learning_rate": 1.328903505515086e-05, "loss": 0.7827, "step": 238780 }, { "epoch": 1.5255612486104546, "grad_norm": 0.8057778477668762, "learning_rate": 1.3285628671536793e-05, "loss": 0.7437, "step": 238790 }, { "epoch": 1.525625135760193, "grad_norm": 0.8477099537849426, "learning_rate": 1.328222265766822e-05, "loss": 1.0969, "step": 238800 }, { "epoch": 1.525689022909932, "grad_norm": 1.326920986175537, "learning_rate": 1.3278817013579414e-05, "loss": 0.7555, "step": 238810 }, { "epoch": 1.5257529100596705, "grad_norm": 0.48724564909935, "learning_rate": 1.327541173930471e-05, "loss": 0.8383, "step": 238820 }, { "epoch": 1.5258167972094094, "grad_norm": 0.7151726484298706, "learning_rate": 1.3272006834878376e-05, "loss": 0.9653, "step": 238830 }, { "epoch": 1.525880684359148, "grad_norm": 0.9274958372116089, "learning_rate": 1.3268602300334692e-05, "loss": 0.7029, "step": 238840 }, { "epoch": 1.5259445715088868, "grad_norm": 0.8506492376327515, "learning_rate": 1.3265198135707979e-05, "loss": 0.9839, "step": 238850 }, { "epoch": 1.5260084586586253, "grad_norm": 1.3150792121887207, "learning_rate": 1.3261794341032486e-05, "loss": 0.8877, "step": 238860 }, { "epoch": 1.5260723458083643, "grad_norm": 0.9653270244598389, "learning_rate": 1.3258390916342517e-05, "loss": 0.977, "step": 238870 }, { "epoch": 1.5261362329581027, "grad_norm": 1.3301193714141846, "learning_rate": 1.3254987861672325e-05, "loss": 1.0401, "step": 238880 }, { "epoch": 1.5262001201078417, "grad_norm": 0.6298654675483704, "learning_rate": 1.3251585177056208e-05, "loss": 0.7647, "step": 238890 }, { "epoch": 1.5262640072575802, "grad_norm": 0.9454180598258972, "learning_rate": 1.3248182862528407e-05, "loss": 0.9728, "step": 238900 }, { "epoch": 1.5263278944073189, "grad_norm": 0.7858530879020691, "learning_rate": 1.3244780918123217e-05, "loss": 0.8936, "step": 238910 }, { "epoch": 1.5263917815570576, "grad_norm": 0.6082397699356079, "learning_rate": 1.3241379343874866e-05, "loss": 0.7736, "step": 238920 }, { "epoch": 1.5264556687067963, "grad_norm": 1.0330744981765747, "learning_rate": 1.323797813981762e-05, "loss": 0.8991, "step": 238930 }, { "epoch": 1.526519555856535, "grad_norm": 1.9987537860870361, "learning_rate": 1.3234577305985752e-05, "loss": 0.8497, "step": 238940 }, { "epoch": 1.5265834430062737, "grad_norm": 1.100146770477295, "learning_rate": 1.323117684241349e-05, "loss": 0.7261, "step": 238950 }, { "epoch": 1.5266473301560124, "grad_norm": 2.2519874572753906, "learning_rate": 1.3227776749135095e-05, "loss": 0.7635, "step": 238960 }, { "epoch": 1.526711217305751, "grad_norm": 0.9835031628608704, "learning_rate": 1.3224377026184787e-05, "loss": 0.9724, "step": 238970 }, { "epoch": 1.5267751044554898, "grad_norm": 0.9068618416786194, "learning_rate": 1.3220977673596835e-05, "loss": 0.8858, "step": 238980 }, { "epoch": 1.5268389916052285, "grad_norm": 1.3341480493545532, "learning_rate": 1.3217578691405441e-05, "loss": 0.993, "step": 238990 }, { "epoch": 1.5269028787549672, "grad_norm": 0.763285219669342, "learning_rate": 1.3214180079644866e-05, "loss": 0.74, "step": 239000 }, { "epoch": 1.526966765904706, "grad_norm": 1.4933006763458252, "learning_rate": 1.3210781838349306e-05, "loss": 1.0131, "step": 239010 }, { "epoch": 1.5270306530544446, "grad_norm": 0.6231247782707214, "learning_rate": 1.3207383967553017e-05, "loss": 0.9825, "step": 239020 }, { "epoch": 1.5270945402041833, "grad_norm": 0.9345896244049072, "learning_rate": 1.320398646729018e-05, "loss": 1.005, "step": 239030 }, { "epoch": 1.527158427353922, "grad_norm": 1.0000743865966797, "learning_rate": 1.3200589337595054e-05, "loss": 0.9002, "step": 239040 }, { "epoch": 1.5272223145036607, "grad_norm": 0.8784435391426086, "learning_rate": 1.319719257850181e-05, "loss": 0.8738, "step": 239050 }, { "epoch": 1.5272862016533995, "grad_norm": 1.6685031652450562, "learning_rate": 1.3193796190044694e-05, "loss": 0.966, "step": 239060 }, { "epoch": 1.5273500888031382, "grad_norm": 0.9063624739646912, "learning_rate": 1.3190400172257877e-05, "loss": 1.0455, "step": 239070 }, { "epoch": 1.5274139759528769, "grad_norm": 0.9266459345817566, "learning_rate": 1.318700452517559e-05, "loss": 0.6222, "step": 239080 }, { "epoch": 1.5274778631026156, "grad_norm": 0.9492422938346863, "learning_rate": 1.3183609248832001e-05, "loss": 0.6274, "step": 239090 }, { "epoch": 1.5275417502523543, "grad_norm": 1.0158274173736572, "learning_rate": 1.3180214343261333e-05, "loss": 0.8135, "step": 239100 }, { "epoch": 1.527605637402093, "grad_norm": 1.1831732988357544, "learning_rate": 1.3176819808497742e-05, "loss": 1.0633, "step": 239110 }, { "epoch": 1.5276695245518317, "grad_norm": 1.1651058197021484, "learning_rate": 1.3173425644575449e-05, "loss": 1.1724, "step": 239120 }, { "epoch": 1.5277334117015704, "grad_norm": 0.7208083868026733, "learning_rate": 1.3170031851528602e-05, "loss": 0.7644, "step": 239130 }, { "epoch": 1.527797298851309, "grad_norm": 0.7914683818817139, "learning_rate": 1.3166638429391398e-05, "loss": 0.7051, "step": 239140 }, { "epoch": 1.5278611860010476, "grad_norm": 0.7424027919769287, "learning_rate": 1.3163245378198025e-05, "loss": 0.862, "step": 239150 }, { "epoch": 1.5279250731507865, "grad_norm": 1.3931986093521118, "learning_rate": 1.3159852697982627e-05, "loss": 0.6573, "step": 239160 }, { "epoch": 1.527988960300525, "grad_norm": 0.9321253895759583, "learning_rate": 1.3156460388779396e-05, "loss": 1.0719, "step": 239170 }, { "epoch": 1.528052847450264, "grad_norm": 0.8587426543235779, "learning_rate": 1.3153068450622468e-05, "loss": 0.7854, "step": 239180 }, { "epoch": 1.5281167346000024, "grad_norm": 0.9214495420455933, "learning_rate": 1.3149676883546035e-05, "loss": 0.9369, "step": 239190 }, { "epoch": 1.5281806217497413, "grad_norm": 1.1665353775024414, "learning_rate": 1.314628568758422e-05, "loss": 0.8909, "step": 239200 }, { "epoch": 1.5282445088994798, "grad_norm": 1.0801894664764404, "learning_rate": 1.3142894862771205e-05, "loss": 0.8334, "step": 239210 }, { "epoch": 1.5283083960492188, "grad_norm": 0.8318184018135071, "learning_rate": 1.3139504409141113e-05, "loss": 0.8276, "step": 239220 }, { "epoch": 1.5283722831989572, "grad_norm": 0.9736336469650269, "learning_rate": 1.313611432672811e-05, "loss": 0.7969, "step": 239230 }, { "epoch": 1.5284361703486962, "grad_norm": 0.8926606178283691, "learning_rate": 1.3132724615566317e-05, "loss": 0.7547, "step": 239240 }, { "epoch": 1.5285000574984346, "grad_norm": 0.926861584186554, "learning_rate": 1.3129335275689897e-05, "loss": 0.9206, "step": 239250 }, { "epoch": 1.5285639446481736, "grad_norm": 0.6626387238502502, "learning_rate": 1.3125946307132947e-05, "loss": 0.747, "step": 239260 }, { "epoch": 1.528627831797912, "grad_norm": 2.2653844356536865, "learning_rate": 1.3122557709929639e-05, "loss": 0.9802, "step": 239270 }, { "epoch": 1.528691718947651, "grad_norm": 0.8143163323402405, "learning_rate": 1.3119169484114063e-05, "loss": 0.896, "step": 239280 }, { "epoch": 1.5287556060973895, "grad_norm": 1.0263252258300781, "learning_rate": 1.311578162972037e-05, "loss": 0.897, "step": 239290 }, { "epoch": 1.5288194932471284, "grad_norm": 1.1428170204162598, "learning_rate": 1.3112394146782653e-05, "loss": 0.7813, "step": 239300 }, { "epoch": 1.5288833803968669, "grad_norm": 1.001025915145874, "learning_rate": 1.3109007035335052e-05, "loss": 0.7202, "step": 239310 }, { "epoch": 1.5289472675466058, "grad_norm": 1.0566990375518799, "learning_rate": 1.310562029541167e-05, "loss": 0.9607, "step": 239320 }, { "epoch": 1.5290111546963443, "grad_norm": 1.396536111831665, "learning_rate": 1.3102233927046586e-05, "loss": 0.717, "step": 239330 }, { "epoch": 1.5290750418460832, "grad_norm": 0.9807648062705994, "learning_rate": 1.3098847930273949e-05, "loss": 0.8423, "step": 239340 }, { "epoch": 1.5291389289958217, "grad_norm": 1.382917046546936, "learning_rate": 1.309546230512782e-05, "loss": 0.7852, "step": 239350 }, { "epoch": 1.5292028161455606, "grad_norm": 1.0471018552780151, "learning_rate": 1.3092077051642332e-05, "loss": 0.8256, "step": 239360 }, { "epoch": 1.5292667032952991, "grad_norm": 1.5218364000320435, "learning_rate": 1.3088692169851535e-05, "loss": 0.8289, "step": 239370 }, { "epoch": 1.5293305904450378, "grad_norm": 0.7558670043945312, "learning_rate": 1.3085307659789559e-05, "loss": 0.8752, "step": 239380 }, { "epoch": 1.5293944775947765, "grad_norm": 0.6498563289642334, "learning_rate": 1.3081923521490463e-05, "loss": 0.9807, "step": 239390 }, { "epoch": 1.5294583647445152, "grad_norm": 0.7214164733886719, "learning_rate": 1.3078539754988339e-05, "loss": 1.0477, "step": 239400 }, { "epoch": 1.529522251894254, "grad_norm": 0.9879746437072754, "learning_rate": 1.3075156360317253e-05, "loss": 0.8445, "step": 239410 }, { "epoch": 1.5295861390439927, "grad_norm": 0.7819598913192749, "learning_rate": 1.30717733375113e-05, "loss": 0.9972, "step": 239420 }, { "epoch": 1.5296500261937314, "grad_norm": 2.294267177581787, "learning_rate": 1.3068390686604525e-05, "loss": 1.0819, "step": 239430 }, { "epoch": 1.52971391334347, "grad_norm": 1.4084625244140625, "learning_rate": 1.306500840763102e-05, "loss": 0.9889, "step": 239440 }, { "epoch": 1.5297778004932088, "grad_norm": 0.8941295146942139, "learning_rate": 1.3061626500624819e-05, "loss": 0.6393, "step": 239450 }, { "epoch": 1.5298416876429475, "grad_norm": 0.7554815411567688, "learning_rate": 1.3058244965619993e-05, "loss": 0.7597, "step": 239460 }, { "epoch": 1.5299055747926862, "grad_norm": 1.0731430053710938, "learning_rate": 1.3054863802650613e-05, "loss": 0.9949, "step": 239470 }, { "epoch": 1.529969461942425, "grad_norm": 0.7292632460594177, "learning_rate": 1.3051483011750704e-05, "loss": 0.8798, "step": 239480 }, { "epoch": 1.5300333490921636, "grad_norm": 0.9323152899742126, "learning_rate": 1.3048102592954342e-05, "loss": 0.6778, "step": 239490 }, { "epoch": 1.5300972362419023, "grad_norm": 0.7616154551506042, "learning_rate": 1.304472254629554e-05, "loss": 0.8363, "step": 239500 }, { "epoch": 1.530161123391641, "grad_norm": 0.960150420665741, "learning_rate": 1.3041342871808366e-05, "loss": 0.7406, "step": 239510 }, { "epoch": 1.5302250105413797, "grad_norm": 1.2647629976272583, "learning_rate": 1.3037963569526829e-05, "loss": 0.9212, "step": 239520 }, { "epoch": 1.5302888976911184, "grad_norm": 0.916258692741394, "learning_rate": 1.3034584639484986e-05, "loss": 0.7491, "step": 239530 }, { "epoch": 1.5303527848408571, "grad_norm": 2.347499132156372, "learning_rate": 1.3031206081716846e-05, "loss": 0.9045, "step": 239540 }, { "epoch": 1.5304166719905958, "grad_norm": 1.465839147567749, "learning_rate": 1.3027827896256456e-05, "loss": 0.6732, "step": 239550 }, { "epoch": 1.5304805591403345, "grad_norm": 0.8912522196769714, "learning_rate": 1.3024450083137812e-05, "loss": 0.8075, "step": 239560 }, { "epoch": 1.5305444462900732, "grad_norm": 0.9872187376022339, "learning_rate": 1.302107264239496e-05, "loss": 0.8532, "step": 239570 }, { "epoch": 1.530608333439812, "grad_norm": 0.9597538709640503, "learning_rate": 1.3017695574061878e-05, "loss": 0.7483, "step": 239580 }, { "epoch": 1.5306722205895507, "grad_norm": 0.959831953048706, "learning_rate": 1.3014318878172615e-05, "loss": 0.9163, "step": 239590 }, { "epoch": 1.5307361077392894, "grad_norm": 1.2454932928085327, "learning_rate": 1.3010942554761141e-05, "loss": 0.9478, "step": 239600 }, { "epoch": 1.530799994889028, "grad_norm": 1.333516001701355, "learning_rate": 1.3007566603861493e-05, "loss": 1.1341, "step": 239610 }, { "epoch": 1.5308638820387668, "grad_norm": 2.669330358505249, "learning_rate": 1.3004191025507628e-05, "loss": 0.8008, "step": 239620 }, { "epoch": 1.5309277691885055, "grad_norm": 1.3271132707595825, "learning_rate": 1.3000815819733592e-05, "loss": 0.8742, "step": 239630 }, { "epoch": 1.530991656338244, "grad_norm": 0.5166794657707214, "learning_rate": 1.2997440986573327e-05, "loss": 0.7715, "step": 239640 }, { "epoch": 1.531055543487983, "grad_norm": 1.243624210357666, "learning_rate": 1.299406652606086e-05, "loss": 1.0447, "step": 239650 }, { "epoch": 1.5311194306377214, "grad_norm": 0.8429069519042969, "learning_rate": 1.2990692438230151e-05, "loss": 0.822, "step": 239660 }, { "epoch": 1.5311833177874603, "grad_norm": 0.8280727863311768, "learning_rate": 1.2987318723115177e-05, "loss": 0.7127, "step": 239670 }, { "epoch": 1.5312472049371988, "grad_norm": 0.8618836402893066, "learning_rate": 1.298394538074994e-05, "loss": 0.8952, "step": 239680 }, { "epoch": 1.5313110920869377, "grad_norm": 0.7919678092002869, "learning_rate": 1.2980572411168384e-05, "loss": 0.9811, "step": 239690 }, { "epoch": 1.5313749792366762, "grad_norm": 0.9257929921150208, "learning_rate": 1.2977199814404505e-05, "loss": 0.9175, "step": 239700 }, { "epoch": 1.5314388663864151, "grad_norm": 1.1609474420547485, "learning_rate": 1.2973827590492238e-05, "loss": 0.7815, "step": 239710 }, { "epoch": 1.5315027535361536, "grad_norm": 1.3709371089935303, "learning_rate": 1.297045573946557e-05, "loss": 0.8933, "step": 239720 }, { "epoch": 1.5315666406858925, "grad_norm": 0.5066688656806946, "learning_rate": 1.2967084261358436e-05, "loss": 0.6719, "step": 239730 }, { "epoch": 1.531630527835631, "grad_norm": 0.6616301536560059, "learning_rate": 1.296371315620482e-05, "loss": 0.8534, "step": 239740 }, { "epoch": 1.53169441498537, "grad_norm": 0.8841010332107544, "learning_rate": 1.2960342424038636e-05, "loss": 0.9464, "step": 239750 }, { "epoch": 1.5317583021351084, "grad_norm": 0.9628351330757141, "learning_rate": 1.2956972064893857e-05, "loss": 0.7827, "step": 239760 }, { "epoch": 1.5318221892848474, "grad_norm": 2.0725762844085693, "learning_rate": 1.2953602078804411e-05, "loss": 0.8328, "step": 239770 }, { "epoch": 1.5318860764345859, "grad_norm": 0.7003540992736816, "learning_rate": 1.2950232465804252e-05, "loss": 0.8365, "step": 239780 }, { "epoch": 1.5319499635843248, "grad_norm": 0.5919496417045593, "learning_rate": 1.2946863225927302e-05, "loss": 0.9907, "step": 239790 }, { "epoch": 1.5320138507340633, "grad_norm": 0.9051730036735535, "learning_rate": 1.294349435920748e-05, "loss": 0.8912, "step": 239800 }, { "epoch": 1.5320777378838022, "grad_norm": 0.8595243692398071, "learning_rate": 1.2940125865678748e-05, "loss": 1.1719, "step": 239810 }, { "epoch": 1.5321416250335407, "grad_norm": 1.0000742673873901, "learning_rate": 1.2936757745374994e-05, "loss": 0.9054, "step": 239820 }, { "epoch": 1.5322055121832796, "grad_norm": 1.2009698152542114, "learning_rate": 1.2933389998330164e-05, "loss": 0.8474, "step": 239830 }, { "epoch": 1.532269399333018, "grad_norm": 0.7443961501121521, "learning_rate": 1.2930022624578153e-05, "loss": 0.9226, "step": 239840 }, { "epoch": 1.532333286482757, "grad_norm": 0.5982275009155273, "learning_rate": 1.29266556241529e-05, "loss": 0.7455, "step": 239850 }, { "epoch": 1.5323971736324955, "grad_norm": 0.5646331906318665, "learning_rate": 1.2923288997088284e-05, "loss": 0.7711, "step": 239860 }, { "epoch": 1.5324610607822342, "grad_norm": 1.0039056539535522, "learning_rate": 1.2919922743418234e-05, "loss": 0.8096, "step": 239870 }, { "epoch": 1.532524947931973, "grad_norm": 1.1351054906845093, "learning_rate": 1.2916556863176627e-05, "loss": 0.6707, "step": 239880 }, { "epoch": 1.5325888350817116, "grad_norm": 1.437688946723938, "learning_rate": 1.2913191356397392e-05, "loss": 0.7691, "step": 239890 }, { "epoch": 1.5326527222314503, "grad_norm": 0.7194865942001343, "learning_rate": 1.2909826223114385e-05, "loss": 0.7505, "step": 239900 }, { "epoch": 1.532716609381189, "grad_norm": 0.7203089594841003, "learning_rate": 1.2906461463361536e-05, "loss": 0.702, "step": 239910 }, { "epoch": 1.5327804965309277, "grad_norm": 0.7741053104400635, "learning_rate": 1.2903097077172693e-05, "loss": 0.973, "step": 239920 }, { "epoch": 1.5328443836806664, "grad_norm": 0.8206771016120911, "learning_rate": 1.2899733064581771e-05, "loss": 0.8626, "step": 239930 }, { "epoch": 1.5329082708304052, "grad_norm": 1.8737119436264038, "learning_rate": 1.2896369425622617e-05, "loss": 0.7082, "step": 239940 }, { "epoch": 1.5329721579801439, "grad_norm": 0.880370020866394, "learning_rate": 1.2893006160329135e-05, "loss": 0.8168, "step": 239950 }, { "epoch": 1.5330360451298826, "grad_norm": 0.7264848947525024, "learning_rate": 1.2889643268735174e-05, "loss": 0.6213, "step": 239960 }, { "epoch": 1.5330999322796213, "grad_norm": 0.7904850244522095, "learning_rate": 1.2886280750874624e-05, "loss": 0.9023, "step": 239970 }, { "epoch": 1.53316381942936, "grad_norm": 1.2321357727050781, "learning_rate": 1.288291860678132e-05, "loss": 0.9196, "step": 239980 }, { "epoch": 1.5332277065790987, "grad_norm": 1.2595999240875244, "learning_rate": 1.2879556836489131e-05, "loss": 0.9804, "step": 239990 }, { "epoch": 1.5332915937288374, "grad_norm": 0.8715834021568298, "learning_rate": 1.2876195440031936e-05, "loss": 0.9739, "step": 240000 }, { "epoch": 1.533355480878576, "grad_norm": 0.9129816293716431, "learning_rate": 1.2872834417443559e-05, "loss": 0.8105, "step": 240010 }, { "epoch": 1.5334193680283148, "grad_norm": 0.6168598532676697, "learning_rate": 1.2869473768757867e-05, "loss": 0.7125, "step": 240020 }, { "epoch": 1.5334832551780535, "grad_norm": 0.7878352403640747, "learning_rate": 1.2866113494008686e-05, "loss": 0.7464, "step": 240030 }, { "epoch": 1.5335471423277922, "grad_norm": 0.7556173205375671, "learning_rate": 1.2862753593229882e-05, "loss": 0.7382, "step": 240040 }, { "epoch": 1.533611029477531, "grad_norm": 1.1012219190597534, "learning_rate": 1.2859394066455265e-05, "loss": 0.8976, "step": 240050 }, { "epoch": 1.5336749166272696, "grad_norm": 1.080224871635437, "learning_rate": 1.2856034913718696e-05, "loss": 0.8698, "step": 240060 }, { "epoch": 1.5337388037770083, "grad_norm": 1.3037000894546509, "learning_rate": 1.285267613505397e-05, "loss": 0.9602, "step": 240070 }, { "epoch": 1.533802690926747, "grad_norm": 0.8306534886360168, "learning_rate": 1.2849317730494948e-05, "loss": 0.8438, "step": 240080 }, { "epoch": 1.5338665780764857, "grad_norm": 1.6114468574523926, "learning_rate": 1.2845959700075421e-05, "loss": 0.7366, "step": 240090 }, { "epoch": 1.5339304652262244, "grad_norm": 0.9883949160575867, "learning_rate": 1.2842602043829239e-05, "loss": 0.8261, "step": 240100 }, { "epoch": 1.533994352375963, "grad_norm": 1.1887831687927246, "learning_rate": 1.2839244761790187e-05, "loss": 0.9771, "step": 240110 }, { "epoch": 1.5340582395257019, "grad_norm": 0.5754899978637695, "learning_rate": 1.2835887853992102e-05, "loss": 1.1814, "step": 240120 }, { "epoch": 1.5341221266754403, "grad_norm": 1.040809988975525, "learning_rate": 1.2832531320468765e-05, "loss": 0.9311, "step": 240130 }, { "epoch": 1.5341860138251793, "grad_norm": 1.1350606679916382, "learning_rate": 1.2829175161254003e-05, "loss": 0.9305, "step": 240140 }, { "epoch": 1.5342499009749178, "grad_norm": 1.1955920457839966, "learning_rate": 1.2825819376381593e-05, "loss": 0.8709, "step": 240150 }, { "epoch": 1.5343137881246567, "grad_norm": 0.953775942325592, "learning_rate": 1.2822463965885356e-05, "loss": 0.8353, "step": 240160 }, { "epoch": 1.5343776752743952, "grad_norm": 0.9397642612457275, "learning_rate": 1.2819108929799051e-05, "loss": 0.8255, "step": 240170 }, { "epoch": 1.534441562424134, "grad_norm": 1.1109893321990967, "learning_rate": 1.2815754268156505e-05, "loss": 0.767, "step": 240180 }, { "epoch": 1.5345054495738726, "grad_norm": 1.0977774858474731, "learning_rate": 1.2812399980991469e-05, "loss": 0.8658, "step": 240190 }, { "epoch": 1.5345693367236115, "grad_norm": 1.2020318508148193, "learning_rate": 1.2809046068337732e-05, "loss": 0.8189, "step": 240200 }, { "epoch": 1.53463322387335, "grad_norm": 2.4566478729248047, "learning_rate": 1.2805692530229096e-05, "loss": 0.8471, "step": 240210 }, { "epoch": 1.534697111023089, "grad_norm": 0.9017288088798523, "learning_rate": 1.2802339366699295e-05, "loss": 0.8161, "step": 240220 }, { "epoch": 1.5347609981728274, "grad_norm": 0.9971731305122375, "learning_rate": 1.2798986577782135e-05, "loss": 0.9237, "step": 240230 }, { "epoch": 1.5348248853225663, "grad_norm": 0.6848049163818359, "learning_rate": 1.2795634163511345e-05, "loss": 0.7426, "step": 240240 }, { "epoch": 1.5348887724723048, "grad_norm": 0.6405489444732666, "learning_rate": 1.2792282123920719e-05, "loss": 0.7286, "step": 240250 }, { "epoch": 1.5349526596220437, "grad_norm": 0.6404396295547485, "learning_rate": 1.278893045904399e-05, "loss": 0.7565, "step": 240260 }, { "epoch": 1.5350165467717822, "grad_norm": 0.7233564853668213, "learning_rate": 1.278557916891494e-05, "loss": 1.0074, "step": 240270 }, { "epoch": 1.5350804339215212, "grad_norm": 1.2777774333953857, "learning_rate": 1.2782228253567303e-05, "loss": 0.8462, "step": 240280 }, { "epoch": 1.5351443210712596, "grad_norm": 1.4462738037109375, "learning_rate": 1.277887771303482e-05, "loss": 0.8536, "step": 240290 }, { "epoch": 1.5352082082209986, "grad_norm": 1.3389897346496582, "learning_rate": 1.2775527547351229e-05, "loss": 0.7831, "step": 240300 }, { "epoch": 1.535272095370737, "grad_norm": 0.7546529173851013, "learning_rate": 1.2772177756550274e-05, "loss": 0.9307, "step": 240310 }, { "epoch": 1.535335982520476, "grad_norm": 1.994634747505188, "learning_rate": 1.2768828340665717e-05, "loss": 0.7946, "step": 240320 }, { "epoch": 1.5353998696702145, "grad_norm": 1.2724889516830444, "learning_rate": 1.2765479299731254e-05, "loss": 0.7879, "step": 240330 }, { "epoch": 1.5354637568199534, "grad_norm": 0.7900464534759521, "learning_rate": 1.2762130633780634e-05, "loss": 0.8841, "step": 240340 }, { "epoch": 1.5355276439696919, "grad_norm": 3.2594213485717773, "learning_rate": 1.2758782342847564e-05, "loss": 0.9508, "step": 240350 }, { "epoch": 1.5355915311194306, "grad_norm": 0.8626235127449036, "learning_rate": 1.275543442696579e-05, "loss": 0.6073, "step": 240360 }, { "epoch": 1.5356554182691693, "grad_norm": 0.8508633971214294, "learning_rate": 1.2752086886168996e-05, "loss": 0.9052, "step": 240370 }, { "epoch": 1.535719305418908, "grad_norm": 0.630959153175354, "learning_rate": 1.2748739720490926e-05, "loss": 0.6618, "step": 240380 }, { "epoch": 1.5357831925686467, "grad_norm": 0.8615701794624329, "learning_rate": 1.274539292996526e-05, "loss": 0.7737, "step": 240390 }, { "epoch": 1.5358470797183854, "grad_norm": 0.7362240552902222, "learning_rate": 1.2742046514625728e-05, "loss": 0.859, "step": 240400 }, { "epoch": 1.5359109668681241, "grad_norm": 0.824510395526886, "learning_rate": 1.273870047450601e-05, "loss": 0.883, "step": 240410 }, { "epoch": 1.5359748540178628, "grad_norm": 0.8618409037590027, "learning_rate": 1.2735354809639827e-05, "loss": 0.5913, "step": 240420 }, { "epoch": 1.5360387411676015, "grad_norm": 1.2590079307556152, "learning_rate": 1.2732009520060844e-05, "loss": 0.8846, "step": 240430 }, { "epoch": 1.5361026283173402, "grad_norm": 0.8605117797851562, "learning_rate": 1.272866460580278e-05, "loss": 0.9529, "step": 240440 }, { "epoch": 1.536166515467079, "grad_norm": 1.3561952114105225, "learning_rate": 1.2725320066899294e-05, "loss": 0.987, "step": 240450 }, { "epoch": 1.5362304026168176, "grad_norm": 0.9079533815383911, "learning_rate": 1.2721975903384097e-05, "loss": 0.8781, "step": 240460 }, { "epoch": 1.5362942897665564, "grad_norm": 1.1836191415786743, "learning_rate": 1.271863211529084e-05, "loss": 0.9204, "step": 240470 }, { "epoch": 1.536358176916295, "grad_norm": 0.9336318373680115, "learning_rate": 1.2715288702653228e-05, "loss": 0.8499, "step": 240480 }, { "epoch": 1.5364220640660338, "grad_norm": 0.957920253276825, "learning_rate": 1.2711945665504894e-05, "loss": 0.9255, "step": 240490 }, { "epoch": 1.5364859512157725, "grad_norm": 0.9811254739761353, "learning_rate": 1.2708603003879544e-05, "loss": 0.7817, "step": 240500 }, { "epoch": 1.5365498383655112, "grad_norm": 1.009942889213562, "learning_rate": 1.2705260717810808e-05, "loss": 0.8197, "step": 240510 }, { "epoch": 1.5366137255152499, "grad_norm": 1.3132307529449463, "learning_rate": 1.2701918807332358e-05, "loss": 0.8946, "step": 240520 }, { "epoch": 1.5366776126649886, "grad_norm": 0.5535053610801697, "learning_rate": 1.2698577272477868e-05, "loss": 1.0185, "step": 240530 }, { "epoch": 1.5367414998147273, "grad_norm": 0.8424240946769714, "learning_rate": 1.2695236113280967e-05, "loss": 0.8985, "step": 240540 }, { "epoch": 1.536805386964466, "grad_norm": 0.9007782936096191, "learning_rate": 1.2691895329775321e-05, "loss": 0.799, "step": 240550 }, { "epoch": 1.5368692741142047, "grad_norm": 2.0551929473876953, "learning_rate": 1.268855492199455e-05, "loss": 0.8517, "step": 240560 }, { "epoch": 1.5369331612639434, "grad_norm": 0.8320748805999756, "learning_rate": 1.268521488997233e-05, "loss": 0.8527, "step": 240570 }, { "epoch": 1.5369970484136821, "grad_norm": 0.9764295220375061, "learning_rate": 1.2681875233742258e-05, "loss": 0.858, "step": 240580 }, { "epoch": 1.5370609355634208, "grad_norm": 0.9124672412872314, "learning_rate": 1.2678535953338e-05, "loss": 1.1554, "step": 240590 }, { "epoch": 1.5371248227131593, "grad_norm": 0.9281206130981445, "learning_rate": 1.2675197048793164e-05, "loss": 0.9436, "step": 240600 }, { "epoch": 1.5371887098628982, "grad_norm": 0.7381476759910583, "learning_rate": 1.2671858520141394e-05, "loss": 0.9653, "step": 240610 }, { "epoch": 1.5372525970126367, "grad_norm": 0.7912752628326416, "learning_rate": 1.2668520367416286e-05, "loss": 1.0923, "step": 240620 }, { "epoch": 1.5373164841623757, "grad_norm": 1.3308603763580322, "learning_rate": 1.2665182590651498e-05, "loss": 0.9441, "step": 240630 }, { "epoch": 1.5373803713121141, "grad_norm": 1.331600546836853, "learning_rate": 1.26618451898806e-05, "loss": 0.7931, "step": 240640 }, { "epoch": 1.537444258461853, "grad_norm": 0.9687751531600952, "learning_rate": 1.2658508165137234e-05, "loss": 0.8497, "step": 240650 }, { "epoch": 1.5375081456115915, "grad_norm": 0.7466058731079102, "learning_rate": 1.2655505164399511e-05, "loss": 0.7598, "step": 240660 }, { "epoch": 1.5375720327613305, "grad_norm": 1.5060503482818604, "learning_rate": 1.2652168854201007e-05, "loss": 1.0108, "step": 240670 }, { "epoch": 1.537635919911069, "grad_norm": 1.3408730030059814, "learning_rate": 1.264883292012749e-05, "loss": 0.8109, "step": 240680 }, { "epoch": 1.5376998070608079, "grad_norm": 1.010647177696228, "learning_rate": 1.2645497362212521e-05, "loss": 1.1945, "step": 240690 }, { "epoch": 1.5377636942105464, "grad_norm": 0.8791874647140503, "learning_rate": 1.2642162180489724e-05, "loss": 0.7551, "step": 240700 }, { "epoch": 1.5378275813602853, "grad_norm": 1.3113328218460083, "learning_rate": 1.2638827374992662e-05, "loss": 0.885, "step": 240710 }, { "epoch": 1.5378914685100238, "grad_norm": 0.8393133282661438, "learning_rate": 1.263549294575494e-05, "loss": 0.9787, "step": 240720 }, { "epoch": 1.5379553556597627, "grad_norm": 0.7710729241371155, "learning_rate": 1.263215889281012e-05, "loss": 1.0568, "step": 240730 }, { "epoch": 1.5380192428095012, "grad_norm": 0.9903393387794495, "learning_rate": 1.2628825216191802e-05, "loss": 0.8698, "step": 240740 }, { "epoch": 1.5380831299592401, "grad_norm": 0.9258543252944946, "learning_rate": 1.2625491915933524e-05, "loss": 0.89, "step": 240750 }, { "epoch": 1.5381470171089786, "grad_norm": 0.8964667916297913, "learning_rate": 1.26221589920689e-05, "loss": 0.8349, "step": 240760 }, { "epoch": 1.5382109042587175, "grad_norm": 2.6720521450042725, "learning_rate": 1.2618826444631448e-05, "loss": 0.9522, "step": 240770 }, { "epoch": 1.538274791408456, "grad_norm": 1.3586536645889282, "learning_rate": 1.2615494273654771e-05, "loss": 0.921, "step": 240780 }, { "epoch": 1.538338678558195, "grad_norm": 1.9273499250411987, "learning_rate": 1.2612162479172395e-05, "loss": 0.7124, "step": 240790 }, { "epoch": 1.5384025657079334, "grad_norm": 2.5090017318725586, "learning_rate": 1.2608831061217902e-05, "loss": 0.834, "step": 240800 }, { "epoch": 1.5384664528576724, "grad_norm": 1.410423994064331, "learning_rate": 1.2605500019824811e-05, "loss": 0.775, "step": 240810 }, { "epoch": 1.5385303400074108, "grad_norm": 1.0567747354507446, "learning_rate": 1.2602169355026705e-05, "loss": 0.9427, "step": 240820 }, { "epoch": 1.5385942271571498, "grad_norm": 0.9426378607749939, "learning_rate": 1.2598839066857094e-05, "loss": 0.7259, "step": 240830 }, { "epoch": 1.5386581143068883, "grad_norm": 0.8864075541496277, "learning_rate": 1.2595509155349522e-05, "loss": 0.8738, "step": 240840 }, { "epoch": 1.538722001456627, "grad_norm": 2.0888774394989014, "learning_rate": 1.2592179620537554e-05, "loss": 0.7952, "step": 240850 }, { "epoch": 1.5387858886063657, "grad_norm": 0.895809531211853, "learning_rate": 1.2588850462454682e-05, "loss": 0.723, "step": 240860 }, { "epoch": 1.5388497757561044, "grad_norm": 0.8410398364067078, "learning_rate": 1.2585521681134466e-05, "loss": 0.9484, "step": 240870 }, { "epoch": 1.538913662905843, "grad_norm": 1.5574727058410645, "learning_rate": 1.2582193276610398e-05, "loss": 1.06, "step": 240880 }, { "epoch": 1.5389775500555818, "grad_norm": 1.247363805770874, "learning_rate": 1.2578865248916039e-05, "loss": 0.9964, "step": 240890 }, { "epoch": 1.5390414372053205, "grad_norm": 0.6102619767189026, "learning_rate": 1.2575537598084853e-05, "loss": 0.8924, "step": 240900 }, { "epoch": 1.5391053243550592, "grad_norm": 0.8611202239990234, "learning_rate": 1.2572210324150419e-05, "loss": 0.9274, "step": 240910 }, { "epoch": 1.539169211504798, "grad_norm": 4.812923431396484, "learning_rate": 1.2568883427146172e-05, "loss": 0.8025, "step": 240920 }, { "epoch": 1.5392330986545366, "grad_norm": 1.0879570245742798, "learning_rate": 1.2565556907105669e-05, "loss": 0.9616, "step": 240930 }, { "epoch": 1.5392969858042753, "grad_norm": 1.8127998113632202, "learning_rate": 1.2562230764062377e-05, "loss": 0.8644, "step": 240940 }, { "epoch": 1.539360872954014, "grad_norm": 0.8016753792762756, "learning_rate": 1.2558904998049808e-05, "loss": 0.9531, "step": 240950 }, { "epoch": 1.5394247601037527, "grad_norm": 0.5597435235977173, "learning_rate": 1.2555579609101475e-05, "loss": 0.6647, "step": 240960 }, { "epoch": 1.5394886472534914, "grad_norm": 1.3119679689407349, "learning_rate": 1.2552254597250835e-05, "loss": 0.8009, "step": 240970 }, { "epoch": 1.5395525344032301, "grad_norm": 1.1380845308303833, "learning_rate": 1.2548929962531397e-05, "loss": 0.8643, "step": 240980 }, { "epoch": 1.5396164215529689, "grad_norm": 0.8482895493507385, "learning_rate": 1.2545605704976626e-05, "loss": 0.8643, "step": 240990 }, { "epoch": 1.5396803087027076, "grad_norm": 0.9055123925209045, "learning_rate": 1.2542281824620017e-05, "loss": 0.6563, "step": 241000 }, { "epoch": 1.5397441958524463, "grad_norm": 1.2197997570037842, "learning_rate": 1.2538958321495026e-05, "loss": 0.9265, "step": 241010 }, { "epoch": 1.539808083002185, "grad_norm": 0.9531196355819702, "learning_rate": 1.2535635195635147e-05, "loss": 0.9722, "step": 241020 }, { "epoch": 1.5398719701519237, "grad_norm": 1.799153447151184, "learning_rate": 1.2532312447073818e-05, "loss": 0.7653, "step": 241030 }, { "epoch": 1.5399358573016624, "grad_norm": 0.8743974566459656, "learning_rate": 1.2528990075844527e-05, "loss": 0.6515, "step": 241040 }, { "epoch": 1.539999744451401, "grad_norm": 1.757636547088623, "learning_rate": 1.2525668081980712e-05, "loss": 0.7872, "step": 241050 }, { "epoch": 1.5400636316011398, "grad_norm": 1.1052943468093872, "learning_rate": 1.2522346465515855e-05, "loss": 0.9347, "step": 241060 }, { "epoch": 1.5401275187508785, "grad_norm": 0.6515098810195923, "learning_rate": 1.2519025226483378e-05, "loss": 0.8837, "step": 241070 }, { "epoch": 1.5401914059006172, "grad_norm": 0.7127335667610168, "learning_rate": 1.2515704364916758e-05, "loss": 0.8703, "step": 241080 }, { "epoch": 1.5402552930503557, "grad_norm": 0.633703887462616, "learning_rate": 1.2512383880849404e-05, "loss": 0.7829, "step": 241090 }, { "epoch": 1.5403191802000946, "grad_norm": 1.5734481811523438, "learning_rate": 1.2509063774314795e-05, "loss": 0.9246, "step": 241100 }, { "epoch": 1.540383067349833, "grad_norm": 1.2697124481201172, "learning_rate": 1.2505744045346329e-05, "loss": 0.7494, "step": 241110 }, { "epoch": 1.540446954499572, "grad_norm": 0.903329610824585, "learning_rate": 1.2502424693977476e-05, "loss": 0.8019, "step": 241120 }, { "epoch": 1.5405108416493105, "grad_norm": 0.7796242237091064, "learning_rate": 1.2499105720241628e-05, "loss": 0.9078, "step": 241130 }, { "epoch": 1.5405747287990494, "grad_norm": 0.9284679293632507, "learning_rate": 1.2495787124172248e-05, "loss": 0.6733, "step": 241140 }, { "epoch": 1.540638615948788, "grad_norm": 0.6304734945297241, "learning_rate": 1.2492468905802717e-05, "loss": 0.8974, "step": 241150 }, { "epoch": 1.5407025030985269, "grad_norm": 0.8838341236114502, "learning_rate": 1.2489151065166476e-05, "loss": 0.8293, "step": 241160 }, { "epoch": 1.5407663902482653, "grad_norm": 1.1389199495315552, "learning_rate": 1.2485833602296953e-05, "loss": 0.7646, "step": 241170 }, { "epoch": 1.5408302773980043, "grad_norm": 0.9645891189575195, "learning_rate": 1.2482516517227522e-05, "loss": 0.7372, "step": 241180 }, { "epoch": 1.5408941645477428, "grad_norm": 1.2675881385803223, "learning_rate": 1.247919980999162e-05, "loss": 0.8884, "step": 241190 }, { "epoch": 1.5409580516974817, "grad_norm": 1.097831130027771, "learning_rate": 1.2475883480622624e-05, "loss": 1.1137, "step": 241200 }, { "epoch": 1.5410219388472202, "grad_norm": 0.7022253274917603, "learning_rate": 1.2472567529153955e-05, "loss": 1.0512, "step": 241210 }, { "epoch": 1.541085825996959, "grad_norm": 1.3515077829360962, "learning_rate": 1.2469251955618988e-05, "loss": 0.9112, "step": 241220 }, { "epoch": 1.5411497131466976, "grad_norm": 0.9095216989517212, "learning_rate": 1.2465936760051133e-05, "loss": 0.8354, "step": 241230 }, { "epoch": 1.5412136002964365, "grad_norm": 0.9183611869812012, "learning_rate": 1.2462621942483749e-05, "loss": 0.9475, "step": 241240 }, { "epoch": 1.541277487446175, "grad_norm": 1.1954056024551392, "learning_rate": 1.2459307502950256e-05, "loss": 1.0327, "step": 241250 }, { "epoch": 1.541341374595914, "grad_norm": 1.835919737815857, "learning_rate": 1.2455993441483999e-05, "loss": 0.7387, "step": 241260 }, { "epoch": 1.5414052617456524, "grad_norm": 0.7012445330619812, "learning_rate": 1.245267975811838e-05, "loss": 0.7472, "step": 241270 }, { "epoch": 1.5414691488953913, "grad_norm": 0.7782658338546753, "learning_rate": 1.2449366452886752e-05, "loss": 0.93, "step": 241280 }, { "epoch": 1.5415330360451298, "grad_norm": 1.1713647842407227, "learning_rate": 1.2446053525822498e-05, "loss": 0.8349, "step": 241290 }, { "epoch": 1.5415969231948687, "grad_norm": 0.959614634513855, "learning_rate": 1.2442740976958955e-05, "loss": 0.9754, "step": 241300 }, { "epoch": 1.5416608103446072, "grad_norm": 0.936188280582428, "learning_rate": 1.2439428806329522e-05, "loss": 0.8709, "step": 241310 }, { "epoch": 1.5417246974943462, "grad_norm": 0.8712064623832703, "learning_rate": 1.2436117013967525e-05, "loss": 0.8923, "step": 241320 }, { "epoch": 1.5417885846440846, "grad_norm": 0.8634532690048218, "learning_rate": 1.2432805599906332e-05, "loss": 0.8087, "step": 241330 }, { "epoch": 1.5418524717938233, "grad_norm": 1.1293830871582031, "learning_rate": 1.2429494564179278e-05, "loss": 0.7126, "step": 241340 }, { "epoch": 1.541916358943562, "grad_norm": 1.6642500162124634, "learning_rate": 1.2426183906819733e-05, "loss": 1.0368, "step": 241350 }, { "epoch": 1.5419802460933008, "grad_norm": 0.9231435060501099, "learning_rate": 1.2422873627861004e-05, "loss": 0.7463, "step": 241360 }, { "epoch": 1.5420441332430395, "grad_norm": 1.0752265453338623, "learning_rate": 1.2419563727336447e-05, "loss": 0.8077, "step": 241370 }, { "epoch": 1.5421080203927782, "grad_norm": 1.3547260761260986, "learning_rate": 1.241625420527941e-05, "loss": 0.9673, "step": 241380 }, { "epoch": 1.5421719075425169, "grad_norm": 1.10608971118927, "learning_rate": 1.2412945061723192e-05, "loss": 0.6985, "step": 241390 }, { "epoch": 1.5422357946922556, "grad_norm": 0.6865934729576111, "learning_rate": 1.2409636296701166e-05, "loss": 0.7879, "step": 241400 }, { "epoch": 1.5422996818419943, "grad_norm": 0.7840709090232849, "learning_rate": 1.2406327910246595e-05, "loss": 0.7392, "step": 241410 }, { "epoch": 1.542363568991733, "grad_norm": 0.904344379901886, "learning_rate": 1.2403019902392837e-05, "loss": 0.915, "step": 241420 }, { "epoch": 1.5424274561414717, "grad_norm": 1.0914677381515503, "learning_rate": 1.2399712273173181e-05, "loss": 0.7752, "step": 241430 }, { "epoch": 1.5424913432912104, "grad_norm": 1.2973593473434448, "learning_rate": 1.2396405022620966e-05, "loss": 0.8754, "step": 241440 }, { "epoch": 1.542555230440949, "grad_norm": 0.7872340679168701, "learning_rate": 1.2393098150769467e-05, "loss": 0.8355, "step": 241450 }, { "epoch": 1.5426191175906878, "grad_norm": 0.7979745864868164, "learning_rate": 1.2389791657652023e-05, "loss": 0.9445, "step": 241460 }, { "epoch": 1.5426830047404265, "grad_norm": 1.740941047668457, "learning_rate": 1.2386485543301896e-05, "loss": 0.9143, "step": 241470 }, { "epoch": 1.5427468918901652, "grad_norm": 0.9509738683700562, "learning_rate": 1.2383179807752399e-05, "loss": 0.678, "step": 241480 }, { "epoch": 1.542810779039904, "grad_norm": 0.8133483529090881, "learning_rate": 1.2379874451036844e-05, "loss": 0.9289, "step": 241490 }, { "epoch": 1.5428746661896426, "grad_norm": 0.7095542550086975, "learning_rate": 1.2376569473188483e-05, "loss": 0.776, "step": 241500 }, { "epoch": 1.5429385533393813, "grad_norm": 0.9627558588981628, "learning_rate": 1.2373264874240625e-05, "loss": 0.8404, "step": 241510 }, { "epoch": 1.54300244048912, "grad_norm": 0.9276164770126343, "learning_rate": 1.2369960654226536e-05, "loss": 1.0322, "step": 241520 }, { "epoch": 1.5430663276388588, "grad_norm": 0.7478491067886353, "learning_rate": 1.2366656813179506e-05, "loss": 0.7374, "step": 241530 }, { "epoch": 1.5431302147885975, "grad_norm": 0.8188766241073608, "learning_rate": 1.236335335113279e-05, "loss": 0.7053, "step": 241540 }, { "epoch": 1.5431941019383362, "grad_norm": 0.7239640951156616, "learning_rate": 1.2360050268119677e-05, "loss": 0.7981, "step": 241550 }, { "epoch": 1.5432579890880749, "grad_norm": 1.2404205799102783, "learning_rate": 1.2356747564173405e-05, "loss": 1.3444, "step": 241560 }, { "epoch": 1.5433218762378136, "grad_norm": 0.597846508026123, "learning_rate": 1.2353445239327271e-05, "loss": 0.9732, "step": 241570 }, { "epoch": 1.543385763387552, "grad_norm": 0.9635471105575562, "learning_rate": 1.2350143293614491e-05, "loss": 0.7034, "step": 241580 }, { "epoch": 1.543449650537291, "grad_norm": 0.9366225004196167, "learning_rate": 1.234684172706836e-05, "loss": 0.6786, "step": 241590 }, { "epoch": 1.5435135376870295, "grad_norm": 0.9822403788566589, "learning_rate": 1.2343540539722093e-05, "loss": 0.7379, "step": 241600 }, { "epoch": 1.5435774248367684, "grad_norm": 0.6430383920669556, "learning_rate": 1.234023973160896e-05, "loss": 0.8726, "step": 241610 }, { "epoch": 1.543641311986507, "grad_norm": 0.7444342374801636, "learning_rate": 1.233693930276218e-05, "loss": 1.0381, "step": 241620 }, { "epoch": 1.5437051991362458, "grad_norm": 0.9430611729621887, "learning_rate": 1.2333639253215024e-05, "loss": 0.7164, "step": 241630 }, { "epoch": 1.5437690862859843, "grad_norm": 1.330551266670227, "learning_rate": 1.2330339583000688e-05, "loss": 0.6713, "step": 241640 }, { "epoch": 1.5438329734357232, "grad_norm": 1.3129150867462158, "learning_rate": 1.2327040292152436e-05, "loss": 0.8398, "step": 241650 }, { "epoch": 1.5438968605854617, "grad_norm": 1.5121541023254395, "learning_rate": 1.2323741380703469e-05, "loss": 0.7578, "step": 241660 }, { "epoch": 1.5439607477352006, "grad_norm": 1.7165089845657349, "learning_rate": 1.2320442848687031e-05, "loss": 0.6938, "step": 241670 }, { "epoch": 1.5440246348849391, "grad_norm": 1.1789953708648682, "learning_rate": 1.2317144696136318e-05, "loss": 0.6197, "step": 241680 }, { "epoch": 1.544088522034678, "grad_norm": 0.80938321352005, "learning_rate": 1.2313846923084554e-05, "loss": 1.0115, "step": 241690 }, { "epoch": 1.5441524091844165, "grad_norm": 0.9993014931678772, "learning_rate": 1.2310549529564974e-05, "loss": 0.8602, "step": 241700 }, { "epoch": 1.5442162963341555, "grad_norm": 0.8560216426849365, "learning_rate": 1.2307252515610751e-05, "loss": 0.7854, "step": 241710 }, { "epoch": 1.544280183483894, "grad_norm": 0.9550018310546875, "learning_rate": 1.230395588125512e-05, "loss": 0.7854, "step": 241720 }, { "epoch": 1.5443440706336329, "grad_norm": 0.46049657464027405, "learning_rate": 1.2300659626531247e-05, "loss": 0.7882, "step": 241730 }, { "epoch": 1.5444079577833714, "grad_norm": 2.080028533935547, "learning_rate": 1.2297363751472363e-05, "loss": 0.9231, "step": 241740 }, { "epoch": 1.5444718449331103, "grad_norm": 1.1706985235214233, "learning_rate": 1.2294068256111629e-05, "loss": 0.9966, "step": 241750 }, { "epoch": 1.5445357320828488, "grad_norm": 2.6633691787719727, "learning_rate": 1.2290773140482265e-05, "loss": 1.0068, "step": 241760 }, { "epoch": 1.5445996192325877, "grad_norm": 0.6567732691764832, "learning_rate": 1.2287478404617419e-05, "loss": 0.839, "step": 241770 }, { "epoch": 1.5446635063823262, "grad_norm": 0.5708428025245667, "learning_rate": 1.2284184048550307e-05, "loss": 0.9023, "step": 241780 }, { "epoch": 1.5447273935320651, "grad_norm": 0.9928178787231445, "learning_rate": 1.2280890072314078e-05, "loss": 0.7571, "step": 241790 }, { "epoch": 1.5447912806818036, "grad_norm": 0.8214378952980042, "learning_rate": 1.2277596475941933e-05, "loss": 0.8863, "step": 241800 }, { "epoch": 1.5448551678315423, "grad_norm": 0.6824464797973633, "learning_rate": 1.2274303259467007e-05, "loss": 0.9429, "step": 241810 }, { "epoch": 1.544919054981281, "grad_norm": 0.6267659664154053, "learning_rate": 1.2271010422922503e-05, "loss": 0.6823, "step": 241820 }, { "epoch": 1.5449829421310197, "grad_norm": 1.0387673377990723, "learning_rate": 1.2267717966341547e-05, "loss": 0.8397, "step": 241830 }, { "epoch": 1.5450468292807584, "grad_norm": 0.7781873345375061, "learning_rate": 1.226442588975733e-05, "loss": 0.5977, "step": 241840 }, { "epoch": 1.5451107164304971, "grad_norm": 1.1527103185653687, "learning_rate": 1.2261134193202977e-05, "loss": 0.8294, "step": 241850 }, { "epoch": 1.5451746035802358, "grad_norm": 2.6820149421691895, "learning_rate": 1.2257842876711662e-05, "loss": 0.998, "step": 241860 }, { "epoch": 1.5452384907299745, "grad_norm": 0.8141218423843384, "learning_rate": 1.2254551940316512e-05, "loss": 0.8745, "step": 241870 }, { "epoch": 1.5453023778797133, "grad_norm": 1.027464509010315, "learning_rate": 1.2251261384050689e-05, "loss": 0.9838, "step": 241880 }, { "epoch": 1.545366265029452, "grad_norm": 1.029893398284912, "learning_rate": 1.2247971207947323e-05, "loss": 1.0286, "step": 241890 }, { "epoch": 1.5454301521791907, "grad_norm": 0.9250289797782898, "learning_rate": 1.2244681412039532e-05, "loss": 1.1109, "step": 241900 }, { "epoch": 1.5454940393289294, "grad_norm": 0.6123344898223877, "learning_rate": 1.2241391996360475e-05, "loss": 0.9885, "step": 241910 }, { "epoch": 1.545557926478668, "grad_norm": 0.9226810932159424, "learning_rate": 1.2238102960943254e-05, "loss": 1.044, "step": 241920 }, { "epoch": 1.5456218136284068, "grad_norm": 3.4627225399017334, "learning_rate": 1.2234814305821019e-05, "loss": 0.7857, "step": 241930 }, { "epoch": 1.5456857007781455, "grad_norm": 0.76270991563797, "learning_rate": 1.2231526031026863e-05, "loss": 0.9521, "step": 241940 }, { "epoch": 1.5457495879278842, "grad_norm": 1.6452094316482544, "learning_rate": 1.2228238136593922e-05, "loss": 0.6327, "step": 241950 }, { "epoch": 1.545813475077623, "grad_norm": 1.7455980777740479, "learning_rate": 1.2224950622555292e-05, "loss": 0.9259, "step": 241960 }, { "epoch": 1.5458773622273616, "grad_norm": 1.0099796056747437, "learning_rate": 1.2221663488944101e-05, "loss": 0.6914, "step": 241970 }, { "epoch": 1.5459412493771003, "grad_norm": 0.7746630907058716, "learning_rate": 1.2218376735793424e-05, "loss": 0.7888, "step": 241980 }, { "epoch": 1.546005136526839, "grad_norm": 0.8316277861595154, "learning_rate": 1.2215090363136406e-05, "loss": 0.9571, "step": 241990 }, { "epoch": 1.5460690236765777, "grad_norm": 1.7187628746032715, "learning_rate": 1.221180437100609e-05, "loss": 0.7908, "step": 242000 }, { "epoch": 1.5461329108263164, "grad_norm": 2.2674307823181152, "learning_rate": 1.22085187594356e-05, "loss": 0.8359, "step": 242010 }, { "epoch": 1.5461967979760551, "grad_norm": 0.6435132622718811, "learning_rate": 1.2205233528458031e-05, "loss": 0.7904, "step": 242020 }, { "epoch": 1.5462606851257938, "grad_norm": 1.0840686559677124, "learning_rate": 1.2201948678106445e-05, "loss": 0.9222, "step": 242030 }, { "epoch": 1.5463245722755325, "grad_norm": 0.643201470375061, "learning_rate": 1.2198664208413951e-05, "loss": 0.7934, "step": 242040 }, { "epoch": 1.5463884594252713, "grad_norm": 1.0409761667251587, "learning_rate": 1.2195380119413596e-05, "loss": 1.0341, "step": 242050 }, { "epoch": 1.54645234657501, "grad_norm": 0.9929067492485046, "learning_rate": 1.2192096411138487e-05, "loss": 1.0195, "step": 242060 }, { "epoch": 1.5465162337247484, "grad_norm": 1.201584815979004, "learning_rate": 1.2188813083621659e-05, "loss": 1.0365, "step": 242070 }, { "epoch": 1.5465801208744874, "grad_norm": 0.9801458716392517, "learning_rate": 1.2185530136896205e-05, "loss": 0.6543, "step": 242080 }, { "epoch": 1.5466440080242259, "grad_norm": 0.9027044177055359, "learning_rate": 1.218224757099517e-05, "loss": 0.7799, "step": 242090 }, { "epoch": 1.5467078951739648, "grad_norm": 1.112123966217041, "learning_rate": 1.2178965385951629e-05, "loss": 0.8694, "step": 242100 }, { "epoch": 1.5467717823237033, "grad_norm": 0.6458132863044739, "learning_rate": 1.2175683581798613e-05, "loss": 0.8681, "step": 242110 }, { "epoch": 1.5468356694734422, "grad_norm": 1.030352234840393, "learning_rate": 1.2172402158569202e-05, "loss": 0.975, "step": 242120 }, { "epoch": 1.5468995566231807, "grad_norm": 1.0494451522827148, "learning_rate": 1.2169121116296407e-05, "loss": 0.7302, "step": 242130 }, { "epoch": 1.5469634437729196, "grad_norm": 0.6920552849769592, "learning_rate": 1.2165840455013305e-05, "loss": 0.8045, "step": 242140 }, { "epoch": 1.547027330922658, "grad_norm": 0.905730128288269, "learning_rate": 1.2162560174752912e-05, "loss": 0.9529, "step": 242150 }, { "epoch": 1.547091218072397, "grad_norm": 1.1732187271118164, "learning_rate": 1.2159280275548286e-05, "loss": 1.0796, "step": 242160 }, { "epoch": 1.5471551052221355, "grad_norm": 2.3803117275238037, "learning_rate": 1.2156000757432423e-05, "loss": 0.9862, "step": 242170 }, { "epoch": 1.5472189923718744, "grad_norm": 0.9430058002471924, "learning_rate": 1.2152721620438395e-05, "loss": 0.8618, "step": 242180 }, { "epoch": 1.547282879521613, "grad_norm": 0.8770157098770142, "learning_rate": 1.2149442864599187e-05, "loss": 0.8812, "step": 242190 }, { "epoch": 1.5473467666713518, "grad_norm": 1.1416618824005127, "learning_rate": 1.2146164489947847e-05, "loss": 0.8192, "step": 242200 }, { "epoch": 1.5474106538210903, "grad_norm": 1.3545187711715698, "learning_rate": 1.2142886496517365e-05, "loss": 0.95, "step": 242210 }, { "epoch": 1.5474745409708293, "grad_norm": 0.8353721499443054, "learning_rate": 1.2139608884340764e-05, "loss": 0.7645, "step": 242220 }, { "epoch": 1.5475384281205677, "grad_norm": 2.1088666915893555, "learning_rate": 1.2136331653451071e-05, "loss": 0.8966, "step": 242230 }, { "epoch": 1.5476023152703067, "grad_norm": 0.8680897951126099, "learning_rate": 1.2133054803881267e-05, "loss": 0.9958, "step": 242240 }, { "epoch": 1.5476662024200452, "grad_norm": 1.1479185819625854, "learning_rate": 1.2129778335664366e-05, "loss": 0.9269, "step": 242250 }, { "epoch": 1.547730089569784, "grad_norm": 0.6384342908859253, "learning_rate": 1.2126502248833355e-05, "loss": 0.7369, "step": 242260 }, { "epoch": 1.5477939767195226, "grad_norm": 0.48517945408821106, "learning_rate": 1.2123226543421235e-05, "loss": 0.7885, "step": 242270 }, { "epoch": 1.5478578638692615, "grad_norm": 0.5258692502975464, "learning_rate": 1.2119951219460985e-05, "loss": 1.0042, "step": 242280 }, { "epoch": 1.547921751019, "grad_norm": 0.6555418968200684, "learning_rate": 1.2116676276985606e-05, "loss": 0.8657, "step": 242290 }, { "epoch": 1.5479856381687387, "grad_norm": 0.8531206250190735, "learning_rate": 1.2113401716028061e-05, "loss": 0.7222, "step": 242300 }, { "epoch": 1.5480495253184774, "grad_norm": 1.6429301500320435, "learning_rate": 1.2110127536621352e-05, "loss": 0.797, "step": 242310 }, { "epoch": 1.548113412468216, "grad_norm": 0.78465735912323, "learning_rate": 1.2106853738798419e-05, "loss": 0.7611, "step": 242320 }, { "epoch": 1.5481772996179548, "grad_norm": 1.2310987710952759, "learning_rate": 1.2103580322592273e-05, "loss": 1.1022, "step": 242330 }, { "epoch": 1.5482411867676935, "grad_norm": 0.8379166722297668, "learning_rate": 1.210030728803584e-05, "loss": 0.9564, "step": 242340 }, { "epoch": 1.5483050739174322, "grad_norm": 0.7665345668792725, "learning_rate": 1.2097034635162108e-05, "loss": 0.9774, "step": 242350 }, { "epoch": 1.548368961067171, "grad_norm": 1.7157317399978638, "learning_rate": 1.2093762364004024e-05, "loss": 1.0717, "step": 242360 }, { "epoch": 1.5484328482169096, "grad_norm": 1.0685333013534546, "learning_rate": 1.2090490474594557e-05, "loss": 0.7697, "step": 242370 }, { "epoch": 1.5484967353666483, "grad_norm": 0.9549485445022583, "learning_rate": 1.2087218966966645e-05, "loss": 0.9957, "step": 242380 }, { "epoch": 1.548560622516387, "grad_norm": 0.7715072631835938, "learning_rate": 1.2083947841153226e-05, "loss": 0.956, "step": 242390 }, { "epoch": 1.5486245096661257, "grad_norm": 0.7687417268753052, "learning_rate": 1.2080677097187266e-05, "loss": 0.817, "step": 242400 }, { "epoch": 1.5486883968158645, "grad_norm": 3.5652546882629395, "learning_rate": 1.2077406735101682e-05, "loss": 0.6971, "step": 242410 }, { "epoch": 1.5487522839656032, "grad_norm": 0.7793838977813721, "learning_rate": 1.2074136754929428e-05, "loss": 0.838, "step": 242420 }, { "epoch": 1.5488161711153419, "grad_norm": 0.7141621708869934, "learning_rate": 1.2070867156703419e-05, "loss": 0.7336, "step": 242430 }, { "epoch": 1.5488800582650806, "grad_norm": 0.9291849136352539, "learning_rate": 1.2067597940456605e-05, "loss": 0.9723, "step": 242440 }, { "epoch": 1.5489439454148193, "grad_norm": 0.8107156157493591, "learning_rate": 1.2064329106221877e-05, "loss": 0.8796, "step": 242450 }, { "epoch": 1.549007832564558, "grad_norm": 1.0288563966751099, "learning_rate": 1.2061060654032192e-05, "loss": 0.9119, "step": 242460 }, { "epoch": 1.5490717197142967, "grad_norm": 1.1197317838668823, "learning_rate": 1.2057792583920436e-05, "loss": 0.7437, "step": 242470 }, { "epoch": 1.5491356068640354, "grad_norm": 0.7756921052932739, "learning_rate": 1.2054524895919539e-05, "loss": 0.6483, "step": 242480 }, { "epoch": 1.549199494013774, "grad_norm": 1.0276974439620972, "learning_rate": 1.2051257590062397e-05, "loss": 0.779, "step": 242490 }, { "epoch": 1.5492633811635128, "grad_norm": 3.1762709617614746, "learning_rate": 1.2047990666381936e-05, "loss": 1.1483, "step": 242500 }, { "epoch": 1.5493272683132515, "grad_norm": 0.7494045495986938, "learning_rate": 1.2044724124911023e-05, "loss": 1.0768, "step": 242510 }, { "epoch": 1.5493911554629902, "grad_norm": 0.996212363243103, "learning_rate": 1.204145796568259e-05, "loss": 1.0021, "step": 242520 }, { "epoch": 1.549455042612729, "grad_norm": 1.222100019454956, "learning_rate": 1.2038192188729502e-05, "loss": 0.8258, "step": 242530 }, { "epoch": 1.5495189297624674, "grad_norm": 1.158316731452942, "learning_rate": 1.203492679408466e-05, "loss": 0.8146, "step": 242540 }, { "epoch": 1.5495828169122063, "grad_norm": 0.9694693684577942, "learning_rate": 1.2031661781780962e-05, "loss": 0.7009, "step": 242550 }, { "epoch": 1.5496467040619448, "grad_norm": 1.084350347518921, "learning_rate": 1.2028397151851262e-05, "loss": 0.7308, "step": 242560 }, { "epoch": 1.5497105912116838, "grad_norm": 1.462477207183838, "learning_rate": 1.2025132904328474e-05, "loss": 0.856, "step": 242570 }, { "epoch": 1.5497744783614222, "grad_norm": 1.2192946672439575, "learning_rate": 1.2021869039245431e-05, "loss": 0.8128, "step": 242580 }, { "epoch": 1.5498383655111612, "grad_norm": 0.6992841958999634, "learning_rate": 1.2018605556635037e-05, "loss": 0.6754, "step": 242590 }, { "epoch": 1.5499022526608996, "grad_norm": 0.6951451897621155, "learning_rate": 1.2015342456530126e-05, "loss": 0.773, "step": 242600 }, { "epoch": 1.5499661398106386, "grad_norm": 0.764622688293457, "learning_rate": 1.20120797389636e-05, "loss": 0.7105, "step": 242610 }, { "epoch": 1.550030026960377, "grad_norm": 1.0126152038574219, "learning_rate": 1.2008817403968275e-05, "loss": 0.8862, "step": 242620 }, { "epoch": 1.550093914110116, "grad_norm": 0.9220937490463257, "learning_rate": 1.2005555451577038e-05, "loss": 0.8853, "step": 242630 }, { "epoch": 1.5501578012598545, "grad_norm": 1.593605875968933, "learning_rate": 1.2002293881822718e-05, "loss": 0.983, "step": 242640 }, { "epoch": 1.5502216884095934, "grad_norm": 1.2632958889007568, "learning_rate": 1.1999032694738188e-05, "loss": 0.8475, "step": 242650 }, { "epoch": 1.5502855755593319, "grad_norm": 1.2085556983947754, "learning_rate": 1.1995771890356255e-05, "loss": 0.9129, "step": 242660 }, { "epoch": 1.5503494627090708, "grad_norm": 0.9262987375259399, "learning_rate": 1.1992511468709794e-05, "loss": 0.9488, "step": 242670 }, { "epoch": 1.5504133498588093, "grad_norm": 0.9073424339294434, "learning_rate": 1.198925142983161e-05, "loss": 0.8018, "step": 242680 }, { "epoch": 1.5504772370085482, "grad_norm": 0.8505311608314514, "learning_rate": 1.1985991773754557e-05, "loss": 0.6834, "step": 242690 }, { "epoch": 1.5505411241582867, "grad_norm": 0.9804407954216003, "learning_rate": 1.1982732500511445e-05, "loss": 0.96, "step": 242700 }, { "epoch": 1.5506050113080256, "grad_norm": 0.9178571105003357, "learning_rate": 1.1979473610135117e-05, "loss": 1.0759, "step": 242710 }, { "epoch": 1.5506688984577641, "grad_norm": 0.6002690196037292, "learning_rate": 1.1976215102658372e-05, "loss": 0.777, "step": 242720 }, { "epoch": 1.550732785607503, "grad_norm": 1.127046823501587, "learning_rate": 1.197295697811403e-05, "loss": 0.9468, "step": 242730 }, { "epoch": 1.5507966727572415, "grad_norm": 0.6819142699241638, "learning_rate": 1.1969699236534932e-05, "loss": 0.7426, "step": 242740 }, { "epoch": 1.5508605599069805, "grad_norm": 0.817094624042511, "learning_rate": 1.1966441877953843e-05, "loss": 0.8225, "step": 242750 }, { "epoch": 1.550924447056719, "grad_norm": 0.8324134945869446, "learning_rate": 1.1963184902403607e-05, "loss": 0.7833, "step": 242760 }, { "epoch": 1.5509883342064579, "grad_norm": 0.8241669535636902, "learning_rate": 1.1959928309916984e-05, "loss": 1.0963, "step": 242770 }, { "epoch": 1.5510522213561964, "grad_norm": 0.9614746570587158, "learning_rate": 1.195667210052681e-05, "loss": 0.9468, "step": 242780 }, { "epoch": 1.551116108505935, "grad_norm": 3.019289255142212, "learning_rate": 1.195341627426585e-05, "loss": 0.9503, "step": 242790 }, { "epoch": 1.5511799956556738, "grad_norm": 1.0065377950668335, "learning_rate": 1.1950160831166912e-05, "loss": 0.6603, "step": 242800 }, { "epoch": 1.5512438828054125, "grad_norm": 0.888316810131073, "learning_rate": 1.1946905771262761e-05, "loss": 0.9429, "step": 242810 }, { "epoch": 1.5513077699551512, "grad_norm": 0.9668296575546265, "learning_rate": 1.1943651094586206e-05, "loss": 0.7907, "step": 242820 }, { "epoch": 1.55137165710489, "grad_norm": 0.7448475360870361, "learning_rate": 1.194039680116999e-05, "loss": 0.9268, "step": 242830 }, { "epoch": 1.5514355442546286, "grad_norm": 0.918478786945343, "learning_rate": 1.1937142891046915e-05, "loss": 0.8964, "step": 242840 }, { "epoch": 1.5514994314043673, "grad_norm": 1.3066163063049316, "learning_rate": 1.1933889364249733e-05, "loss": 0.9459, "step": 242850 }, { "epoch": 1.551563318554106, "grad_norm": 0.7327299118041992, "learning_rate": 1.1930636220811226e-05, "loss": 0.9063, "step": 242860 }, { "epoch": 1.5516272057038447, "grad_norm": 0.7058196067810059, "learning_rate": 1.1927383460764152e-05, "loss": 0.9326, "step": 242870 }, { "epoch": 1.5516910928535834, "grad_norm": 0.5669124722480774, "learning_rate": 1.1924131084141244e-05, "loss": 1.0724, "step": 242880 }, { "epoch": 1.5517549800033221, "grad_norm": 1.1147737503051758, "learning_rate": 1.1920879090975295e-05, "loss": 0.8509, "step": 242890 }, { "epoch": 1.5518188671530608, "grad_norm": 0.9954694509506226, "learning_rate": 1.1917627481299021e-05, "loss": 1.228, "step": 242900 }, { "epoch": 1.5518827543027995, "grad_norm": 1.6905938386917114, "learning_rate": 1.1914376255145199e-05, "loss": 0.713, "step": 242910 }, { "epoch": 1.5519466414525382, "grad_norm": 1.1534686088562012, "learning_rate": 1.1911125412546542e-05, "loss": 0.7959, "step": 242920 }, { "epoch": 1.552010528602277, "grad_norm": 1.1367498636245728, "learning_rate": 1.190787495353582e-05, "loss": 0.7514, "step": 242930 }, { "epoch": 1.5520744157520157, "grad_norm": 1.3090966939926147, "learning_rate": 1.1904624878145731e-05, "loss": 0.8619, "step": 242940 }, { "epoch": 1.5521383029017544, "grad_norm": 2.116231918334961, "learning_rate": 1.1901375186409047e-05, "loss": 0.7968, "step": 242950 }, { "epoch": 1.552202190051493, "grad_norm": 2.4607231616973877, "learning_rate": 1.1898125878358457e-05, "loss": 0.8341, "step": 242960 }, { "epoch": 1.5522660772012318, "grad_norm": 1.982274055480957, "learning_rate": 1.1894876954026718e-05, "loss": 0.8466, "step": 242970 }, { "epoch": 1.5523299643509705, "grad_norm": 0.8793577551841736, "learning_rate": 1.189162841344652e-05, "loss": 0.8501, "step": 242980 }, { "epoch": 1.5523938515007092, "grad_norm": 3.320389986038208, "learning_rate": 1.188838025665061e-05, "loss": 0.9446, "step": 242990 }, { "epoch": 1.552457738650448, "grad_norm": 0.8653967976570129, "learning_rate": 1.1885132483671663e-05, "loss": 0.6972, "step": 243000 }, { "epoch": 1.5525216258001866, "grad_norm": 1.2445247173309326, "learning_rate": 1.1881885094542422e-05, "loss": 0.679, "step": 243010 }, { "epoch": 1.5525855129499253, "grad_norm": 0.9507594704627991, "learning_rate": 1.1878638089295562e-05, "loss": 0.7246, "step": 243020 }, { "epoch": 1.5526494000996638, "grad_norm": 0.9456453919410706, "learning_rate": 1.1875391467963803e-05, "loss": 0.8444, "step": 243030 }, { "epoch": 1.5527132872494027, "grad_norm": 0.9404791593551636, "learning_rate": 1.1872145230579828e-05, "loss": 0.9284, "step": 243040 }, { "epoch": 1.5527771743991412, "grad_norm": 0.8506212830543518, "learning_rate": 1.1868899377176346e-05, "loss": 0.839, "step": 243050 }, { "epoch": 1.5528410615488801, "grad_norm": 1.2514028549194336, "learning_rate": 1.1865653907786023e-05, "loss": 0.6611, "step": 243060 }, { "epoch": 1.5529049486986186, "grad_norm": 0.8052425980567932, "learning_rate": 1.1862408822441557e-05, "loss": 0.7826, "step": 243070 }, { "epoch": 1.5529688358483575, "grad_norm": 0.6969268321990967, "learning_rate": 1.1859164121175642e-05, "loss": 0.8792, "step": 243080 }, { "epoch": 1.553032722998096, "grad_norm": 0.908186137676239, "learning_rate": 1.1855919804020926e-05, "loss": 1.0984, "step": 243090 }, { "epoch": 1.553096610147835, "grad_norm": 0.9428640604019165, "learning_rate": 1.185267587101011e-05, "loss": 0.6629, "step": 243100 }, { "epoch": 1.5531604972975734, "grad_norm": 1.1042400598526, "learning_rate": 1.1849432322175835e-05, "loss": 0.7213, "step": 243110 }, { "epoch": 1.5532243844473124, "grad_norm": 0.9871320128440857, "learning_rate": 1.1846189157550796e-05, "loss": 0.7859, "step": 243120 }, { "epoch": 1.5532882715970509, "grad_norm": 1.2303754091262817, "learning_rate": 1.184294637716763e-05, "loss": 0.9326, "step": 243130 }, { "epoch": 1.5533521587467898, "grad_norm": 1.3488849401474, "learning_rate": 1.1839703981059014e-05, "loss": 0.8483, "step": 243140 }, { "epoch": 1.5534160458965283, "grad_norm": 1.0050297975540161, "learning_rate": 1.1836461969257578e-05, "loss": 1.1711, "step": 243150 }, { "epoch": 1.5534799330462672, "grad_norm": 0.8590402603149414, "learning_rate": 1.1833220341796002e-05, "loss": 0.7949, "step": 243160 }, { "epoch": 1.5535438201960057, "grad_norm": 1.1629736423492432, "learning_rate": 1.1829979098706905e-05, "loss": 0.8913, "step": 243170 }, { "epoch": 1.5536077073457446, "grad_norm": 0.979515552520752, "learning_rate": 1.1826738240022949e-05, "loss": 0.893, "step": 243180 }, { "epoch": 1.553671594495483, "grad_norm": 0.6931970715522766, "learning_rate": 1.1823497765776753e-05, "loss": 0.9594, "step": 243190 }, { "epoch": 1.553735481645222, "grad_norm": 0.8464453816413879, "learning_rate": 1.1820257676000978e-05, "loss": 0.8218, "step": 243200 }, { "epoch": 1.5537993687949605, "grad_norm": 1.2369046211242676, "learning_rate": 1.1817017970728223e-05, "loss": 0.646, "step": 243210 }, { "epoch": 1.5538632559446994, "grad_norm": 0.5345267653465271, "learning_rate": 1.1813778649991136e-05, "loss": 0.7116, "step": 243220 }, { "epoch": 1.553927143094438, "grad_norm": 1.2022993564605713, "learning_rate": 1.1810539713822327e-05, "loss": 1.1143, "step": 243230 }, { "epoch": 1.5539910302441768, "grad_norm": 0.5171950459480286, "learning_rate": 1.1807301162254435e-05, "loss": 0.7505, "step": 243240 }, { "epoch": 1.5540549173939153, "grad_norm": NaN, "learning_rate": 1.1804386794704053e-05, "loss": 0.8014, "step": 243250 }, { "epoch": 1.5541188045436543, "grad_norm": 0.7231611013412476, "learning_rate": 1.1801148973967718e-05, "loss": 1.0568, "step": 243260 }, { "epoch": 1.5541826916933927, "grad_norm": 0.8570977449417114, "learning_rate": 1.179791153792687e-05, "loss": 0.642, "step": 243270 }, { "epoch": 1.5542465788431314, "grad_norm": 1.048039197921753, "learning_rate": 1.1794674486614089e-05, "loss": 0.8153, "step": 243280 }, { "epoch": 1.5543104659928701, "grad_norm": 0.8269684314727783, "learning_rate": 1.1791437820062002e-05, "loss": 0.7687, "step": 243290 }, { "epoch": 1.5543743531426089, "grad_norm": 0.8667225241661072, "learning_rate": 1.1788201538303173e-05, "loss": 0.8972, "step": 243300 }, { "epoch": 1.5544382402923476, "grad_norm": 0.8198990225791931, "learning_rate": 1.1784965641370233e-05, "loss": 1.0236, "step": 243310 }, { "epoch": 1.5545021274420863, "grad_norm": 0.7782520055770874, "learning_rate": 1.1781730129295732e-05, "loss": 0.9793, "step": 243320 }, { "epoch": 1.554566014591825, "grad_norm": 0.7397206425666809, "learning_rate": 1.1778495002112289e-05, "loss": 1.0227, "step": 243330 }, { "epoch": 1.5546299017415637, "grad_norm": 0.8276669383049011, "learning_rate": 1.177526025985245e-05, "loss": 0.7356, "step": 243340 }, { "epoch": 1.5546937888913024, "grad_norm": 1.3340283632278442, "learning_rate": 1.1772025902548828e-05, "loss": 0.9137, "step": 243350 }, { "epoch": 1.554757676041041, "grad_norm": 1.0188891887664795, "learning_rate": 1.1768791930233958e-05, "loss": 0.9519, "step": 243360 }, { "epoch": 1.5548215631907798, "grad_norm": 0.9149680137634277, "learning_rate": 1.1765558342940442e-05, "loss": 0.8527, "step": 243370 }, { "epoch": 1.5548854503405185, "grad_norm": 1.1764163970947266, "learning_rate": 1.176232514070082e-05, "loss": 0.8642, "step": 243380 }, { "epoch": 1.5549493374902572, "grad_norm": 1.2475415468215942, "learning_rate": 1.1759092323547665e-05, "loss": 1.0427, "step": 243390 }, { "epoch": 1.555013224639996, "grad_norm": 1.0803353786468506, "learning_rate": 1.1755859891513549e-05, "loss": 0.913, "step": 243400 }, { "epoch": 1.5550771117897346, "grad_norm": 0.6774383783340454, "learning_rate": 1.1752627844630988e-05, "loss": 0.9511, "step": 243410 }, { "epoch": 1.5551409989394733, "grad_norm": 1.2802356481552124, "learning_rate": 1.1749396182932571e-05, "loss": 1.1907, "step": 243420 }, { "epoch": 1.555204886089212, "grad_norm": 0.8571861982345581, "learning_rate": 1.1746164906450814e-05, "loss": 0.9953, "step": 243430 }, { "epoch": 1.5552687732389507, "grad_norm": 0.6251296997070312, "learning_rate": 1.1742934015218282e-05, "loss": 0.8489, "step": 243440 }, { "epoch": 1.5553326603886894, "grad_norm": 1.214426875114441, "learning_rate": 1.173970350926749e-05, "loss": 0.832, "step": 243450 }, { "epoch": 1.5553965475384282, "grad_norm": 1.2595208883285522, "learning_rate": 1.1736473388630998e-05, "loss": 0.7714, "step": 243460 }, { "epoch": 1.5554604346881669, "grad_norm": 0.8752058148384094, "learning_rate": 1.1733243653341309e-05, "loss": 0.936, "step": 243470 }, { "epoch": 1.5555243218379056, "grad_norm": 1.6139124631881714, "learning_rate": 1.1730014303430969e-05, "loss": 0.9531, "step": 243480 }, { "epoch": 1.5555882089876443, "grad_norm": 1.2276432514190674, "learning_rate": 1.17267853389325e-05, "loss": 0.7412, "step": 243490 }, { "epoch": 1.555652096137383, "grad_norm": 0.7098108530044556, "learning_rate": 1.1723556759878395e-05, "loss": 0.8033, "step": 243500 }, { "epoch": 1.5557159832871217, "grad_norm": 0.7928702235221863, "learning_rate": 1.1720328566301202e-05, "loss": 0.8587, "step": 243510 }, { "epoch": 1.5557798704368602, "grad_norm": 1.4220958948135376, "learning_rate": 1.1717100758233406e-05, "loss": 1.0561, "step": 243520 }, { "epoch": 1.555843757586599, "grad_norm": 0.8648812770843506, "learning_rate": 1.1713873335707537e-05, "loss": 0.8772, "step": 243530 }, { "epoch": 1.5559076447363376, "grad_norm": 1.4728702306747437, "learning_rate": 1.1710646298756073e-05, "loss": 0.823, "step": 243540 }, { "epoch": 1.5559715318860765, "grad_norm": 0.9253225922584534, "learning_rate": 1.1707419647411538e-05, "loss": 0.8703, "step": 243550 }, { "epoch": 1.556035419035815, "grad_norm": 0.9636625051498413, "learning_rate": 1.1704193381706397e-05, "loss": 0.7297, "step": 243560 }, { "epoch": 1.556099306185554, "grad_norm": 0.6991531252861023, "learning_rate": 1.1700967501673176e-05, "loss": 0.6324, "step": 243570 }, { "epoch": 1.5561631933352924, "grad_norm": 0.8342954516410828, "learning_rate": 1.1697742007344336e-05, "loss": 0.881, "step": 243580 }, { "epoch": 1.5562270804850313, "grad_norm": 1.172669768333435, "learning_rate": 1.1694516898752383e-05, "loss": 1.1548, "step": 243590 }, { "epoch": 1.5562909676347698, "grad_norm": 1.1467583179473877, "learning_rate": 1.1691292175929769e-05, "loss": 1.2036, "step": 243600 }, { "epoch": 1.5563548547845087, "grad_norm": 1.2399263381958008, "learning_rate": 1.1688067838908995e-05, "loss": 0.8152, "step": 243610 }, { "epoch": 1.5564187419342472, "grad_norm": 0.9543413519859314, "learning_rate": 1.1684843887722512e-05, "loss": 0.991, "step": 243620 }, { "epoch": 1.5564826290839862, "grad_norm": 1.735193133354187, "learning_rate": 1.1681620322402808e-05, "loss": 0.8614, "step": 243630 }, { "epoch": 1.5565465162337246, "grad_norm": 0.979796826839447, "learning_rate": 1.1678397142982333e-05, "loss": 0.6309, "step": 243640 }, { "epoch": 1.5566104033834636, "grad_norm": 0.7954897880554199, "learning_rate": 1.1675174349493556e-05, "loss": 0.8495, "step": 243650 }, { "epoch": 1.556674290533202, "grad_norm": 0.7541021704673767, "learning_rate": 1.1671951941968922e-05, "loss": 0.8902, "step": 243660 }, { "epoch": 1.556738177682941, "grad_norm": 0.7214952111244202, "learning_rate": 1.1668729920440897e-05, "loss": 0.7945, "step": 243670 }, { "epoch": 1.5568020648326795, "grad_norm": 0.5442585349082947, "learning_rate": 1.1665508284941918e-05, "loss": 0.6824, "step": 243680 }, { "epoch": 1.5568659519824184, "grad_norm": 1.2295407056808472, "learning_rate": 1.1662287035504438e-05, "loss": 0.894, "step": 243690 }, { "epoch": 1.5569298391321569, "grad_norm": 1.6326357126235962, "learning_rate": 1.1659066172160887e-05, "loss": 1.0403, "step": 243700 }, { "epoch": 1.5569937262818958, "grad_norm": 0.9609403610229492, "learning_rate": 1.1655845694943712e-05, "loss": 0.6769, "step": 243710 }, { "epoch": 1.5570576134316343, "grad_norm": 0.969074547290802, "learning_rate": 1.1652625603885353e-05, "loss": 0.9733, "step": 243720 }, { "epoch": 1.5571215005813732, "grad_norm": 0.5936760902404785, "learning_rate": 1.1649405899018211e-05, "loss": 0.8327, "step": 243730 }, { "epoch": 1.5571853877311117, "grad_norm": 0.8490630388259888, "learning_rate": 1.1646186580374752e-05, "loss": 0.8271, "step": 243740 }, { "epoch": 1.5572492748808506, "grad_norm": 1.3601961135864258, "learning_rate": 1.1642967647987357e-05, "loss": 0.8539, "step": 243750 }, { "epoch": 1.5573131620305891, "grad_norm": 0.7356747984886169, "learning_rate": 1.1639749101888476e-05, "loss": 1.0955, "step": 243760 }, { "epoch": 1.5573770491803278, "grad_norm": 1.0827041864395142, "learning_rate": 1.1636530942110496e-05, "loss": 0.7306, "step": 243770 }, { "epoch": 1.5574409363300665, "grad_norm": 1.0377275943756104, "learning_rate": 1.163331316868585e-05, "loss": 0.8907, "step": 243780 }, { "epoch": 1.5575048234798052, "grad_norm": 0.9135669469833374, "learning_rate": 1.1630095781646915e-05, "loss": 1.1076, "step": 243790 }, { "epoch": 1.557568710629544, "grad_norm": 0.9091414213180542, "learning_rate": 1.162687878102613e-05, "loss": 0.8852, "step": 243800 }, { "epoch": 1.5576325977792826, "grad_norm": 0.7446238994598389, "learning_rate": 1.1623662166855853e-05, "loss": 1.0378, "step": 243810 }, { "epoch": 1.5576964849290214, "grad_norm": 1.005259394645691, "learning_rate": 1.1620445939168517e-05, "loss": 1.3612, "step": 243820 }, { "epoch": 1.55776037207876, "grad_norm": 0.6413303017616272, "learning_rate": 1.1617230097996479e-05, "loss": 0.804, "step": 243830 }, { "epoch": 1.5578242592284988, "grad_norm": 0.8589564561843872, "learning_rate": 1.1614014643372157e-05, "loss": 0.8413, "step": 243840 }, { "epoch": 1.5578881463782375, "grad_norm": 1.4905831813812256, "learning_rate": 1.1610799575327896e-05, "loss": 0.9538, "step": 243850 }, { "epoch": 1.5579520335279762, "grad_norm": 1.0603193044662476, "learning_rate": 1.1607584893896112e-05, "loss": 0.9069, "step": 243860 }, { "epoch": 1.5580159206777149, "grad_norm": 1.3208253383636475, "learning_rate": 1.1604370599109143e-05, "loss": 1.0391, "step": 243870 }, { "epoch": 1.5580798078274536, "grad_norm": 0.860833466053009, "learning_rate": 1.1601156690999398e-05, "loss": 1.1647, "step": 243880 }, { "epoch": 1.5581436949771923, "grad_norm": 0.8165043592453003, "learning_rate": 1.1597943169599212e-05, "loss": 0.7295, "step": 243890 }, { "epoch": 1.558207582126931, "grad_norm": 1.4571269750595093, "learning_rate": 1.1594730034940976e-05, "loss": 0.9935, "step": 243900 }, { "epoch": 1.5582714692766697, "grad_norm": 0.9263190627098083, "learning_rate": 1.1591517287057013e-05, "loss": 0.8241, "step": 243910 }, { "epoch": 1.5583353564264084, "grad_norm": 0.7845439314842224, "learning_rate": 1.1588304925979704e-05, "loss": 0.7681, "step": 243920 }, { "epoch": 1.5583992435761471, "grad_norm": 1.0294877290725708, "learning_rate": 1.1585092951741405e-05, "loss": 0.9277, "step": 243930 }, { "epoch": 1.5584631307258858, "grad_norm": 0.9200586676597595, "learning_rate": 1.1581881364374448e-05, "loss": 0.7354, "step": 243940 }, { "epoch": 1.5585270178756245, "grad_norm": 0.8697454333305359, "learning_rate": 1.1578670163911186e-05, "loss": 0.7926, "step": 243950 }, { "epoch": 1.5585909050253632, "grad_norm": 1.2176082134246826, "learning_rate": 1.157545935038395e-05, "loss": 0.6585, "step": 243960 }, { "epoch": 1.558654792175102, "grad_norm": 0.5793275833129883, "learning_rate": 1.1572248923825102e-05, "loss": 0.7817, "step": 243970 }, { "epoch": 1.5587186793248407, "grad_norm": 1.0738435983657837, "learning_rate": 1.1569038884266924e-05, "loss": 1.0253, "step": 243980 }, { "epoch": 1.5587825664745794, "grad_norm": 0.824019730091095, "learning_rate": 1.1565829231741787e-05, "loss": 0.7129, "step": 243990 }, { "epoch": 1.558846453624318, "grad_norm": 0.8560379147529602, "learning_rate": 1.1562619966281985e-05, "loss": 0.8505, "step": 244000 }, { "epoch": 1.5589103407740565, "grad_norm": 1.3552207946777344, "learning_rate": 1.1559411087919868e-05, "loss": 0.5609, "step": 244010 }, { "epoch": 1.5589742279237955, "grad_norm": 1.1426405906677246, "learning_rate": 1.1556202596687726e-05, "loss": 0.7931, "step": 244020 }, { "epoch": 1.559038115073534, "grad_norm": 2.6822657585144043, "learning_rate": 1.155299449261788e-05, "loss": 1.1084, "step": 244030 }, { "epoch": 1.5591020022232729, "grad_norm": 0.5422342419624329, "learning_rate": 1.1549786775742656e-05, "loss": 1.004, "step": 244040 }, { "epoch": 1.5591658893730114, "grad_norm": 2.055058479309082, "learning_rate": 1.154657944609433e-05, "loss": 0.6937, "step": 244050 }, { "epoch": 1.5592297765227503, "grad_norm": 0.8282245993614197, "learning_rate": 1.1543372503705224e-05, "loss": 0.8837, "step": 244060 }, { "epoch": 1.5592936636724888, "grad_norm": 0.898158609867096, "learning_rate": 1.1540165948607618e-05, "loss": 0.7305, "step": 244070 }, { "epoch": 1.5593575508222277, "grad_norm": 1.2153574228286743, "learning_rate": 1.1536959780833829e-05, "loss": 0.8754, "step": 244080 }, { "epoch": 1.5594214379719662, "grad_norm": 0.8690845370292664, "learning_rate": 1.1533754000416114e-05, "loss": 0.7418, "step": 244090 }, { "epoch": 1.5594853251217051, "grad_norm": 0.9315603375434875, "learning_rate": 1.1530548607386788e-05, "loss": 0.7841, "step": 244100 }, { "epoch": 1.5595492122714436, "grad_norm": 1.0761457681655884, "learning_rate": 1.1527343601778101e-05, "loss": 0.7509, "step": 244110 }, { "epoch": 1.5596130994211825, "grad_norm": 1.8089863061904907, "learning_rate": 1.1524138983622368e-05, "loss": 0.9987, "step": 244120 }, { "epoch": 1.559676986570921, "grad_norm": 1.311298131942749, "learning_rate": 1.1520934752951824e-05, "loss": 1.0197, "step": 244130 }, { "epoch": 1.55974087372066, "grad_norm": 0.5967944264411926, "learning_rate": 1.1517730909798768e-05, "loss": 0.7139, "step": 244140 }, { "epoch": 1.5598047608703984, "grad_norm": 1.037063479423523, "learning_rate": 1.1514527454195445e-05, "loss": 0.8653, "step": 244150 }, { "epoch": 1.5598686480201374, "grad_norm": 0.7818079590797424, "learning_rate": 1.1511324386174138e-05, "loss": 0.7673, "step": 244160 }, { "epoch": 1.5599325351698758, "grad_norm": 0.9035627245903015, "learning_rate": 1.1508121705767072e-05, "loss": 0.8202, "step": 244170 }, { "epoch": 1.5599964223196148, "grad_norm": 0.9490880966186523, "learning_rate": 1.1504919413006542e-05, "loss": 0.769, "step": 244180 }, { "epoch": 1.5600603094693533, "grad_norm": 0.8521928191184998, "learning_rate": 1.1501717507924759e-05, "loss": 0.8679, "step": 244190 }, { "epoch": 1.5601241966190922, "grad_norm": 0.8884910941123962, "learning_rate": 1.1498515990554e-05, "loss": 0.9999, "step": 244200 }, { "epoch": 1.5601880837688307, "grad_norm": 0.7485408186912537, "learning_rate": 1.1495314860926481e-05, "loss": 0.9666, "step": 244210 }, { "epoch": 1.5602519709185696, "grad_norm": 1.0756878852844238, "learning_rate": 1.1492114119074465e-05, "loss": 1.007, "step": 244220 }, { "epoch": 1.560315858068308, "grad_norm": 0.7426232099533081, "learning_rate": 1.1488913765030163e-05, "loss": 0.9013, "step": 244230 }, { "epoch": 1.5603797452180468, "grad_norm": 0.5656971335411072, "learning_rate": 1.1485713798825815e-05, "loss": 0.8492, "step": 244240 }, { "epoch": 1.5604436323677855, "grad_norm": 1.036117434501648, "learning_rate": 1.1482514220493663e-05, "loss": 0.9966, "step": 244250 }, { "epoch": 1.5605075195175242, "grad_norm": 1.0510843992233276, "learning_rate": 1.1479315030065897e-05, "loss": 0.9037, "step": 244260 }, { "epoch": 1.560571406667263, "grad_norm": 6.631254196166992, "learning_rate": 1.1476116227574768e-05, "loss": 0.9423, "step": 244270 }, { "epoch": 1.5606352938170016, "grad_norm": 0.85828697681427, "learning_rate": 1.147291781305247e-05, "loss": 0.9699, "step": 244280 }, { "epoch": 1.5606991809667403, "grad_norm": 1.2128971815109253, "learning_rate": 1.146971978653123e-05, "loss": 0.8753, "step": 244290 }, { "epoch": 1.560763068116479, "grad_norm": 1.1858617067337036, "learning_rate": 1.1466522148043229e-05, "loss": 0.7687, "step": 244300 }, { "epoch": 1.5608269552662177, "grad_norm": 0.7385855317115784, "learning_rate": 1.1463324897620702e-05, "loss": 0.7099, "step": 244310 }, { "epoch": 1.5608908424159564, "grad_norm": 1.2785903215408325, "learning_rate": 1.146012803529582e-05, "loss": 0.9598, "step": 244320 }, { "epoch": 1.5609547295656951, "grad_norm": 0.951858639717102, "learning_rate": 1.1456931561100798e-05, "loss": 0.765, "step": 244330 }, { "epoch": 1.5610186167154338, "grad_norm": 0.9530162811279297, "learning_rate": 1.1453735475067811e-05, "loss": 0.7981, "step": 244340 }, { "epoch": 1.5610825038651726, "grad_norm": 1.2772146463394165, "learning_rate": 1.1450539777229069e-05, "loss": 0.7661, "step": 244350 }, { "epoch": 1.5611463910149113, "grad_norm": 1.0854233503341675, "learning_rate": 1.1447344467616727e-05, "loss": 0.7692, "step": 244360 }, { "epoch": 1.56121027816465, "grad_norm": 1.4239158630371094, "learning_rate": 1.1444149546262995e-05, "loss": 0.865, "step": 244370 }, { "epoch": 1.5612741653143887, "grad_norm": 1.141292691230774, "learning_rate": 1.1440955013200017e-05, "loss": 0.7969, "step": 244380 }, { "epoch": 1.5613380524641274, "grad_norm": 0.7779687643051147, "learning_rate": 1.143776086845999e-05, "loss": 0.7294, "step": 244390 }, { "epoch": 1.561401939613866, "grad_norm": 1.1889150142669678, "learning_rate": 1.1434567112075061e-05, "loss": 0.9621, "step": 244400 }, { "epoch": 1.5614658267636048, "grad_norm": 0.5841159224510193, "learning_rate": 1.1431373744077422e-05, "loss": 0.6453, "step": 244410 }, { "epoch": 1.5615297139133435, "grad_norm": 0.8326659202575684, "learning_rate": 1.1428180764499202e-05, "loss": 0.8479, "step": 244420 }, { "epoch": 1.5615936010630822, "grad_norm": 0.6364220976829529, "learning_rate": 1.142498817337257e-05, "loss": 0.8159, "step": 244430 }, { "epoch": 1.561657488212821, "grad_norm": 1.0597093105316162, "learning_rate": 1.1421795970729688e-05, "loss": 0.7059, "step": 244440 }, { "epoch": 1.5617213753625596, "grad_norm": 0.6557106375694275, "learning_rate": 1.1418604156602686e-05, "loss": 0.9927, "step": 244450 }, { "epoch": 1.5617852625122983, "grad_norm": 1.2477772235870361, "learning_rate": 1.1415412731023745e-05, "loss": 1.1245, "step": 244460 }, { "epoch": 1.561849149662037, "grad_norm": 0.8714918494224548, "learning_rate": 1.1412221694024954e-05, "loss": 0.7583, "step": 244470 }, { "epoch": 1.5619130368117757, "grad_norm": 0.969723105430603, "learning_rate": 1.140903104563848e-05, "loss": 1.0076, "step": 244480 }, { "epoch": 1.5619769239615144, "grad_norm": 1.1985634565353394, "learning_rate": 1.1405840785896443e-05, "loss": 0.7064, "step": 244490 }, { "epoch": 1.562040811111253, "grad_norm": 2.019909381866455, "learning_rate": 1.1402650914830987e-05, "loss": 0.8153, "step": 244500 }, { "epoch": 1.5621046982609919, "grad_norm": 0.9854116439819336, "learning_rate": 1.1399461432474218e-05, "loss": 0.9243, "step": 244510 }, { "epoch": 1.5621685854107303, "grad_norm": 0.7602574229240417, "learning_rate": 1.1396272338858276e-05, "loss": 0.6641, "step": 244520 }, { "epoch": 1.5622324725604693, "grad_norm": 0.8997085690498352, "learning_rate": 1.1393083634015255e-05, "loss": 0.9308, "step": 244530 }, { "epoch": 1.5622963597102077, "grad_norm": 1.1706210374832153, "learning_rate": 1.13898953179773e-05, "loss": 0.9875, "step": 244540 }, { "epoch": 1.5623602468599467, "grad_norm": 1.6842466592788696, "learning_rate": 1.1386707390776485e-05, "loss": 0.9852, "step": 244550 }, { "epoch": 1.5624241340096852, "grad_norm": 2.359833002090454, "learning_rate": 1.138351985244493e-05, "loss": 0.8689, "step": 244560 }, { "epoch": 1.562488021159424, "grad_norm": 0.835586667060852, "learning_rate": 1.1380332703014757e-05, "loss": 1.0087, "step": 244570 }, { "epoch": 1.5625519083091626, "grad_norm": 1.5438088178634644, "learning_rate": 1.1377145942518024e-05, "loss": 0.6784, "step": 244580 }, { "epoch": 1.5626157954589015, "grad_norm": 1.3177486658096313, "learning_rate": 1.1373959570986864e-05, "loss": 0.8362, "step": 244590 }, { "epoch": 1.56267968260864, "grad_norm": 1.20131254196167, "learning_rate": 1.1370773588453332e-05, "loss": 1.1488, "step": 244600 }, { "epoch": 1.562743569758379, "grad_norm": 0.9831515550613403, "learning_rate": 1.136758799494954e-05, "loss": 0.8171, "step": 244610 }, { "epoch": 1.5628074569081174, "grad_norm": 0.7109906673431396, "learning_rate": 1.136440279050755e-05, "loss": 1.0254, "step": 244620 }, { "epoch": 1.5628713440578563, "grad_norm": 0.9102615714073181, "learning_rate": 1.1361217975159454e-05, "loss": 0.6895, "step": 244630 }, { "epoch": 1.5629352312075948, "grad_norm": 1.2789576053619385, "learning_rate": 1.1358033548937314e-05, "loss": 0.7483, "step": 244640 }, { "epoch": 1.5629991183573337, "grad_norm": 1.4456409215927124, "learning_rate": 1.1354849511873222e-05, "loss": 0.7125, "step": 244650 }, { "epoch": 1.5630630055070722, "grad_norm": 1.380568027496338, "learning_rate": 1.1351665863999206e-05, "loss": 0.8184, "step": 244660 }, { "epoch": 1.5631268926568112, "grad_norm": 1.104844093322754, "learning_rate": 1.1348482605347372e-05, "loss": 0.7848, "step": 244670 }, { "epoch": 1.5631907798065496, "grad_norm": 1.1302462816238403, "learning_rate": 1.1345299735949738e-05, "loss": 0.813, "step": 244680 }, { "epoch": 1.5632546669562886, "grad_norm": 0.9731299877166748, "learning_rate": 1.1342117255838391e-05, "loss": 0.887, "step": 244690 }, { "epoch": 1.563318554106027, "grad_norm": 1.2548291683197021, "learning_rate": 1.1338935165045356e-05, "loss": 0.9777, "step": 244700 }, { "epoch": 1.563382441255766, "grad_norm": 0.9935744404792786, "learning_rate": 1.13357534636027e-05, "loss": 0.927, "step": 244710 }, { "epoch": 1.5634463284055045, "grad_norm": 1.089245319366455, "learning_rate": 1.1332572151542448e-05, "loss": 0.904, "step": 244720 }, { "epoch": 1.5635102155552432, "grad_norm": 0.7848595380783081, "learning_rate": 1.1329391228896652e-05, "loss": 0.8947, "step": 244730 }, { "epoch": 1.5635741027049819, "grad_norm": 0.7474117875099182, "learning_rate": 1.132621069569733e-05, "loss": 0.8568, "step": 244740 }, { "epoch": 1.5636379898547206, "grad_norm": 1.012871503829956, "learning_rate": 1.1323030551976544e-05, "loss": 0.7365, "step": 244750 }, { "epoch": 1.5637018770044593, "grad_norm": 0.6147935390472412, "learning_rate": 1.1319850797766285e-05, "loss": 0.6997, "step": 244760 }, { "epoch": 1.563765764154198, "grad_norm": 0.7995234727859497, "learning_rate": 1.1316671433098585e-05, "loss": 0.787, "step": 244770 }, { "epoch": 1.5638296513039367, "grad_norm": 1.2112396955490112, "learning_rate": 1.1313492458005488e-05, "loss": 0.7857, "step": 244780 }, { "epoch": 1.5638935384536754, "grad_norm": 1.4421223402023315, "learning_rate": 1.1310313872518979e-05, "loss": 0.9153, "step": 244790 }, { "epoch": 1.563957425603414, "grad_norm": 1.025989055633545, "learning_rate": 1.1307135676671087e-05, "loss": 0.8745, "step": 244800 }, { "epoch": 1.5640213127531528, "grad_norm": 1.7144854068756104, "learning_rate": 1.1303957870493808e-05, "loss": 0.8442, "step": 244810 }, { "epoch": 1.5640851999028915, "grad_norm": 0.9952170252799988, "learning_rate": 1.130078045401916e-05, "loss": 1.0449, "step": 244820 }, { "epoch": 1.5641490870526302, "grad_norm": 0.763575553894043, "learning_rate": 1.129760342727912e-05, "loss": 0.8549, "step": 244830 }, { "epoch": 1.564212974202369, "grad_norm": 0.5484232306480408, "learning_rate": 1.1294426790305707e-05, "loss": 0.8661, "step": 244840 }, { "epoch": 1.5642768613521076, "grad_norm": 1.4526423215866089, "learning_rate": 1.1291250543130888e-05, "loss": 0.7648, "step": 244850 }, { "epoch": 1.5643407485018463, "grad_norm": 6.604135990142822, "learning_rate": 1.1288074685786677e-05, "loss": 0.9358, "step": 244860 }, { "epoch": 1.564404635651585, "grad_norm": 0.8231043815612793, "learning_rate": 1.1284899218305034e-05, "loss": 0.6355, "step": 244870 }, { "epoch": 1.5644685228013238, "grad_norm": 0.8967733979225159, "learning_rate": 1.128172414071796e-05, "loss": 0.7986, "step": 244880 }, { "epoch": 1.5645324099510625, "grad_norm": 0.6413818597793579, "learning_rate": 1.1278549453057408e-05, "loss": 1.1034, "step": 244890 }, { "epoch": 1.5645962971008012, "grad_norm": 0.7567101716995239, "learning_rate": 1.1275375155355372e-05, "loss": 0.661, "step": 244900 }, { "epoch": 1.5646601842505399, "grad_norm": 1.2307841777801514, "learning_rate": 1.1272201247643799e-05, "loss": 0.7908, "step": 244910 }, { "epoch": 1.5647240714002786, "grad_norm": 0.8741402626037598, "learning_rate": 1.1269027729954678e-05, "loss": 0.7941, "step": 244920 }, { "epoch": 1.5647879585500173, "grad_norm": 0.5362954139709473, "learning_rate": 1.1265854602319936e-05, "loss": 0.706, "step": 244930 }, { "epoch": 1.564851845699756, "grad_norm": 1.1786456108093262, "learning_rate": 1.1262681864771568e-05, "loss": 0.94, "step": 244940 }, { "epoch": 1.5649157328494947, "grad_norm": 1.1140693426132202, "learning_rate": 1.1259509517341504e-05, "loss": 0.7359, "step": 244950 }, { "epoch": 1.5649796199992334, "grad_norm": 1.1226584911346436, "learning_rate": 1.125633756006168e-05, "loss": 0.8718, "step": 244960 }, { "epoch": 1.565043507148972, "grad_norm": 1.0554084777832031, "learning_rate": 1.1253165992964071e-05, "loss": 0.811, "step": 244970 }, { "epoch": 1.5651073942987108, "grad_norm": 0.6143940091133118, "learning_rate": 1.1249994816080584e-05, "loss": 0.9069, "step": 244980 }, { "epoch": 1.5651712814484493, "grad_norm": 1.6363439559936523, "learning_rate": 1.1246824029443187e-05, "loss": 0.7897, "step": 244990 }, { "epoch": 1.5652351685981882, "grad_norm": 0.8065378069877625, "learning_rate": 1.1243653633083789e-05, "loss": 0.8535, "step": 245000 }, { "epoch": 1.5652990557479267, "grad_norm": 1.6017779111862183, "learning_rate": 1.1240483627034337e-05, "loss": 0.9318, "step": 245010 }, { "epoch": 1.5653629428976656, "grad_norm": 1.2830206155776978, "learning_rate": 1.1237314011326733e-05, "loss": 0.9921, "step": 245020 }, { "epoch": 1.5654268300474041, "grad_norm": 1.024481177330017, "learning_rate": 1.1234144785992927e-05, "loss": 0.7257, "step": 245030 }, { "epoch": 1.565490717197143, "grad_norm": 0.7877789735794067, "learning_rate": 1.123097595106481e-05, "loss": 0.8077, "step": 245040 }, { "epoch": 1.5655546043468815, "grad_norm": 0.6652231216430664, "learning_rate": 1.1227807506574312e-05, "loss": 0.6924, "step": 245050 }, { "epoch": 1.5656184914966205, "grad_norm": 1.2229474782943726, "learning_rate": 1.1224639452553326e-05, "loss": 0.746, "step": 245060 }, { "epoch": 1.565682378646359, "grad_norm": 0.6137751340866089, "learning_rate": 1.1221471789033777e-05, "loss": 0.8736, "step": 245070 }, { "epoch": 1.5657462657960979, "grad_norm": 1.5874028205871582, "learning_rate": 1.121830451604754e-05, "loss": 0.9268, "step": 245080 }, { "epoch": 1.5658101529458364, "grad_norm": 0.9740021228790283, "learning_rate": 1.1215137633626532e-05, "loss": 1.2189, "step": 245090 }, { "epoch": 1.5658740400955753, "grad_norm": 0.8691504597663879, "learning_rate": 1.1211971141802658e-05, "loss": 0.8799, "step": 245100 }, { "epoch": 1.5659379272453138, "grad_norm": 0.9141390919685364, "learning_rate": 1.1208805040607768e-05, "loss": 1.2535, "step": 245110 }, { "epoch": 1.5660018143950527, "grad_norm": 0.9398859739303589, "learning_rate": 1.1205639330073791e-05, "loss": 0.9259, "step": 245120 }, { "epoch": 1.5660657015447912, "grad_norm": 1.1347993612289429, "learning_rate": 1.1202474010232572e-05, "loss": 0.9103, "step": 245130 }, { "epoch": 1.5661295886945301, "grad_norm": 0.5754815340042114, "learning_rate": 1.1199309081116016e-05, "loss": 0.8963, "step": 245140 }, { "epoch": 1.5661934758442686, "grad_norm": 0.8746938109397888, "learning_rate": 1.1196144542755976e-05, "loss": 0.6805, "step": 245150 }, { "epoch": 1.5662573629940075, "grad_norm": 0.8321001529693604, "learning_rate": 1.1192980395184344e-05, "loss": 1.0746, "step": 245160 }, { "epoch": 1.566321250143746, "grad_norm": 0.5793132781982422, "learning_rate": 1.1189816638432954e-05, "loss": 0.7767, "step": 245170 }, { "epoch": 1.566385137293485, "grad_norm": 1.0133846998214722, "learning_rate": 1.1186653272533698e-05, "loss": 0.7942, "step": 245180 }, { "epoch": 1.5664490244432234, "grad_norm": 1.2261652946472168, "learning_rate": 1.1183490297518417e-05, "loss": 1.0238, "step": 245190 }, { "epoch": 1.5665129115929624, "grad_norm": 1.3811265230178833, "learning_rate": 1.1180327713418976e-05, "loss": 0.8442, "step": 245200 }, { "epoch": 1.5665767987427008, "grad_norm": 0.6937904953956604, "learning_rate": 1.1177165520267207e-05, "loss": 1.0061, "step": 245210 }, { "epoch": 1.5666406858924395, "grad_norm": 1.304955244064331, "learning_rate": 1.1174003718094983e-05, "loss": 1.1358, "step": 245220 }, { "epoch": 1.5667045730421783, "grad_norm": 1.1086231470108032, "learning_rate": 1.1170842306934114e-05, "loss": 0.944, "step": 245230 }, { "epoch": 1.566768460191917, "grad_norm": 0.8318544030189514, "learning_rate": 1.1167681286816472e-05, "loss": 0.7889, "step": 245240 }, { "epoch": 1.5668323473416557, "grad_norm": 0.7987926602363586, "learning_rate": 1.1164520657773863e-05, "loss": 0.8404, "step": 245250 }, { "epoch": 1.5668962344913944, "grad_norm": 0.6072881817817688, "learning_rate": 1.116136041983814e-05, "loss": 0.9524, "step": 245260 }, { "epoch": 1.566960121641133, "grad_norm": 0.8089340329170227, "learning_rate": 1.11582005730411e-05, "loss": 0.5764, "step": 245270 }, { "epoch": 1.5670240087908718, "grad_norm": 1.2315974235534668, "learning_rate": 1.1155041117414584e-05, "loss": 0.854, "step": 245280 }, { "epoch": 1.5670878959406105, "grad_norm": 0.7801371216773987, "learning_rate": 1.1151882052990425e-05, "loss": 0.5417, "step": 245290 }, { "epoch": 1.5671517830903492, "grad_norm": 0.9974910616874695, "learning_rate": 1.1148723379800407e-05, "loss": 0.9432, "step": 245300 }, { "epoch": 1.567215670240088, "grad_norm": 3.204942464828491, "learning_rate": 1.1145565097876376e-05, "loss": 0.925, "step": 245310 }, { "epoch": 1.5672795573898266, "grad_norm": 0.731713056564331, "learning_rate": 1.11424072072501e-05, "loss": 0.691, "step": 245320 }, { "epoch": 1.5673434445395653, "grad_norm": 1.2439576387405396, "learning_rate": 1.113924970795341e-05, "loss": 0.9036, "step": 245330 }, { "epoch": 1.567407331689304, "grad_norm": 1.1690343618392944, "learning_rate": 1.1136092600018084e-05, "loss": 0.8694, "step": 245340 }, { "epoch": 1.5674712188390427, "grad_norm": 1.4979528188705444, "learning_rate": 1.1132935883475942e-05, "loss": 0.9888, "step": 245350 }, { "epoch": 1.5675351059887814, "grad_norm": 1.1090213060379028, "learning_rate": 1.112977955835875e-05, "loss": 0.9002, "step": 245360 }, { "epoch": 1.5675989931385201, "grad_norm": 1.3513494729995728, "learning_rate": 1.1126623624698312e-05, "loss": 0.7067, "step": 245370 }, { "epoch": 1.5676628802882588, "grad_norm": 0.8277401924133301, "learning_rate": 1.1123468082526395e-05, "loss": 1.0927, "step": 245380 }, { "epoch": 1.5677267674379975, "grad_norm": 0.7830334901809692, "learning_rate": 1.1120312931874798e-05, "loss": 0.775, "step": 245390 }, { "epoch": 1.5677906545877363, "grad_norm": 0.8738146424293518, "learning_rate": 1.1117158172775278e-05, "loss": 0.9157, "step": 245400 }, { "epoch": 1.567854541737475, "grad_norm": 1.0072499513626099, "learning_rate": 1.111400380525962e-05, "loss": 0.7875, "step": 245410 }, { "epoch": 1.5679184288872137, "grad_norm": 1.1970336437225342, "learning_rate": 1.1110849829359577e-05, "loss": 0.8484, "step": 245420 }, { "epoch": 1.5679823160369524, "grad_norm": 0.8071025609970093, "learning_rate": 1.110769624510693e-05, "loss": 0.7452, "step": 245430 }, { "epoch": 1.568046203186691, "grad_norm": 0.8323325514793396, "learning_rate": 1.1104543052533433e-05, "loss": 0.8922, "step": 245440 }, { "epoch": 1.5681100903364298, "grad_norm": 0.7894075512886047, "learning_rate": 1.1101390251670818e-05, "loss": 0.8484, "step": 245450 }, { "epoch": 1.5681739774861683, "grad_norm": 0.9994876980781555, "learning_rate": 1.1098237842550874e-05, "loss": 0.6992, "step": 245460 }, { "epoch": 1.5682378646359072, "grad_norm": 1.2985382080078125, "learning_rate": 1.1095085825205309e-05, "loss": 0.8643, "step": 245470 }, { "epoch": 1.5683017517856457, "grad_norm": 1.2723342180252075, "learning_rate": 1.1091934199665904e-05, "loss": 0.9908, "step": 245480 }, { "epoch": 1.5683656389353846, "grad_norm": 0.8650884032249451, "learning_rate": 1.1088782965964373e-05, "loss": 0.9908, "step": 245490 }, { "epoch": 1.568429526085123, "grad_norm": 1.132795810699463, "learning_rate": 1.1085632124132467e-05, "loss": 0.8583, "step": 245500 }, { "epoch": 1.568493413234862, "grad_norm": 0.6821199655532837, "learning_rate": 1.10824816742019e-05, "loss": 0.8731, "step": 245510 }, { "epoch": 1.5685573003846005, "grad_norm": 1.0070725679397583, "learning_rate": 1.107933161620443e-05, "loss": 0.8267, "step": 245520 }, { "epoch": 1.5686211875343394, "grad_norm": 1.131516695022583, "learning_rate": 1.1076181950171743e-05, "loss": 0.8921, "step": 245530 }, { "epoch": 1.568685074684078, "grad_norm": 1.031501293182373, "learning_rate": 1.1073032676135591e-05, "loss": 1.1427, "step": 245540 }, { "epoch": 1.5687489618338168, "grad_norm": 1.1449588537216187, "learning_rate": 1.1069883794127661e-05, "loss": 0.9412, "step": 245550 }, { "epoch": 1.5688128489835553, "grad_norm": 1.0295383930206299, "learning_rate": 1.1066735304179698e-05, "loss": 0.609, "step": 245560 }, { "epoch": 1.5688767361332943, "grad_norm": 1.0607703924179077, "learning_rate": 1.1063587206323378e-05, "loss": 0.7468, "step": 245570 }, { "epoch": 1.5689406232830327, "grad_norm": 0.988595724105835, "learning_rate": 1.1060439500590436e-05, "loss": 0.8743, "step": 245580 }, { "epoch": 1.5690045104327717, "grad_norm": 1.4843655824661255, "learning_rate": 1.1057292187012535e-05, "loss": 1.0648, "step": 245590 }, { "epoch": 1.5690683975825102, "grad_norm": 0.9656323790550232, "learning_rate": 1.1054145265621412e-05, "loss": 0.9058, "step": 245600 }, { "epoch": 1.569132284732249, "grad_norm": 1.3070482015609741, "learning_rate": 1.1050998736448726e-05, "loss": 0.8845, "step": 245610 }, { "epoch": 1.5691961718819876, "grad_norm": 1.023574948310852, "learning_rate": 1.1047852599526176e-05, "loss": 0.6545, "step": 245620 }, { "epoch": 1.5692600590317265, "grad_norm": 0.8293267488479614, "learning_rate": 1.1044706854885462e-05, "loss": 0.8326, "step": 245630 }, { "epoch": 1.569323946181465, "grad_norm": 0.9412983655929565, "learning_rate": 1.1041561502558233e-05, "loss": 1.18, "step": 245640 }, { "epoch": 1.569387833331204, "grad_norm": 1.4803204536437988, "learning_rate": 1.1038416542576202e-05, "loss": 0.891, "step": 245650 }, { "epoch": 1.5694517204809424, "grad_norm": 0.9529423713684082, "learning_rate": 1.1035271974971013e-05, "loss": 0.9386, "step": 245660 }, { "epoch": 1.5695156076306813, "grad_norm": 1.0522215366363525, "learning_rate": 1.1032127799774356e-05, "loss": 1.0287, "step": 245670 }, { "epoch": 1.5695794947804198, "grad_norm": 0.979540228843689, "learning_rate": 1.1028984017017863e-05, "loss": 0.6719, "step": 245680 }, { "epoch": 1.5696433819301587, "grad_norm": 0.9477531909942627, "learning_rate": 1.1025840626733237e-05, "loss": 1.0261, "step": 245690 }, { "epoch": 1.5697072690798972, "grad_norm": 0.8826400637626648, "learning_rate": 1.10226976289521e-05, "loss": 1.0628, "step": 245700 }, { "epoch": 1.569771156229636, "grad_norm": 0.8096141815185547, "learning_rate": 1.101955502370613e-05, "loss": 0.8966, "step": 245710 }, { "epoch": 1.5698350433793746, "grad_norm": 3.1200544834136963, "learning_rate": 1.1016412811026943e-05, "loss": 0.8998, "step": 245720 }, { "epoch": 1.5698989305291133, "grad_norm": 0.9650887250900269, "learning_rate": 1.1013270990946228e-05, "loss": 0.7894, "step": 245730 }, { "epoch": 1.569962817678852, "grad_norm": 1.0511590242385864, "learning_rate": 1.1010129563495586e-05, "loss": 0.7479, "step": 245740 }, { "epoch": 1.5700267048285907, "grad_norm": 0.8517940044403076, "learning_rate": 1.1006988528706685e-05, "loss": 0.6017, "step": 245750 }, { "epoch": 1.5700905919783295, "grad_norm": 1.2697738409042358, "learning_rate": 1.1003847886611129e-05, "loss": 0.9392, "step": 245760 }, { "epoch": 1.5701544791280682, "grad_norm": 0.6032277941703796, "learning_rate": 1.1000707637240571e-05, "loss": 0.7957, "step": 245770 }, { "epoch": 1.5702183662778069, "grad_norm": 0.5549653768539429, "learning_rate": 1.0997567780626617e-05, "loss": 0.5034, "step": 245780 }, { "epoch": 1.5702822534275456, "grad_norm": 0.7875387072563171, "learning_rate": 1.099442831680091e-05, "loss": 0.8163, "step": 245790 }, { "epoch": 1.5703461405772843, "grad_norm": 0.9425227046012878, "learning_rate": 1.0991289245795039e-05, "loss": 0.8701, "step": 245800 }, { "epoch": 1.570410027727023, "grad_norm": 0.7016029357910156, "learning_rate": 1.0988150567640636e-05, "loss": 0.8692, "step": 245810 }, { "epoch": 1.5704739148767617, "grad_norm": 0.8856372237205505, "learning_rate": 1.0985012282369318e-05, "loss": 0.6919, "step": 245820 }, { "epoch": 1.5705378020265004, "grad_norm": 1.202332854270935, "learning_rate": 1.0981874390012664e-05, "loss": 0.7305, "step": 245830 }, { "epoch": 1.570601689176239, "grad_norm": 1.0130597352981567, "learning_rate": 1.0978736890602308e-05, "loss": 0.7973, "step": 245840 }, { "epoch": 1.5706655763259778, "grad_norm": 1.3987774848937988, "learning_rate": 1.0975599784169815e-05, "loss": 0.8538, "step": 245850 }, { "epoch": 1.5707294634757165, "grad_norm": 0.9398975372314453, "learning_rate": 1.0972463070746803e-05, "loss": 0.9216, "step": 245860 }, { "epoch": 1.5707933506254552, "grad_norm": 1.7127617597579956, "learning_rate": 1.0969326750364844e-05, "loss": 0.8187, "step": 245870 }, { "epoch": 1.570857237775194, "grad_norm": 1.2938936948776245, "learning_rate": 1.096619082305554e-05, "loss": 0.7339, "step": 245880 }, { "epoch": 1.5709211249249326, "grad_norm": 0.8230463266372681, "learning_rate": 1.0963055288850455e-05, "loss": 0.772, "step": 245890 }, { "epoch": 1.5709850120746713, "grad_norm": 1.022827386856079, "learning_rate": 1.0959920147781182e-05, "loss": 0.9621, "step": 245900 }, { "epoch": 1.57104889922441, "grad_norm": 1.102954387664795, "learning_rate": 1.0956785399879276e-05, "loss": 0.8469, "step": 245910 }, { "epoch": 1.5711127863741488, "grad_norm": 1.8467339277267456, "learning_rate": 1.0953651045176349e-05, "loss": 1.0997, "step": 245920 }, { "epoch": 1.5711766735238875, "grad_norm": 2.318774461746216, "learning_rate": 1.0950517083703904e-05, "loss": 0.7981, "step": 245930 }, { "epoch": 1.5712405606736262, "grad_norm": 0.8628405928611755, "learning_rate": 1.0947383515493536e-05, "loss": 0.8306, "step": 245940 }, { "epoch": 1.5713044478233646, "grad_norm": 2.304323196411133, "learning_rate": 1.0944250340576818e-05, "loss": 0.9758, "step": 245950 }, { "epoch": 1.5713683349731036, "grad_norm": 0.9406927824020386, "learning_rate": 1.094111755898527e-05, "loss": 0.6177, "step": 245960 }, { "epoch": 1.571432222122842, "grad_norm": 0.7597897052764893, "learning_rate": 1.093798517075047e-05, "loss": 0.7199, "step": 245970 }, { "epoch": 1.571496109272581, "grad_norm": 0.8691983819007874, "learning_rate": 1.0934853175903948e-05, "loss": 0.9129, "step": 245980 }, { "epoch": 1.5715599964223195, "grad_norm": 1.026710033416748, "learning_rate": 1.0931721574477255e-05, "loss": 1.0643, "step": 245990 }, { "epoch": 1.5716238835720584, "grad_norm": 1.5185635089874268, "learning_rate": 1.0928590366501917e-05, "loss": 1.0488, "step": 246000 }, { "epoch": 1.5716877707217969, "grad_norm": 0.7404221892356873, "learning_rate": 1.0925459552009486e-05, "loss": 0.8142, "step": 246010 }, { "epoch": 1.5717516578715358, "grad_norm": 0.7018494606018066, "learning_rate": 1.0922329131031467e-05, "loss": 1.1856, "step": 246020 }, { "epoch": 1.5718155450212743, "grad_norm": 1.0612128973007202, "learning_rate": 1.091919910359942e-05, "loss": 0.5845, "step": 246030 }, { "epoch": 1.5718794321710132, "grad_norm": 2.2911901473999023, "learning_rate": 1.0916069469744827e-05, "loss": 0.8647, "step": 246040 }, { "epoch": 1.5719433193207517, "grad_norm": 1.1287060976028442, "learning_rate": 1.0912940229499247e-05, "loss": 0.9265, "step": 246050 }, { "epoch": 1.5720072064704906, "grad_norm": 0.6138035655021667, "learning_rate": 1.0909811382894158e-05, "loss": 0.8942, "step": 246060 }, { "epoch": 1.5720710936202291, "grad_norm": 1.3819774389266968, "learning_rate": 1.0906682929961099e-05, "loss": 1.0637, "step": 246070 }, { "epoch": 1.572134980769968, "grad_norm": 1.287611484527588, "learning_rate": 1.0903554870731548e-05, "loss": 0.849, "step": 246080 }, { "epoch": 1.5721988679197065, "grad_norm": 0.7293438911437988, "learning_rate": 1.090042720523704e-05, "loss": 0.7931, "step": 246090 }, { "epoch": 1.5722627550694455, "grad_norm": 1.9633628129959106, "learning_rate": 1.0897299933509037e-05, "loss": 0.9321, "step": 246100 }, { "epoch": 1.572326642219184, "grad_norm": 0.5741513967514038, "learning_rate": 1.089417305557907e-05, "loss": 0.6795, "step": 246110 }, { "epoch": 1.5723905293689229, "grad_norm": 1.1001640558242798, "learning_rate": 1.0891046571478597e-05, "loss": 1.11, "step": 246120 }, { "epoch": 1.5724544165186614, "grad_norm": 1.022058129310608, "learning_rate": 1.0887920481239122e-05, "loss": 1.0803, "step": 246130 }, { "epoch": 1.5725183036684003, "grad_norm": 1.7643972635269165, "learning_rate": 1.0884794784892133e-05, "loss": 0.8927, "step": 246140 }, { "epoch": 1.5725821908181388, "grad_norm": 0.7340256571769714, "learning_rate": 1.0881669482469092e-05, "loss": 1.0397, "step": 246150 }, { "epoch": 1.5726460779678777, "grad_norm": 1.4430502653121948, "learning_rate": 1.0878544574001492e-05, "loss": 0.8335, "step": 246160 }, { "epoch": 1.5727099651176162, "grad_norm": 1.156911015510559, "learning_rate": 1.087542005952078e-05, "loss": 0.8916, "step": 246170 }, { "epoch": 1.572773852267355, "grad_norm": 4.588479995727539, "learning_rate": 1.0872295939058446e-05, "loss": 0.8711, "step": 246180 }, { "epoch": 1.5728377394170936, "grad_norm": 1.0178959369659424, "learning_rate": 1.0869172212645933e-05, "loss": 0.8555, "step": 246190 }, { "epoch": 1.5729016265668323, "grad_norm": 0.626876175403595, "learning_rate": 1.0866048880314722e-05, "loss": 0.8164, "step": 246200 }, { "epoch": 1.572965513716571, "grad_norm": 1.4186381101608276, "learning_rate": 1.0863238218182126e-05, "loss": 0.7887, "step": 246210 }, { "epoch": 1.5730294008663097, "grad_norm": 1.20530366897583, "learning_rate": 1.0860115634692013e-05, "loss": 1.002, "step": 246220 }, { "epoch": 1.5730932880160484, "grad_norm": 1.250042200088501, "learning_rate": 1.0856993445374386e-05, "loss": 0.738, "step": 246230 }, { "epoch": 1.5731571751657871, "grad_norm": 1.899093508720398, "learning_rate": 1.0853871650260716e-05, "loss": 0.9449, "step": 246240 }, { "epoch": 1.5732210623155258, "grad_norm": 1.0699888467788696, "learning_rate": 1.0850750249382408e-05, "loss": 0.7348, "step": 246250 }, { "epoch": 1.5732849494652645, "grad_norm": 0.7223033308982849, "learning_rate": 1.0847629242770912e-05, "loss": 0.9035, "step": 246260 }, { "epoch": 1.5733488366150032, "grad_norm": 0.8356568813323975, "learning_rate": 1.0844508630457678e-05, "loss": 1.0011, "step": 246270 }, { "epoch": 1.573412723764742, "grad_norm": 0.8588039875030518, "learning_rate": 1.0841388412474101e-05, "loss": 0.6855, "step": 246280 }, { "epoch": 1.5734766109144807, "grad_norm": 0.5876787304878235, "learning_rate": 1.083826858885163e-05, "loss": 0.8988, "step": 246290 }, { "epoch": 1.5735404980642194, "grad_norm": 0.9825795292854309, "learning_rate": 1.0835149159621666e-05, "loss": 0.8273, "step": 246300 }, { "epoch": 1.573604385213958, "grad_norm": 1.5006439685821533, "learning_rate": 1.0832030124815646e-05, "loss": 0.7658, "step": 246310 }, { "epoch": 1.5736682723636968, "grad_norm": 1.415667176246643, "learning_rate": 1.0828911484464954e-05, "loss": 1.0479, "step": 246320 }, { "epoch": 1.5737321595134355, "grad_norm": 1.1721471548080444, "learning_rate": 1.0825793238601017e-05, "loss": 0.8538, "step": 246330 }, { "epoch": 1.5737960466631742, "grad_norm": 1.0684322118759155, "learning_rate": 1.0822675387255227e-05, "loss": 0.7542, "step": 246340 }, { "epoch": 1.573859933812913, "grad_norm": 0.8883429169654846, "learning_rate": 1.0819557930459e-05, "loss": 1.0311, "step": 246350 }, { "epoch": 1.5739238209626516, "grad_norm": 0.8817760944366455, "learning_rate": 1.081644086824371e-05, "loss": 1.0911, "step": 246360 }, { "epoch": 1.5739877081123903, "grad_norm": 0.7657302021980286, "learning_rate": 1.0813324200640768e-05, "loss": 0.9857, "step": 246370 }, { "epoch": 1.574051595262129, "grad_norm": 1.0451817512512207, "learning_rate": 1.0810207927681542e-05, "loss": 0.8334, "step": 246380 }, { "epoch": 1.5741154824118677, "grad_norm": 1.2856969833374023, "learning_rate": 1.0807092049397439e-05, "loss": 0.8962, "step": 246390 }, { "epoch": 1.5741793695616064, "grad_norm": 0.7358013987541199, "learning_rate": 1.0803976565819813e-05, "loss": 0.8894, "step": 246400 }, { "epoch": 1.5742432567113451, "grad_norm": 1.019336462020874, "learning_rate": 1.0800861476980067e-05, "loss": 1.1504, "step": 246410 }, { "epoch": 1.5743071438610838, "grad_norm": 0.748558521270752, "learning_rate": 1.079774678290954e-05, "loss": 0.7269, "step": 246420 }, { "epoch": 1.5743710310108225, "grad_norm": 0.6980547308921814, "learning_rate": 1.0794632483639634e-05, "loss": 0.8777, "step": 246430 }, { "epoch": 1.574434918160561, "grad_norm": 1.1652023792266846, "learning_rate": 1.0791518579201688e-05, "loss": 0.8815, "step": 246440 }, { "epoch": 1.5744988053103, "grad_norm": 0.9284915328025818, "learning_rate": 1.0788405069627072e-05, "loss": 0.717, "step": 246450 }, { "epoch": 1.5745626924600384, "grad_norm": 1.090552806854248, "learning_rate": 1.0785291954947135e-05, "loss": 0.8092, "step": 246460 }, { "epoch": 1.5746265796097774, "grad_norm": 1.166859745979309, "learning_rate": 1.0782179235193229e-05, "loss": 0.6876, "step": 246470 }, { "epoch": 1.5746904667595159, "grad_norm": 0.8611056804656982, "learning_rate": 1.0779066910396724e-05, "loss": 0.9592, "step": 246480 }, { "epoch": 1.5747543539092548, "grad_norm": 0.9985960721969604, "learning_rate": 1.0775954980588932e-05, "loss": 0.7917, "step": 246490 }, { "epoch": 1.5748182410589933, "grad_norm": 0.5808977484703064, "learning_rate": 1.0772843445801216e-05, "loss": 0.7458, "step": 246500 }, { "epoch": 1.5748821282087322, "grad_norm": 1.3468122482299805, "learning_rate": 1.0769732306064895e-05, "loss": 0.8343, "step": 246510 }, { "epoch": 1.5749460153584707, "grad_norm": 0.7967391610145569, "learning_rate": 1.0766621561411317e-05, "loss": 1.0195, "step": 246520 }, { "epoch": 1.5750099025082096, "grad_norm": 1.3037304878234863, "learning_rate": 1.0763511211871791e-05, "loss": 0.8428, "step": 246530 }, { "epoch": 1.575073789657948, "grad_norm": 0.7973846197128296, "learning_rate": 1.0760401257477664e-05, "loss": 0.8917, "step": 246540 }, { "epoch": 1.575137676807687, "grad_norm": 0.8173840641975403, "learning_rate": 1.0757291698260246e-05, "loss": 0.822, "step": 246550 }, { "epoch": 1.5752015639574255, "grad_norm": 0.8736957311630249, "learning_rate": 1.0754182534250851e-05, "loss": 0.9213, "step": 246560 }, { "epoch": 1.5752654511071644, "grad_norm": 0.9506877064704895, "learning_rate": 1.0751073765480773e-05, "loss": 0.9554, "step": 246570 }, { "epoch": 1.575329338256903, "grad_norm": 1.5564134120941162, "learning_rate": 1.074796539198134e-05, "loss": 0.8712, "step": 246580 }, { "epoch": 1.5753932254066418, "grad_norm": 0.5361325144767761, "learning_rate": 1.0744857413783865e-05, "loss": 0.5999, "step": 246590 }, { "epoch": 1.5754571125563803, "grad_norm": 0.8528962731361389, "learning_rate": 1.0741749830919623e-05, "loss": 0.8792, "step": 246600 }, { "epoch": 1.5755209997061193, "grad_norm": 0.9711998701095581, "learning_rate": 1.0738642643419938e-05, "loss": 0.9086, "step": 246610 }, { "epoch": 1.5755848868558577, "grad_norm": 0.8326975107192993, "learning_rate": 1.073553585131607e-05, "loss": 0.7687, "step": 246620 }, { "epoch": 1.5756487740055967, "grad_norm": 0.7956464290618896, "learning_rate": 1.0732429454639342e-05, "loss": 0.6987, "step": 246630 }, { "epoch": 1.5757126611553351, "grad_norm": 1.4720463752746582, "learning_rate": 1.0729323453421008e-05, "loss": 0.7809, "step": 246640 }, { "epoch": 1.575776548305074, "grad_norm": 1.258072018623352, "learning_rate": 1.0726217847692371e-05, "loss": 0.9506, "step": 246650 }, { "epoch": 1.5758404354548126, "grad_norm": 0.9584420323371887, "learning_rate": 1.0723112637484684e-05, "loss": 0.7599, "step": 246660 }, { "epoch": 1.5759043226045513, "grad_norm": 1.1683968305587769, "learning_rate": 1.0720007822829248e-05, "loss": 0.828, "step": 246670 }, { "epoch": 1.57596820975429, "grad_norm": 0.9970899820327759, "learning_rate": 1.07169034037573e-05, "loss": 0.8159, "step": 246680 }, { "epoch": 1.5760320969040287, "grad_norm": 0.9691714644432068, "learning_rate": 1.0713799380300132e-05, "loss": 0.8154, "step": 246690 }, { "epoch": 1.5760959840537674, "grad_norm": 1.2461732625961304, "learning_rate": 1.071069575248898e-05, "loss": 0.9882, "step": 246700 }, { "epoch": 1.576159871203506, "grad_norm": 0.8372601270675659, "learning_rate": 1.0707592520355125e-05, "loss": 0.84, "step": 246710 }, { "epoch": 1.5762237583532448, "grad_norm": 0.7840808033943176, "learning_rate": 1.0704489683929796e-05, "loss": 0.8733, "step": 246720 }, { "epoch": 1.5762876455029835, "grad_norm": 1.0950987339019775, "learning_rate": 1.0701387243244259e-05, "loss": 0.9205, "step": 246730 }, { "epoch": 1.5763515326527222, "grad_norm": 1.576825737953186, "learning_rate": 1.0698285198329744e-05, "loss": 0.8162, "step": 246740 }, { "epoch": 1.576415419802461, "grad_norm": 1.297703742980957, "learning_rate": 1.0695183549217502e-05, "loss": 0.7828, "step": 246750 }, { "epoch": 1.5764793069521996, "grad_norm": 0.9645804762840271, "learning_rate": 1.069208229593876e-05, "loss": 0.9203, "step": 246760 }, { "epoch": 1.5765431941019383, "grad_norm": 0.9418362975120544, "learning_rate": 1.0688981438524764e-05, "loss": 0.9594, "step": 246770 }, { "epoch": 1.576607081251677, "grad_norm": 1.2878425121307373, "learning_rate": 1.0685880977006723e-05, "loss": 0.8661, "step": 246780 }, { "epoch": 1.5766709684014157, "grad_norm": 1.1463112831115723, "learning_rate": 1.0682780911415868e-05, "loss": 0.8811, "step": 246790 }, { "epoch": 1.5767348555511544, "grad_norm": 0.8709837794303894, "learning_rate": 1.067968124178344e-05, "loss": 0.9157, "step": 246800 }, { "epoch": 1.5767987427008932, "grad_norm": 2.789950132369995, "learning_rate": 1.0676581968140625e-05, "loss": 0.8866, "step": 246810 }, { "epoch": 1.5768626298506319, "grad_norm": 0.8597791790962219, "learning_rate": 1.0673483090518666e-05, "loss": 0.7851, "step": 246820 }, { "epoch": 1.5769265170003706, "grad_norm": 1.0854740142822266, "learning_rate": 1.0670384608948737e-05, "loss": 1.0849, "step": 246830 }, { "epoch": 1.5769904041501093, "grad_norm": 1.0474879741668701, "learning_rate": 1.0667286523462072e-05, "loss": 0.826, "step": 246840 }, { "epoch": 1.577054291299848, "grad_norm": 0.9019538164138794, "learning_rate": 1.066418883408985e-05, "loss": 0.8243, "step": 246850 }, { "epoch": 1.5771181784495867, "grad_norm": 0.8639311790466309, "learning_rate": 1.0661091540863289e-05, "loss": 0.7818, "step": 246860 }, { "epoch": 1.5771820655993254, "grad_norm": 1.1420975923538208, "learning_rate": 1.0657994643813557e-05, "loss": 1.1253, "step": 246870 }, { "epoch": 1.577245952749064, "grad_norm": 1.1920766830444336, "learning_rate": 1.0654898142971865e-05, "loss": 0.9389, "step": 246880 }, { "epoch": 1.5773098398988028, "grad_norm": 1.6129494905471802, "learning_rate": 1.0651802038369373e-05, "loss": 0.8958, "step": 246890 }, { "epoch": 1.5773737270485415, "grad_norm": 0.7555667757987976, "learning_rate": 1.0648706330037289e-05, "loss": 0.8478, "step": 246900 }, { "epoch": 1.57743761419828, "grad_norm": 0.8667502403259277, "learning_rate": 1.0645611018006769e-05, "loss": 0.7366, "step": 246910 }, { "epoch": 1.577501501348019, "grad_norm": 1.4878448247909546, "learning_rate": 1.0642516102309002e-05, "loss": 0.8595, "step": 246920 }, { "epoch": 1.5775653884977574, "grad_norm": 0.6253286600112915, "learning_rate": 1.0639421582975128e-05, "loss": 0.8944, "step": 246930 }, { "epoch": 1.5776292756474963, "grad_norm": 0.9675326943397522, "learning_rate": 1.0636327460036349e-05, "loss": 0.681, "step": 246940 }, { "epoch": 1.5776931627972348, "grad_norm": 0.7513461112976074, "learning_rate": 1.0633233733523795e-05, "loss": 1.0764, "step": 246950 }, { "epoch": 1.5777570499469737, "grad_norm": 0.71452796459198, "learning_rate": 1.0630140403468647e-05, "loss": 0.8061, "step": 246960 }, { "epoch": 1.5778209370967122, "grad_norm": 0.8605654835700989, "learning_rate": 1.062704746990203e-05, "loss": 0.7689, "step": 246970 }, { "epoch": 1.5778848242464512, "grad_norm": 0.9058616757392883, "learning_rate": 1.0623954932855108e-05, "loss": 0.8225, "step": 246980 }, { "epoch": 1.5779487113961896, "grad_norm": 0.860543429851532, "learning_rate": 1.0620862792359037e-05, "loss": 0.8649, "step": 246990 }, { "epoch": 1.5780125985459286, "grad_norm": 0.8707665205001831, "learning_rate": 1.0617771048444936e-05, "loss": 0.7123, "step": 247000 }, { "epoch": 1.578076485695667, "grad_norm": 1.5746833086013794, "learning_rate": 1.061467970114396e-05, "loss": 0.8932, "step": 247010 }, { "epoch": 1.578140372845406, "grad_norm": 0.792719304561615, "learning_rate": 1.0611588750487223e-05, "loss": 0.8443, "step": 247020 }, { "epoch": 1.5782042599951445, "grad_norm": 1.7057514190673828, "learning_rate": 1.0608498196505873e-05, "loss": 0.9104, "step": 247030 }, { "epoch": 1.5782681471448834, "grad_norm": 1.1587259769439697, "learning_rate": 1.0605408039231024e-05, "loss": 0.7765, "step": 247040 }, { "epoch": 1.5783320342946219, "grad_norm": 0.7321895360946655, "learning_rate": 1.0602318278693802e-05, "loss": 0.9205, "step": 247050 }, { "epoch": 1.5783959214443608, "grad_norm": 0.8459962010383606, "learning_rate": 1.0599228914925303e-05, "loss": 0.9146, "step": 247060 }, { "epoch": 1.5784598085940993, "grad_norm": 0.8274263143539429, "learning_rate": 1.0596139947956669e-05, "loss": 0.8276, "step": 247070 }, { "epoch": 1.5785236957438382, "grad_norm": 0.9039373397827148, "learning_rate": 1.059305137781898e-05, "loss": 0.9089, "step": 247080 }, { "epoch": 1.5785875828935767, "grad_norm": 1.0288807153701782, "learning_rate": 1.0589963204543368e-05, "loss": 0.7714, "step": 247090 }, { "epoch": 1.5786514700433156, "grad_norm": 0.9122907519340515, "learning_rate": 1.0586875428160908e-05, "loss": 0.5767, "step": 247100 }, { "epoch": 1.5787153571930541, "grad_norm": 1.0185425281524658, "learning_rate": 1.0583788048702715e-05, "loss": 0.9802, "step": 247110 }, { "epoch": 1.578779244342793, "grad_norm": 0.9830597043037415, "learning_rate": 1.0580701066199883e-05, "loss": 0.9354, "step": 247120 }, { "epoch": 1.5788431314925315, "grad_norm": 0.9498816132545471, "learning_rate": 1.0577614480683485e-05, "loss": 0.9159, "step": 247130 }, { "epoch": 1.5789070186422705, "grad_norm": 0.9181402921676636, "learning_rate": 1.0574528292184622e-05, "loss": 0.7249, "step": 247140 }, { "epoch": 1.578970905792009, "grad_norm": 1.1932886838912964, "learning_rate": 1.0571442500734357e-05, "loss": 0.8719, "step": 247150 }, { "epoch": 1.5790347929417476, "grad_norm": 0.7471179962158203, "learning_rate": 1.0568357106363792e-05, "loss": 0.8601, "step": 247160 }, { "epoch": 1.5790986800914864, "grad_norm": 0.9473945498466492, "learning_rate": 1.0565272109103968e-05, "loss": 1.0133, "step": 247170 }, { "epoch": 1.579162567241225, "grad_norm": 1.2394994497299194, "learning_rate": 1.0562187508985987e-05, "loss": 0.8834, "step": 247180 }, { "epoch": 1.5792264543909638, "grad_norm": 1.355468988418579, "learning_rate": 1.055910330604088e-05, "loss": 0.8369, "step": 247190 }, { "epoch": 1.5792903415407025, "grad_norm": 0.8456324338912964, "learning_rate": 1.0556019500299735e-05, "loss": 0.7767, "step": 247200 }, { "epoch": 1.5793542286904412, "grad_norm": 0.6504706740379333, "learning_rate": 1.0552936091793591e-05, "loss": 0.8129, "step": 247210 }, { "epoch": 1.5794181158401799, "grad_norm": 1.120896339416504, "learning_rate": 1.0549853080553513e-05, "loss": 0.8561, "step": 247220 }, { "epoch": 1.5794820029899186, "grad_norm": 0.928348958492279, "learning_rate": 1.0546770466610533e-05, "loss": 1.0094, "step": 247230 }, { "epoch": 1.5795458901396573, "grad_norm": 1.2079272270202637, "learning_rate": 1.054368824999572e-05, "loss": 0.8225, "step": 247240 }, { "epoch": 1.579609777289396, "grad_norm": 1.0350985527038574, "learning_rate": 1.0540606430740091e-05, "loss": 1.0844, "step": 247250 }, { "epoch": 1.5796736644391347, "grad_norm": 0.8831034302711487, "learning_rate": 1.0537525008874705e-05, "loss": 0.9471, "step": 247260 }, { "epoch": 1.5797375515888734, "grad_norm": 0.7610118389129639, "learning_rate": 1.0534443984430564e-05, "loss": 1.0177, "step": 247270 }, { "epoch": 1.5798014387386121, "grad_norm": 1.1378825902938843, "learning_rate": 1.0531363357438728e-05, "loss": 1.037, "step": 247280 }, { "epoch": 1.5798653258883508, "grad_norm": 1.1919612884521484, "learning_rate": 1.0528283127930199e-05, "loss": 0.7967, "step": 247290 }, { "epoch": 1.5799292130380895, "grad_norm": 0.8108116388320923, "learning_rate": 1.0525203295936004e-05, "loss": 0.86, "step": 247300 }, { "epoch": 1.5799931001878282, "grad_norm": 1.2976237535476685, "learning_rate": 1.0522123861487177e-05, "loss": 0.7617, "step": 247310 }, { "epoch": 1.580056987337567, "grad_norm": 1.0517657995224, "learning_rate": 1.0519044824614705e-05, "loss": 0.9928, "step": 247320 }, { "epoch": 1.5801208744873056, "grad_norm": 1.2619192600250244, "learning_rate": 1.0515966185349612e-05, "loss": 0.9209, "step": 247330 }, { "epoch": 1.5801847616370444, "grad_norm": 0.7939496040344238, "learning_rate": 1.0512887943722893e-05, "loss": 0.7354, "step": 247340 }, { "epoch": 1.580248648786783, "grad_norm": 1.7504665851593018, "learning_rate": 1.0509810099765566e-05, "loss": 0.9486, "step": 247350 }, { "epoch": 1.5803125359365218, "grad_norm": 0.9001903533935547, "learning_rate": 1.05067326535086e-05, "loss": 1.0617, "step": 247360 }, { "epoch": 1.5803764230862605, "grad_norm": 1.1803821325302124, "learning_rate": 1.0503655604983021e-05, "loss": 1.0478, "step": 247370 }, { "epoch": 1.5804403102359992, "grad_norm": 0.8175578117370605, "learning_rate": 1.050057895421978e-05, "loss": 0.9052, "step": 247380 }, { "epoch": 1.5805041973857379, "grad_norm": 0.7418546080589294, "learning_rate": 1.0497502701249894e-05, "loss": 0.7204, "step": 247390 }, { "epoch": 1.5805680845354764, "grad_norm": 0.9057771563529968, "learning_rate": 1.0494426846104321e-05, "loss": 0.6815, "step": 247400 }, { "epoch": 1.5806319716852153, "grad_norm": 1.755724549293518, "learning_rate": 1.0491351388814057e-05, "loss": 0.8857, "step": 247410 }, { "epoch": 1.5806958588349538, "grad_norm": 1.293442964553833, "learning_rate": 1.0488276329410051e-05, "loss": 0.7786, "step": 247420 }, { "epoch": 1.5807597459846927, "grad_norm": 1.0106276273727417, "learning_rate": 1.0485201667923301e-05, "loss": 1.021, "step": 247430 }, { "epoch": 1.5808236331344312, "grad_norm": 1.364980936050415, "learning_rate": 1.048212740438474e-05, "loss": 0.8366, "step": 247440 }, { "epoch": 1.5808875202841701, "grad_norm": 1.1519886255264282, "learning_rate": 1.0479053538825357e-05, "loss": 0.6659, "step": 247450 }, { "epoch": 1.5809514074339086, "grad_norm": 0.8733057379722595, "learning_rate": 1.047598007127608e-05, "loss": 0.8836, "step": 247460 }, { "epoch": 1.5810152945836475, "grad_norm": 0.9156480431556702, "learning_rate": 1.0472907001767895e-05, "loss": 1.1012, "step": 247470 }, { "epoch": 1.581079181733386, "grad_norm": 0.7186042070388794, "learning_rate": 1.0469834330331718e-05, "loss": 0.6984, "step": 247480 }, { "epoch": 1.581143068883125, "grad_norm": 1.4364821910858154, "learning_rate": 1.046676205699852e-05, "loss": 0.8082, "step": 247490 }, { "epoch": 1.5812069560328634, "grad_norm": 1.1890616416931152, "learning_rate": 1.0463690181799212e-05, "loss": 0.8385, "step": 247500 }, { "epoch": 1.5812708431826024, "grad_norm": 1.5968211889266968, "learning_rate": 1.0460618704764752e-05, "loss": 1.0133, "step": 247510 }, { "epoch": 1.5813347303323408, "grad_norm": 0.9693719744682312, "learning_rate": 1.0457547625926096e-05, "loss": 0.7946, "step": 247520 }, { "epoch": 1.5813986174820798, "grad_norm": 1.1753880977630615, "learning_rate": 1.0454476945314113e-05, "loss": 0.878, "step": 247530 }, { "epoch": 1.5814625046318183, "grad_norm": 1.0377428531646729, "learning_rate": 1.0451406662959778e-05, "loss": 0.8611, "step": 247540 }, { "epoch": 1.5815263917815572, "grad_norm": 1.8484477996826172, "learning_rate": 1.0448336778893974e-05, "loss": 0.9474, "step": 247550 }, { "epoch": 1.5815902789312957, "grad_norm": 0.8086001873016357, "learning_rate": 1.0445267293147654e-05, "loss": 0.7801, "step": 247560 }, { "epoch": 1.5816541660810346, "grad_norm": 0.9309337139129639, "learning_rate": 1.0442198205751692e-05, "loss": 0.9321, "step": 247570 }, { "epoch": 1.581718053230773, "grad_norm": 0.783740758895874, "learning_rate": 1.0439129516737034e-05, "loss": 0.8634, "step": 247580 }, { "epoch": 1.581781940380512, "grad_norm": 0.7251185178756714, "learning_rate": 1.0436061226134553e-05, "loss": 0.8588, "step": 247590 }, { "epoch": 1.5818458275302505, "grad_norm": 0.8295360803604126, "learning_rate": 1.0432993333975176e-05, "loss": 0.9542, "step": 247600 }, { "epoch": 1.5819097146799894, "grad_norm": 0.7831049561500549, "learning_rate": 1.0429925840289772e-05, "loss": 0.7736, "step": 247610 }, { "epoch": 1.581973601829728, "grad_norm": 0.6984795928001404, "learning_rate": 1.0426858745109263e-05, "loss": 1.1484, "step": 247620 }, { "epoch": 1.5820374889794668, "grad_norm": 1.2676637172698975, "learning_rate": 1.042379204846451e-05, "loss": 1.0692, "step": 247630 }, { "epoch": 1.5821013761292053, "grad_norm": 0.7221893072128296, "learning_rate": 1.0420725750386407e-05, "loss": 0.6549, "step": 247640 }, { "epoch": 1.582165263278944, "grad_norm": 1.0046474933624268, "learning_rate": 1.0417659850905853e-05, "loss": 0.8093, "step": 247650 }, { "epoch": 1.5822291504286827, "grad_norm": 0.6917647123336792, "learning_rate": 1.0414594350053691e-05, "loss": 0.8005, "step": 247660 }, { "epoch": 1.5822930375784214, "grad_norm": 1.201385259628296, "learning_rate": 1.0411529247860824e-05, "loss": 0.98, "step": 247670 }, { "epoch": 1.5823569247281601, "grad_norm": 0.5854590535163879, "learning_rate": 1.0408464544358094e-05, "loss": 1.0863, "step": 247680 }, { "epoch": 1.5824208118778988, "grad_norm": 0.9084241986274719, "learning_rate": 1.040540023957639e-05, "loss": 0.8678, "step": 247690 }, { "epoch": 1.5824846990276376, "grad_norm": 0.6541767716407776, "learning_rate": 1.0402336333546547e-05, "loss": 0.9901, "step": 247700 }, { "epoch": 1.5825485861773763, "grad_norm": 1.1705743074417114, "learning_rate": 1.039927282629945e-05, "loss": 1.1037, "step": 247710 }, { "epoch": 1.582612473327115, "grad_norm": 0.9867904782295227, "learning_rate": 1.0396209717865918e-05, "loss": 1.0031, "step": 247720 }, { "epoch": 1.5826763604768537, "grad_norm": 0.9883776903152466, "learning_rate": 1.0393147008276832e-05, "loss": 0.9994, "step": 247730 }, { "epoch": 1.5827402476265924, "grad_norm": 1.223418116569519, "learning_rate": 1.0390084697563008e-05, "loss": 0.9183, "step": 247740 }, { "epoch": 1.582804134776331, "grad_norm": 1.9032589197158813, "learning_rate": 1.0387022785755307e-05, "loss": 0.8064, "step": 247750 }, { "epoch": 1.5828680219260698, "grad_norm": 0.8207881450653076, "learning_rate": 1.0383961272884546e-05, "loss": 0.8457, "step": 247760 }, { "epoch": 1.5829319090758085, "grad_norm": 1.2639472484588623, "learning_rate": 1.0380900158981583e-05, "loss": 0.8783, "step": 247770 }, { "epoch": 1.5829957962255472, "grad_norm": 0.9134830832481384, "learning_rate": 1.0377839444077215e-05, "loss": 0.7592, "step": 247780 }, { "epoch": 1.583059683375286, "grad_norm": 1.3179031610488892, "learning_rate": 1.0374779128202294e-05, "loss": 0.8649, "step": 247790 }, { "epoch": 1.5831235705250246, "grad_norm": 0.8579550981521606, "learning_rate": 1.037171921138762e-05, "loss": 1.1334, "step": 247800 }, { "epoch": 1.5831874576747633, "grad_norm": 0.6448858976364136, "learning_rate": 1.0368659693664023e-05, "loss": 0.9309, "step": 247810 }, { "epoch": 1.583251344824502, "grad_norm": 0.9539541602134705, "learning_rate": 1.03656005750623e-05, "loss": 0.8983, "step": 247820 }, { "epoch": 1.5833152319742407, "grad_norm": 1.5543828010559082, "learning_rate": 1.0362541855613267e-05, "loss": 0.9587, "step": 247830 }, { "epoch": 1.5833791191239794, "grad_norm": 1.0303435325622559, "learning_rate": 1.0359483535347742e-05, "loss": 1.2866, "step": 247840 }, { "epoch": 1.5834430062737181, "grad_norm": 1.1230798959732056, "learning_rate": 1.03564256142965e-05, "loss": 0.8112, "step": 247850 }, { "epoch": 1.5835068934234569, "grad_norm": 1.0403016805648804, "learning_rate": 1.0353368092490362e-05, "loss": 0.7441, "step": 247860 }, { "epoch": 1.5835707805731956, "grad_norm": 1.1610591411590576, "learning_rate": 1.0350310969960092e-05, "loss": 0.8292, "step": 247870 }, { "epoch": 1.5836346677229343, "grad_norm": 0.8638874292373657, "learning_rate": 1.0347254246736504e-05, "loss": 1.0327, "step": 247880 }, { "epoch": 1.5836985548726727, "grad_norm": 0.9477463364601135, "learning_rate": 1.0344197922850357e-05, "loss": 0.9738, "step": 247890 }, { "epoch": 1.5837624420224117, "grad_norm": 0.8979694247245789, "learning_rate": 1.034114199833246e-05, "loss": 0.8999, "step": 247900 }, { "epoch": 1.5838263291721502, "grad_norm": 3.237966299057007, "learning_rate": 1.033808647321356e-05, "loss": 0.7072, "step": 247910 }, { "epoch": 1.583890216321889, "grad_norm": 1.0422779321670532, "learning_rate": 1.0335031347524454e-05, "loss": 0.6422, "step": 247920 }, { "epoch": 1.5839541034716276, "grad_norm": 0.9260254502296448, "learning_rate": 1.033197662129588e-05, "loss": 0.9527, "step": 247930 }, { "epoch": 1.5840179906213665, "grad_norm": 3.0579593181610107, "learning_rate": 1.032892229455864e-05, "loss": 1.086, "step": 247940 }, { "epoch": 1.584081877771105, "grad_norm": 1.0561027526855469, "learning_rate": 1.0325868367343456e-05, "loss": 0.8016, "step": 247950 }, { "epoch": 1.584145764920844, "grad_norm": 0.6812159419059753, "learning_rate": 1.032281483968111e-05, "loss": 0.71, "step": 247960 }, { "epoch": 1.5842096520705824, "grad_norm": 1.056764841079712, "learning_rate": 1.0319761711602332e-05, "loss": 1.2764, "step": 247970 }, { "epoch": 1.5842735392203213, "grad_norm": 0.7221099734306335, "learning_rate": 1.0316708983137902e-05, "loss": 0.8135, "step": 247980 }, { "epoch": 1.5843374263700598, "grad_norm": 0.838124692440033, "learning_rate": 1.031365665431852e-05, "loss": 1.0928, "step": 247990 }, { "epoch": 1.5844013135197987, "grad_norm": 0.7449299693107605, "learning_rate": 1.0310604725174971e-05, "loss": 0.9119, "step": 248000 }, { "epoch": 1.5844652006695372, "grad_norm": 1.1632795333862305, "learning_rate": 1.0307553195737968e-05, "loss": 0.841, "step": 248010 }, { "epoch": 1.5845290878192761, "grad_norm": 1.0129088163375854, "learning_rate": 1.0304502066038224e-05, "loss": 0.9286, "step": 248020 }, { "epoch": 1.5845929749690146, "grad_norm": 0.9895704388618469, "learning_rate": 1.0301451336106504e-05, "loss": 1.1631, "step": 248030 }, { "epoch": 1.5846568621187536, "grad_norm": 1.057637095451355, "learning_rate": 1.0298401005973502e-05, "loss": 0.79, "step": 248040 }, { "epoch": 1.584720749268492, "grad_norm": 1.1140066385269165, "learning_rate": 1.0295351075669957e-05, "loss": 0.5707, "step": 248050 }, { "epoch": 1.584784636418231, "grad_norm": 1.073408842086792, "learning_rate": 1.0292301545226562e-05, "loss": 0.8806, "step": 248060 }, { "epoch": 1.5848485235679695, "grad_norm": 0.9552643299102783, "learning_rate": 1.0289252414674055e-05, "loss": 0.9322, "step": 248070 }, { "epoch": 1.5849124107177084, "grad_norm": 0.6368883848190308, "learning_rate": 1.0286203684043122e-05, "loss": 0.9138, "step": 248080 }, { "epoch": 1.5849762978674469, "grad_norm": 1.0097981691360474, "learning_rate": 1.0283155353364487e-05, "loss": 0.8904, "step": 248090 }, { "epoch": 1.5850401850171858, "grad_norm": 0.7692947387695312, "learning_rate": 1.0280107422668822e-05, "loss": 0.97, "step": 248100 }, { "epoch": 1.5851040721669243, "grad_norm": 0.7867854237556458, "learning_rate": 1.0277059891986856e-05, "loss": 0.778, "step": 248110 }, { "epoch": 1.5851679593166632, "grad_norm": 0.7965070009231567, "learning_rate": 1.0274012761349244e-05, "loss": 0.8268, "step": 248120 }, { "epoch": 1.5852318464664017, "grad_norm": 1.3041713237762451, "learning_rate": 1.0270966030786711e-05, "loss": 1.1806, "step": 248130 }, { "epoch": 1.5852957336161404, "grad_norm": 0.7487338781356812, "learning_rate": 1.0267919700329903e-05, "loss": 0.8568, "step": 248140 }, { "epoch": 1.585359620765879, "grad_norm": 5.159639358520508, "learning_rate": 1.0264873770009514e-05, "loss": 0.8124, "step": 248150 }, { "epoch": 1.5854235079156178, "grad_norm": 0.80680251121521, "learning_rate": 1.0261828239856241e-05, "loss": 0.8401, "step": 248160 }, { "epoch": 1.5854873950653565, "grad_norm": 1.3771222829818726, "learning_rate": 1.0258783109900717e-05, "loss": 0.857, "step": 248170 }, { "epoch": 1.5855512822150952, "grad_norm": 0.7607705593109131, "learning_rate": 1.0255738380173647e-05, "loss": 1.0143, "step": 248180 }, { "epoch": 1.585615169364834, "grad_norm": 0.9139487147331238, "learning_rate": 1.0252694050705658e-05, "loss": 0.888, "step": 248190 }, { "epoch": 1.5856790565145726, "grad_norm": 1.1925636529922485, "learning_rate": 1.0249650121527443e-05, "loss": 0.8567, "step": 248200 }, { "epoch": 1.5857429436643113, "grad_norm": 1.0441328287124634, "learning_rate": 1.0246606592669622e-05, "loss": 0.9031, "step": 248210 }, { "epoch": 1.58580683081405, "grad_norm": 0.8782601356506348, "learning_rate": 1.0243563464162881e-05, "loss": 1.0137, "step": 248220 }, { "epoch": 1.5858707179637888, "grad_norm": 1.2816804647445679, "learning_rate": 1.0240520736037834e-05, "loss": 0.8509, "step": 248230 }, { "epoch": 1.5859346051135275, "grad_norm": 0.9288935661315918, "learning_rate": 1.0237478408325158e-05, "loss": 0.9061, "step": 248240 }, { "epoch": 1.5859984922632662, "grad_norm": 0.8910688757896423, "learning_rate": 1.0234436481055454e-05, "loss": 0.9472, "step": 248250 }, { "epoch": 1.5860623794130049, "grad_norm": 0.9356805682182312, "learning_rate": 1.0231394954259394e-05, "loss": 0.9874, "step": 248260 }, { "epoch": 1.5861262665627436, "grad_norm": 1.12856924533844, "learning_rate": 1.0228353827967579e-05, "loss": 0.9618, "step": 248270 }, { "epoch": 1.5861901537124823, "grad_norm": 1.6523456573486328, "learning_rate": 1.022531310221066e-05, "loss": 0.881, "step": 248280 }, { "epoch": 1.586254040862221, "grad_norm": 1.1004304885864258, "learning_rate": 1.0222272777019237e-05, "loss": 0.8419, "step": 248290 }, { "epoch": 1.5863179280119597, "grad_norm": 0.7944563031196594, "learning_rate": 1.0219232852423949e-05, "loss": 0.7764, "step": 248300 }, { "epoch": 1.5863818151616984, "grad_norm": 1.085784673690796, "learning_rate": 1.0216193328455392e-05, "loss": 0.9209, "step": 248310 }, { "epoch": 1.5864457023114371, "grad_norm": 0.8792476654052734, "learning_rate": 1.0213154205144199e-05, "loss": 0.8064, "step": 248320 }, { "epoch": 1.5865095894611758, "grad_norm": 0.6366348266601562, "learning_rate": 1.0210115482520948e-05, "loss": 0.7487, "step": 248330 }, { "epoch": 1.5865734766109145, "grad_norm": 0.8250417113304138, "learning_rate": 1.0207077160616274e-05, "loss": 0.8573, "step": 248340 }, { "epoch": 1.5866373637606532, "grad_norm": 0.9031588435173035, "learning_rate": 1.0204039239460745e-05, "loss": 0.9037, "step": 248350 }, { "epoch": 1.586701250910392, "grad_norm": 0.7464722394943237, "learning_rate": 1.0201001719084969e-05, "loss": 0.8245, "step": 248360 }, { "epoch": 1.5867651380601306, "grad_norm": 1.6804680824279785, "learning_rate": 1.0197964599519543e-05, "loss": 0.838, "step": 248370 }, { "epoch": 1.5868290252098691, "grad_norm": 0.8236187100410461, "learning_rate": 1.019492788079504e-05, "loss": 1.0602, "step": 248380 }, { "epoch": 1.586892912359608, "grad_norm": 1.974440574645996, "learning_rate": 1.0191891562942063e-05, "loss": 0.7904, "step": 248390 }, { "epoch": 1.5869567995093465, "grad_norm": 1.6416873931884766, "learning_rate": 1.0188855645991163e-05, "loss": 1.0055, "step": 248400 }, { "epoch": 1.5870206866590855, "grad_norm": 0.9831100702285767, "learning_rate": 1.0185820129972945e-05, "loss": 0.7863, "step": 248410 }, { "epoch": 1.587084573808824, "grad_norm": 0.891497790813446, "learning_rate": 1.0182785014917945e-05, "loss": 0.7261, "step": 248420 }, { "epoch": 1.5871484609585629, "grad_norm": 0.8720300793647766, "learning_rate": 1.017975030085676e-05, "loss": 0.8513, "step": 248430 }, { "epoch": 1.5872123481083014, "grad_norm": 0.6937757730484009, "learning_rate": 1.0176715987819923e-05, "loss": 0.7849, "step": 248440 }, { "epoch": 1.5872762352580403, "grad_norm": 1.342638611793518, "learning_rate": 1.0173682075838026e-05, "loss": 0.7524, "step": 248450 }, { "epoch": 1.5873401224077788, "grad_norm": 0.6457897424697876, "learning_rate": 1.0170648564941592e-05, "loss": 0.9541, "step": 248460 }, { "epoch": 1.5874040095575177, "grad_norm": 0.9868115782737732, "learning_rate": 1.0167615455161194e-05, "loss": 1.0407, "step": 248470 }, { "epoch": 1.5874678967072562, "grad_norm": 1.2257436513900757, "learning_rate": 1.0164582746527357e-05, "loss": 0.944, "step": 248480 }, { "epoch": 1.5875317838569951, "grad_norm": 1.2771724462509155, "learning_rate": 1.0161550439070649e-05, "loss": 0.9378, "step": 248490 }, { "epoch": 1.5875956710067336, "grad_norm": 0.47110313177108765, "learning_rate": 1.0158518532821593e-05, "loss": 0.9029, "step": 248500 }, { "epoch": 1.5876595581564725, "grad_norm": 1.252586841583252, "learning_rate": 1.0155487027810706e-05, "loss": 0.8562, "step": 248510 }, { "epoch": 1.587723445306211, "grad_norm": 0.7602173089981079, "learning_rate": 1.0152455924068549e-05, "loss": 0.7778, "step": 248520 }, { "epoch": 1.58778733245595, "grad_norm": 0.8817898631095886, "learning_rate": 1.0149425221625625e-05, "loss": 0.982, "step": 248530 }, { "epoch": 1.5878512196056884, "grad_norm": 3.27030873298645, "learning_rate": 1.0146394920512475e-05, "loss": 0.6923, "step": 248540 }, { "epoch": 1.5879151067554274, "grad_norm": 1.6652390956878662, "learning_rate": 1.0143365020759593e-05, "loss": 0.84, "step": 248550 }, { "epoch": 1.5879789939051658, "grad_norm": 0.7446771860122681, "learning_rate": 1.014033552239752e-05, "loss": 1.0318, "step": 248560 }, { "epoch": 1.5880428810549048, "grad_norm": 0.9068264961242676, "learning_rate": 1.0137306425456738e-05, "loss": 0.9516, "step": 248570 }, { "epoch": 1.5881067682046432, "grad_norm": 1.3777496814727783, "learning_rate": 1.0134277729967784e-05, "loss": 0.7882, "step": 248580 }, { "epoch": 1.5881706553543822, "grad_norm": 1.0046823024749756, "learning_rate": 1.013124943596112e-05, "loss": 0.981, "step": 248590 }, { "epoch": 1.5882345425041207, "grad_norm": 1.1702842712402344, "learning_rate": 1.0128221543467288e-05, "loss": 0.6711, "step": 248600 }, { "epoch": 1.5882984296538596, "grad_norm": 0.9184240102767944, "learning_rate": 1.012519405251674e-05, "loss": 0.6594, "step": 248610 }, { "epoch": 1.588362316803598, "grad_norm": 1.1504337787628174, "learning_rate": 1.012216696314e-05, "loss": 0.8985, "step": 248620 }, { "epoch": 1.5884262039533368, "grad_norm": 1.0141348838806152, "learning_rate": 1.0119140275367522e-05, "loss": 1.0221, "step": 248630 }, { "epoch": 1.5884900911030755, "grad_norm": 0.9321434497833252, "learning_rate": 1.011611398922982e-05, "loss": 0.9214, "step": 248640 }, { "epoch": 1.5885539782528142, "grad_norm": 0.8669467568397522, "learning_rate": 1.011308810475734e-05, "loss": 1.0403, "step": 248650 }, { "epoch": 1.588617865402553, "grad_norm": 0.9638845920562744, "learning_rate": 1.0110062621980587e-05, "loss": 0.7519, "step": 248660 }, { "epoch": 1.5886817525522916, "grad_norm": 0.9903292655944824, "learning_rate": 1.0107037540929992e-05, "loss": 0.7911, "step": 248670 }, { "epoch": 1.5887456397020303, "grad_norm": 0.8706985116004944, "learning_rate": 1.0104012861636047e-05, "loss": 0.7986, "step": 248680 }, { "epoch": 1.588809526851769, "grad_norm": 0.6027273535728455, "learning_rate": 1.0100988584129217e-05, "loss": 0.7547, "step": 248690 }, { "epoch": 1.5888734140015077, "grad_norm": 0.8384061455726624, "learning_rate": 1.0097964708439938e-05, "loss": 0.9898, "step": 248700 }, { "epoch": 1.5889373011512464, "grad_norm": 0.747555136680603, "learning_rate": 1.0094941234598693e-05, "loss": 0.813, "step": 248710 }, { "epoch": 1.5890011883009851, "grad_norm": 0.9358962774276733, "learning_rate": 1.0091918162635894e-05, "loss": 0.6808, "step": 248720 }, { "epoch": 1.5890650754507238, "grad_norm": 1.0704421997070312, "learning_rate": 1.0088895492582013e-05, "loss": 0.9423, "step": 248730 }, { "epoch": 1.5891289626004625, "grad_norm": 0.8794128894805908, "learning_rate": 1.0085873224467479e-05, "loss": 0.7343, "step": 248740 }, { "epoch": 1.5891928497502013, "grad_norm": 1.1814675331115723, "learning_rate": 1.0082851358322737e-05, "loss": 0.933, "step": 248750 }, { "epoch": 1.58925673689994, "grad_norm": 0.7328624725341797, "learning_rate": 1.0079829894178205e-05, "loss": 0.8094, "step": 248760 }, { "epoch": 1.5893206240496787, "grad_norm": 0.7021011114120483, "learning_rate": 1.0076808832064339e-05, "loss": 0.8035, "step": 248770 }, { "epoch": 1.5893845111994174, "grad_norm": 0.8993996381759644, "learning_rate": 1.0073788172011528e-05, "loss": 0.6404, "step": 248780 }, { "epoch": 1.589448398349156, "grad_norm": 0.9888026714324951, "learning_rate": 1.0070767914050228e-05, "loss": 0.6814, "step": 248790 }, { "epoch": 1.5895122854988948, "grad_norm": 1.5480871200561523, "learning_rate": 1.0067748058210825e-05, "loss": 0.9038, "step": 248800 }, { "epoch": 1.5895761726486335, "grad_norm": 0.9566226005554199, "learning_rate": 1.0064728604523754e-05, "loss": 0.7915, "step": 248810 }, { "epoch": 1.5896400597983722, "grad_norm": 1.3926703929901123, "learning_rate": 1.0061709553019406e-05, "loss": 0.9904, "step": 248820 }, { "epoch": 1.589703946948111, "grad_norm": 1.0358089208602905, "learning_rate": 1.0058690903728207e-05, "loss": 0.8774, "step": 248830 }, { "epoch": 1.5897678340978496, "grad_norm": 1.1436970233917236, "learning_rate": 1.0055672656680532e-05, "loss": 0.8088, "step": 248840 }, { "epoch": 1.5898317212475883, "grad_norm": 2.777813673019409, "learning_rate": 1.0052654811906803e-05, "loss": 1.1499, "step": 248850 }, { "epoch": 1.589895608397327, "grad_norm": 1.3283435106277466, "learning_rate": 1.0049637369437386e-05, "loss": 0.9473, "step": 248860 }, { "epoch": 1.5899594955470655, "grad_norm": 1.2573764324188232, "learning_rate": 1.0046620329302692e-05, "loss": 0.7271, "step": 248870 }, { "epoch": 1.5900233826968044, "grad_norm": 1.3681310415267944, "learning_rate": 1.0043603691533088e-05, "loss": 0.9251, "step": 248880 }, { "epoch": 1.590087269846543, "grad_norm": 0.8003191351890564, "learning_rate": 1.0040587456158961e-05, "loss": 0.7029, "step": 248890 }, { "epoch": 1.5901511569962818, "grad_norm": 0.7895790934562683, "learning_rate": 1.00375716232107e-05, "loss": 0.7636, "step": 248900 }, { "epoch": 1.5902150441460203, "grad_norm": 0.6591528058052063, "learning_rate": 1.0034556192718652e-05, "loss": 0.6429, "step": 248910 }, { "epoch": 1.5902789312957593, "grad_norm": 0.8874780535697937, "learning_rate": 1.0031541164713215e-05, "loss": 0.9603, "step": 248920 }, { "epoch": 1.5903428184454977, "grad_norm": 1.5267889499664307, "learning_rate": 1.002852653922472e-05, "loss": 0.6381, "step": 248930 }, { "epoch": 1.5904067055952367, "grad_norm": 1.887830138206482, "learning_rate": 1.0025512316283553e-05, "loss": 0.825, "step": 248940 }, { "epoch": 1.5904705927449752, "grad_norm": 0.8215576410293579, "learning_rate": 1.0022498495920053e-05, "loss": 1.2069, "step": 248950 }, { "epoch": 1.590534479894714, "grad_norm": 1.7051416635513306, "learning_rate": 1.0019485078164587e-05, "loss": 0.9801, "step": 248960 }, { "epoch": 1.5905983670444526, "grad_norm": 1.316051721572876, "learning_rate": 1.0016472063047483e-05, "loss": 0.7481, "step": 248970 }, { "epoch": 1.5906622541941915, "grad_norm": 0.9726854562759399, "learning_rate": 1.0013459450599122e-05, "loss": 0.9108, "step": 248980 }, { "epoch": 1.59072614134393, "grad_norm": 1.443015217781067, "learning_rate": 1.001044724084979e-05, "loss": 0.8269, "step": 248990 }, { "epoch": 1.590790028493669, "grad_norm": 1.5187175273895264, "learning_rate": 1.0007435433829854e-05, "loss": 0.8788, "step": 249000 }, { "epoch": 1.5908539156434074, "grad_norm": 0.8051071763038635, "learning_rate": 1.0004424029569653e-05, "loss": 0.899, "step": 249010 }, { "epoch": 1.5909178027931463, "grad_norm": 1.0638450384140015, "learning_rate": 1.0001413028099487e-05, "loss": 0.9527, "step": 249020 }, { "epoch": 1.5909816899428848, "grad_norm": 1.3071459531784058, "learning_rate": 9.99840242944971e-06, "loss": 0.8981, "step": 249030 }, { "epoch": 1.5910455770926237, "grad_norm": 0.8064150214195251, "learning_rate": 9.995392233650614e-06, "loss": 0.8127, "step": 249040 }, { "epoch": 1.5911094642423622, "grad_norm": 1.0664138793945312, "learning_rate": 9.992382440732546e-06, "loss": 0.7089, "step": 249050 }, { "epoch": 1.5911733513921011, "grad_norm": 0.6624587178230286, "learning_rate": 9.989373050725775e-06, "loss": 0.7468, "step": 249060 }, { "epoch": 1.5912372385418396, "grad_norm": 1.153746485710144, "learning_rate": 9.986364063660653e-06, "loss": 0.7196, "step": 249070 }, { "epoch": 1.5913011256915786, "grad_norm": 1.2669557332992554, "learning_rate": 9.983355479567446e-06, "loss": 0.8815, "step": 249080 }, { "epoch": 1.591365012841317, "grad_norm": 0.9664118885993958, "learning_rate": 9.980347298476477e-06, "loss": 1.0083, "step": 249090 }, { "epoch": 1.5914288999910557, "grad_norm": 1.1675530672073364, "learning_rate": 9.977339520418027e-06, "loss": 0.7832, "step": 249100 }, { "epoch": 1.5914927871407945, "grad_norm": 0.9785093069076538, "learning_rate": 9.974332145422399e-06, "loss": 0.9357, "step": 249110 }, { "epoch": 1.5915566742905332, "grad_norm": 1.8464292287826538, "learning_rate": 9.971325173519863e-06, "loss": 1.0924, "step": 249120 }, { "epoch": 1.5916205614402719, "grad_norm": 1.0116223096847534, "learning_rate": 9.968318604740728e-06, "loss": 0.7755, "step": 249130 }, { "epoch": 1.5916844485900106, "grad_norm": 0.5784453749656677, "learning_rate": 9.965312439115243e-06, "loss": 0.7531, "step": 249140 }, { "epoch": 1.5917483357397493, "grad_norm": 0.9767338633537292, "learning_rate": 9.962306676673711e-06, "loss": 0.9978, "step": 249150 }, { "epoch": 1.591812222889488, "grad_norm": 0.7933934926986694, "learning_rate": 9.959301317446369e-06, "loss": 0.927, "step": 249160 }, { "epoch": 1.5918761100392267, "grad_norm": 2.1215498447418213, "learning_rate": 9.956296361463518e-06, "loss": 0.8457, "step": 249170 }, { "epoch": 1.5919399971889654, "grad_norm": 0.5488957166671753, "learning_rate": 9.953291808755393e-06, "loss": 0.8871, "step": 249180 }, { "epoch": 1.592003884338704, "grad_norm": 1.6344760656356812, "learning_rate": 9.95028765935228e-06, "loss": 0.8525, "step": 249190 }, { "epoch": 1.5920677714884428, "grad_norm": 1.200919508934021, "learning_rate": 9.947584269740252e-06, "loss": 0.7876, "step": 249200 }, { "epoch": 1.5921316586381815, "grad_norm": 1.2127763032913208, "learning_rate": 9.944580886699966e-06, "loss": 0.8877, "step": 249210 }, { "epoch": 1.5921955457879202, "grad_norm": 0.837215781211853, "learning_rate": 9.941577907052413e-06, "loss": 1.0373, "step": 249220 }, { "epoch": 1.592259432937659, "grad_norm": 1.1384263038635254, "learning_rate": 9.938575330827821e-06, "loss": 0.864, "step": 249230 }, { "epoch": 1.5923233200873976, "grad_norm": 0.8473384380340576, "learning_rate": 9.935573158056437e-06, "loss": 0.898, "step": 249240 }, { "epoch": 1.5923872072371363, "grad_norm": 2.536351442337036, "learning_rate": 9.932571388768486e-06, "loss": 0.8477, "step": 249250 }, { "epoch": 1.592451094386875, "grad_norm": 0.7730850577354431, "learning_rate": 9.929570022994217e-06, "loss": 0.9414, "step": 249260 }, { "epoch": 1.5925149815366137, "grad_norm": 0.5718420743942261, "learning_rate": 9.926569060763829e-06, "loss": 0.8113, "step": 249270 }, { "epoch": 1.5925788686863525, "grad_norm": 1.1947507858276367, "learning_rate": 9.923568502107572e-06, "loss": 0.8983, "step": 249280 }, { "epoch": 1.5926427558360912, "grad_norm": 1.237534999847412, "learning_rate": 9.920568347055637e-06, "loss": 0.9349, "step": 249290 }, { "epoch": 1.5927066429858299, "grad_norm": 0.8614002466201782, "learning_rate": 9.917568595638265e-06, "loss": 0.9377, "step": 249300 }, { "epoch": 1.5927705301355686, "grad_norm": 0.7836909890174866, "learning_rate": 9.914569247885647e-06, "loss": 0.9604, "step": 249310 }, { "epoch": 1.5928344172853073, "grad_norm": 0.7826888561248779, "learning_rate": 9.911570303827999e-06, "loss": 0.8028, "step": 249320 }, { "epoch": 1.592898304435046, "grad_norm": 1.3262845277786255, "learning_rate": 9.908571763495516e-06, "loss": 0.8967, "step": 249330 }, { "epoch": 1.5929621915847845, "grad_norm": 1.4745041131973267, "learning_rate": 9.905573626918392e-06, "loss": 0.9859, "step": 249340 }, { "epoch": 1.5930260787345234, "grad_norm": 1.885468602180481, "learning_rate": 9.902575894126841e-06, "loss": 0.8534, "step": 249350 }, { "epoch": 1.5930899658842619, "grad_norm": 0.608864426612854, "learning_rate": 9.899578565151035e-06, "loss": 0.7751, "step": 249360 }, { "epoch": 1.5931538530340008, "grad_norm": 0.8991163372993469, "learning_rate": 9.896581640021169e-06, "loss": 0.9384, "step": 249370 }, { "epoch": 1.5932177401837393, "grad_norm": 1.1069456338882446, "learning_rate": 9.893585118767413e-06, "loss": 0.7718, "step": 249380 }, { "epoch": 1.5932816273334782, "grad_norm": 0.7635006904602051, "learning_rate": 9.890589001419959e-06, "loss": 0.8445, "step": 249390 }, { "epoch": 1.5933455144832167, "grad_norm": 0.6687338948249817, "learning_rate": 9.887593288008967e-06, "loss": 0.9449, "step": 249400 }, { "epoch": 1.5934094016329556, "grad_norm": 1.5973081588745117, "learning_rate": 9.884597978564625e-06, "loss": 0.8369, "step": 249410 }, { "epoch": 1.5934732887826941, "grad_norm": 1.0921237468719482, "learning_rate": 9.881603073117068e-06, "loss": 0.8445, "step": 249420 }, { "epoch": 1.593537175932433, "grad_norm": 0.813981831073761, "learning_rate": 9.878608571696491e-06, "loss": 0.7445, "step": 249430 }, { "epoch": 1.5936010630821715, "grad_norm": 1.3870654106140137, "learning_rate": 9.875614474333022e-06, "loss": 0.8893, "step": 249440 }, { "epoch": 1.5936649502319105, "grad_norm": 1.150707721710205, "learning_rate": 9.87262078105684e-06, "loss": 1.0009, "step": 249450 }, { "epoch": 1.593728837381649, "grad_norm": 1.7357616424560547, "learning_rate": 9.86962749189807e-06, "loss": 0.9953, "step": 249460 }, { "epoch": 1.5937927245313879, "grad_norm": 0.9563319087028503, "learning_rate": 9.866634606886882e-06, "loss": 1.0655, "step": 249470 }, { "epoch": 1.5938566116811264, "grad_norm": 1.100013256072998, "learning_rate": 9.863642126053391e-06, "loss": 1.1086, "step": 249480 }, { "epoch": 1.5939204988308653, "grad_norm": 0.9695361852645874, "learning_rate": 9.86065004942776e-06, "loss": 1.0619, "step": 249490 }, { "epoch": 1.5939843859806038, "grad_norm": 0.9638354182243347, "learning_rate": 9.857658377040097e-06, "loss": 0.8276, "step": 249500 }, { "epoch": 1.5940482731303427, "grad_norm": 1.1016769409179688, "learning_rate": 9.854667108920552e-06, "loss": 0.8694, "step": 249510 }, { "epoch": 1.5941121602800812, "grad_norm": 1.1219736337661743, "learning_rate": 9.851676245099228e-06, "loss": 0.8565, "step": 249520 }, { "epoch": 1.59417604742982, "grad_norm": 1.7792798280715942, "learning_rate": 9.84868578560626e-06, "loss": 1.3292, "step": 249530 }, { "epoch": 1.5942399345795586, "grad_norm": 1.0533701181411743, "learning_rate": 9.845695730471772e-06, "loss": 0.7286, "step": 249540 }, { "epoch": 1.5943038217292975, "grad_norm": 1.4720954895019531, "learning_rate": 9.842706079725856e-06, "loss": 1.1434, "step": 249550 }, { "epoch": 1.594367708879036, "grad_norm": 0.9887880086898804, "learning_rate": 9.839716833398638e-06, "loss": 0.7406, "step": 249560 }, { "epoch": 1.594431596028775, "grad_norm": 0.8414777517318726, "learning_rate": 9.836727991520206e-06, "loss": 0.9619, "step": 249570 }, { "epoch": 1.5944954831785134, "grad_norm": 0.9462972283363342, "learning_rate": 9.833739554120686e-06, "loss": 0.7826, "step": 249580 }, { "epoch": 1.5945593703282521, "grad_norm": 0.9703009128570557, "learning_rate": 9.830751521230142e-06, "loss": 1.0046, "step": 249590 }, { "epoch": 1.5946232574779908, "grad_norm": 1.0508860349655151, "learning_rate": 9.827763892878688e-06, "loss": 1.3851, "step": 249600 }, { "epoch": 1.5946871446277295, "grad_norm": 1.112051248550415, "learning_rate": 9.824776669096413e-06, "loss": 1.0773, "step": 249610 }, { "epoch": 1.5947510317774682, "grad_norm": 1.7548609972000122, "learning_rate": 9.821789849913393e-06, "loss": 0.773, "step": 249620 }, { "epoch": 1.594814918927207, "grad_norm": 0.8418256044387817, "learning_rate": 9.818803435359691e-06, "loss": 1.038, "step": 249630 }, { "epoch": 1.5948788060769457, "grad_norm": 0.551695704460144, "learning_rate": 9.815817425465418e-06, "loss": 0.8379, "step": 249640 }, { "epoch": 1.5949426932266844, "grad_norm": 0.9964414834976196, "learning_rate": 9.81283182026061e-06, "loss": 0.8553, "step": 249650 }, { "epoch": 1.595006580376423, "grad_norm": 0.7424940466880798, "learning_rate": 9.809846619775354e-06, "loss": 0.9025, "step": 249660 }, { "epoch": 1.5950704675261618, "grad_norm": 2.2108371257781982, "learning_rate": 9.806861824039726e-06, "loss": 0.7527, "step": 249670 }, { "epoch": 1.5951343546759005, "grad_norm": 1.1251683235168457, "learning_rate": 9.803877433083758e-06, "loss": 1.0794, "step": 249680 }, { "epoch": 1.5951982418256392, "grad_norm": 1.1081650257110596, "learning_rate": 9.800893446937537e-06, "loss": 0.7922, "step": 249690 }, { "epoch": 1.595262128975378, "grad_norm": 1.0226283073425293, "learning_rate": 9.797909865631078e-06, "loss": 0.6803, "step": 249700 }, { "epoch": 1.5953260161251166, "grad_norm": 0.6846336722373962, "learning_rate": 9.794926689194456e-06, "loss": 0.812, "step": 249710 }, { "epoch": 1.5953899032748553, "grad_norm": 1.3356788158416748, "learning_rate": 9.791943917657698e-06, "loss": 0.8233, "step": 249720 }, { "epoch": 1.595453790424594, "grad_norm": 0.6795898675918579, "learning_rate": 9.788961551050857e-06, "loss": 1.0373, "step": 249730 }, { "epoch": 1.5955176775743327, "grad_norm": 0.9310694932937622, "learning_rate": 9.785979589403948e-06, "loss": 0.6179, "step": 249740 }, { "epoch": 1.5955815647240714, "grad_norm": 1.051107406616211, "learning_rate": 9.782998032747031e-06, "loss": 0.8966, "step": 249750 }, { "epoch": 1.5956454518738101, "grad_norm": 0.7246778011322021, "learning_rate": 9.780016881110104e-06, "loss": 0.7684, "step": 249760 }, { "epoch": 1.5957093390235488, "grad_norm": 0.9064025282859802, "learning_rate": 9.777036134523215e-06, "loss": 0.8458, "step": 249770 }, { "epoch": 1.5957732261732875, "grad_norm": 0.9094128608703613, "learning_rate": 9.774055793016357e-06, "loss": 0.6985, "step": 249780 }, { "epoch": 1.5958371133230262, "grad_norm": 1.2230727672576904, "learning_rate": 9.771075856619566e-06, "loss": 0.9097, "step": 249790 }, { "epoch": 1.595901000472765, "grad_norm": 1.1106340885162354, "learning_rate": 9.768096325362836e-06, "loss": 0.9081, "step": 249800 }, { "epoch": 1.5959648876225037, "grad_norm": 1.3577688932418823, "learning_rate": 9.765117199276192e-06, "loss": 0.8087, "step": 249810 }, { "epoch": 1.5960287747722424, "grad_norm": 1.1891157627105713, "learning_rate": 9.762138478389616e-06, "loss": 0.8516, "step": 249820 }, { "epoch": 1.5960926619219808, "grad_norm": 1.4682430028915405, "learning_rate": 9.759160162733127e-06, "loss": 0.8946, "step": 249830 }, { "epoch": 1.5961565490717198, "grad_norm": 0.7288236021995544, "learning_rate": 9.7561822523367e-06, "loss": 0.7522, "step": 249840 }, { "epoch": 1.5962204362214583, "grad_norm": 1.9862715005874634, "learning_rate": 9.753204747230327e-06, "loss": 0.8168, "step": 249850 }, { "epoch": 1.5962843233711972, "grad_norm": 1.0854727029800415, "learning_rate": 9.750227647444015e-06, "loss": 0.8119, "step": 249860 }, { "epoch": 1.5963482105209357, "grad_norm": 1.2034173011779785, "learning_rate": 9.74725095300772e-06, "loss": 0.9405, "step": 249870 }, { "epoch": 1.5964120976706746, "grad_norm": 1.0065717697143555, "learning_rate": 9.744274663951441e-06, "loss": 0.9919, "step": 249880 }, { "epoch": 1.596475984820413, "grad_norm": 1.0933870077133179, "learning_rate": 9.741298780305136e-06, "loss": 0.8674, "step": 249890 }, { "epoch": 1.596539871970152, "grad_norm": 0.6897011399269104, "learning_rate": 9.738323302098789e-06, "loss": 1.101, "step": 249900 }, { "epoch": 1.5966037591198905, "grad_norm": 1.1092685461044312, "learning_rate": 9.735348229362346e-06, "loss": 1.0444, "step": 249910 }, { "epoch": 1.5966676462696294, "grad_norm": 1.103498935699463, "learning_rate": 9.73237356212579e-06, "loss": 0.6172, "step": 249920 }, { "epoch": 1.596731533419368, "grad_norm": 2.056804895401001, "learning_rate": 9.729399300419062e-06, "loss": 0.7891, "step": 249930 }, { "epoch": 1.5967954205691068, "grad_norm": 1.0614315271377563, "learning_rate": 9.72642544427213e-06, "loss": 0.9441, "step": 249940 }, { "epoch": 1.5968593077188453, "grad_norm": 1.092970609664917, "learning_rate": 9.723451993714927e-06, "loss": 1.0506, "step": 249950 }, { "epoch": 1.5969231948685843, "grad_norm": 3.405836343765259, "learning_rate": 9.720478948777418e-06, "loss": 1.0595, "step": 249960 }, { "epoch": 1.5969870820183227, "grad_norm": 0.8358373045921326, "learning_rate": 9.717506309489516e-06, "loss": 0.8651, "step": 249970 }, { "epoch": 1.5970509691680617, "grad_norm": 1.3352793455123901, "learning_rate": 9.714534075881187e-06, "loss": 0.8881, "step": 249980 }, { "epoch": 1.5971148563178001, "grad_norm": 1.7608089447021484, "learning_rate": 9.711562247982343e-06, "loss": 1.2395, "step": 249990 }, { "epoch": 1.597178743467539, "grad_norm": 0.7808798551559448, "learning_rate": 9.708590825822939e-06, "loss": 0.9053, "step": 250000 }, { "epoch": 1.5972426306172776, "grad_norm": 0.683994472026825, "learning_rate": 9.70561980943286e-06, "loss": 0.916, "step": 250010 }, { "epoch": 1.5973065177670165, "grad_norm": 2.5665359497070312, "learning_rate": 9.702649198842067e-06, "loss": 0.7694, "step": 250020 }, { "epoch": 1.597370404916755, "grad_norm": 0.9608335494995117, "learning_rate": 9.699678994080446e-06, "loss": 0.8268, "step": 250030 }, { "epoch": 1.597434292066494, "grad_norm": 0.7314376831054688, "learning_rate": 9.696709195177934e-06, "loss": 1.0663, "step": 250040 }, { "epoch": 1.5974981792162324, "grad_norm": 0.9006572365760803, "learning_rate": 9.693739802164414e-06, "loss": 0.8218, "step": 250050 }, { "epoch": 1.5975620663659713, "grad_norm": 0.7238269448280334, "learning_rate": 9.690770815069805e-06, "loss": 1.0089, "step": 250060 }, { "epoch": 1.5976259535157098, "grad_norm": 1.1664044857025146, "learning_rate": 9.687802233924021e-06, "loss": 1.0815, "step": 250070 }, { "epoch": 1.5976898406654485, "grad_norm": 0.8418641090393066, "learning_rate": 9.684834058756931e-06, "loss": 0.7089, "step": 250080 }, { "epoch": 1.5977537278151872, "grad_norm": 1.1800457239151, "learning_rate": 9.681866289598445e-06, "loss": 0.7196, "step": 250090 }, { "epoch": 1.597817614964926, "grad_norm": 0.6666972637176514, "learning_rate": 9.678898926478452e-06, "loss": 0.7729, "step": 250100 }, { "epoch": 1.5978815021146646, "grad_norm": 1.1304889917373657, "learning_rate": 9.675931969426833e-06, "loss": 0.912, "step": 250110 }, { "epoch": 1.5979453892644033, "grad_norm": 0.9430323243141174, "learning_rate": 9.672965418473446e-06, "loss": 0.872, "step": 250120 }, { "epoch": 1.598009276414142, "grad_norm": 0.9690349102020264, "learning_rate": 9.6699992736482e-06, "loss": 0.7998, "step": 250130 }, { "epoch": 1.5980731635638807, "grad_norm": 0.9274241328239441, "learning_rate": 9.667033534980935e-06, "loss": 0.8911, "step": 250140 }, { "epoch": 1.5981370507136194, "grad_norm": 0.5790615677833557, "learning_rate": 9.664068202501553e-06, "loss": 0.8692, "step": 250150 }, { "epoch": 1.5982009378633582, "grad_norm": 1.0739883184432983, "learning_rate": 9.66110327623988e-06, "loss": 1.0901, "step": 250160 }, { "epoch": 1.5982648250130969, "grad_norm": 0.8284801840782166, "learning_rate": 9.658138756225805e-06, "loss": 0.6699, "step": 250170 }, { "epoch": 1.5983287121628356, "grad_norm": 0.8745191693305969, "learning_rate": 9.655174642489179e-06, "loss": 0.6788, "step": 250180 }, { "epoch": 1.5983925993125743, "grad_norm": 1.3312053680419922, "learning_rate": 9.652210935059836e-06, "loss": 0.9959, "step": 250190 }, { "epoch": 1.598456486462313, "grad_norm": 1.0378258228302002, "learning_rate": 9.649247633967651e-06, "loss": 0.9172, "step": 250200 }, { "epoch": 1.5985203736120517, "grad_norm": 0.8793679475784302, "learning_rate": 9.646284739242434e-06, "loss": 0.9155, "step": 250210 }, { "epoch": 1.5985842607617904, "grad_norm": 0.8637825846672058, "learning_rate": 9.643322250914056e-06, "loss": 0.9205, "step": 250220 }, { "epoch": 1.598648147911529, "grad_norm": 1.0453790426254272, "learning_rate": 9.640360169012325e-06, "loss": 1.1401, "step": 250230 }, { "epoch": 1.5987120350612678, "grad_norm": 0.9813113212585449, "learning_rate": 9.637398493567091e-06, "loss": 0.9221, "step": 250240 }, { "epoch": 1.5987759222110065, "grad_norm": 0.6848676204681396, "learning_rate": 9.634437224608162e-06, "loss": 0.8406, "step": 250250 }, { "epoch": 1.5988398093607452, "grad_norm": 0.9610247611999512, "learning_rate": 9.631476362165386e-06, "loss": 0.8424, "step": 250260 }, { "epoch": 1.598903696510484, "grad_norm": 0.7780939340591431, "learning_rate": 9.628515906268554e-06, "loss": 0.5489, "step": 250270 }, { "epoch": 1.5989675836602226, "grad_norm": 1.0890668630599976, "learning_rate": 9.625555856947505e-06, "loss": 0.8914, "step": 250280 }, { "epoch": 1.5990314708099613, "grad_norm": 0.7628886103630066, "learning_rate": 9.622596214232021e-06, "loss": 1.1567, "step": 250290 }, { "epoch": 1.5990953579597, "grad_norm": 0.8857354521751404, "learning_rate": 9.619636978151942e-06, "loss": 0.9771, "step": 250300 }, { "epoch": 1.5991592451094387, "grad_norm": 1.5625184774398804, "learning_rate": 9.616678148737034e-06, "loss": 1.0714, "step": 250310 }, { "epoch": 1.5992231322591772, "grad_norm": 0.9194492101669312, "learning_rate": 9.613719726017134e-06, "loss": 0.7574, "step": 250320 }, { "epoch": 1.5992870194089162, "grad_norm": 0.6353196501731873, "learning_rate": 9.610761710021998e-06, "loss": 0.894, "step": 250330 }, { "epoch": 1.5993509065586546, "grad_norm": 1.1404699087142944, "learning_rate": 9.607804100781448e-06, "loss": 0.8643, "step": 250340 }, { "epoch": 1.5994147937083936, "grad_norm": 0.8246428370475769, "learning_rate": 9.604846898325243e-06, "loss": 0.7766, "step": 250350 }, { "epoch": 1.599478680858132, "grad_norm": 1.066979169845581, "learning_rate": 9.601890102683187e-06, "loss": 1.002, "step": 250360 }, { "epoch": 1.599542568007871, "grad_norm": 1.0399022102355957, "learning_rate": 9.598933713885034e-06, "loss": 0.8962, "step": 250370 }, { "epoch": 1.5996064551576095, "grad_norm": 0.4756830036640167, "learning_rate": 9.59597773196057e-06, "loss": 0.8602, "step": 250380 }, { "epoch": 1.5996703423073484, "grad_norm": 2.020519971847534, "learning_rate": 9.593022156939579e-06, "loss": 0.8967, "step": 250390 }, { "epoch": 1.5997342294570869, "grad_norm": 1.331128478050232, "learning_rate": 9.590066988851797e-06, "loss": 0.825, "step": 250400 }, { "epoch": 1.5997981166068258, "grad_norm": 0.7770277261734009, "learning_rate": 9.587112227727018e-06, "loss": 1.0992, "step": 250410 }, { "epoch": 1.5998620037565643, "grad_norm": 1.1182270050048828, "learning_rate": 9.584157873594961e-06, "loss": 0.7391, "step": 250420 }, { "epoch": 1.5999258909063032, "grad_norm": 0.7087160348892212, "learning_rate": 9.581203926485421e-06, "loss": 0.662, "step": 250430 }, { "epoch": 1.5999897780560417, "grad_norm": 0.8244704008102417, "learning_rate": 9.578250386428105e-06, "loss": 0.9107, "step": 250440 }, { "epoch": 1.6000536652057806, "grad_norm": 1.832184076309204, "learning_rate": 9.575297253452791e-06, "loss": 0.9488, "step": 250450 }, { "epoch": 1.6001175523555191, "grad_norm": 0.7238314151763916, "learning_rate": 9.572344527589195e-06, "loss": 1.0217, "step": 250460 }, { "epoch": 1.600181439505258, "grad_norm": 1.169054627418518, "learning_rate": 9.569392208867078e-06, "loss": 0.6509, "step": 250470 }, { "epoch": 1.6002453266549965, "grad_norm": 0.9178882837295532, "learning_rate": 9.566440297316142e-06, "loss": 0.8303, "step": 250480 }, { "epoch": 1.6003092138047355, "grad_norm": 0.6997204422950745, "learning_rate": 9.563488792966146e-06, "loss": 0.7921, "step": 250490 }, { "epoch": 1.600373100954474, "grad_norm": 1.2562320232391357, "learning_rate": 9.56053769584679e-06, "loss": 0.9274, "step": 250500 }, { "epoch": 1.6004369881042129, "grad_norm": 0.970008373260498, "learning_rate": 9.557587005987817e-06, "loss": 0.8867, "step": 250510 }, { "epoch": 1.6005008752539513, "grad_norm": 1.1938084363937378, "learning_rate": 9.554636723418919e-06, "loss": 0.9233, "step": 250520 }, { "epoch": 1.6005647624036903, "grad_norm": 1.1808513402938843, "learning_rate": 9.551686848169827e-06, "loss": 1.1654, "step": 250530 }, { "epoch": 1.6006286495534288, "grad_norm": 1.0170284509658813, "learning_rate": 9.548737380270234e-06, "loss": 0.8142, "step": 250540 }, { "epoch": 1.6006925367031677, "grad_norm": 0.6526452302932739, "learning_rate": 9.54578831974986e-06, "loss": 0.729, "step": 250550 }, { "epoch": 1.6007564238529062, "grad_norm": 0.8424413800239563, "learning_rate": 9.542839666638387e-06, "loss": 0.6507, "step": 250560 }, { "epoch": 1.6008203110026449, "grad_norm": 0.8636687994003296, "learning_rate": 9.539891420965524e-06, "loss": 0.8822, "step": 250570 }, { "epoch": 1.6008841981523836, "grad_norm": 1.0570945739746094, "learning_rate": 9.536943582760966e-06, "loss": 0.8312, "step": 250580 }, { "epoch": 1.6009480853021223, "grad_norm": 1.320425271987915, "learning_rate": 9.533996152054375e-06, "loss": 0.7736, "step": 250590 }, { "epoch": 1.601011972451861, "grad_norm": 0.8588604927062988, "learning_rate": 9.531049128875463e-06, "loss": 0.9693, "step": 250600 }, { "epoch": 1.6010758596015997, "grad_norm": 1.496766209602356, "learning_rate": 9.528102513253883e-06, "loss": 0.8687, "step": 250610 }, { "epoch": 1.6011397467513384, "grad_norm": 0.958341658115387, "learning_rate": 9.52515630521934e-06, "loss": 0.6905, "step": 250620 }, { "epoch": 1.6012036339010771, "grad_norm": 1.0842797756195068, "learning_rate": 9.522210504801471e-06, "loss": 0.787, "step": 250630 }, { "epoch": 1.6012675210508158, "grad_norm": 0.8197239637374878, "learning_rate": 9.519265112029978e-06, "loss": 0.8905, "step": 250640 }, { "epoch": 1.6013314082005545, "grad_norm": 1.5352048873901367, "learning_rate": 9.516320126934491e-06, "loss": 1.1262, "step": 250650 }, { "epoch": 1.6013952953502932, "grad_norm": 1.2177964448928833, "learning_rate": 9.513375549544701e-06, "loss": 0.7407, "step": 250660 }, { "epoch": 1.601459182500032, "grad_norm": 0.968707263469696, "learning_rate": 9.510431379890227e-06, "loss": 0.8163, "step": 250670 }, { "epoch": 1.6015230696497706, "grad_norm": 1.3217895030975342, "learning_rate": 9.507487618000754e-06, "loss": 0.9355, "step": 250680 }, { "epoch": 1.6015869567995094, "grad_norm": 1.0752805471420288, "learning_rate": 9.504544263905895e-06, "loss": 0.9473, "step": 250690 }, { "epoch": 1.601650843949248, "grad_norm": 1.2310861349105835, "learning_rate": 9.50160131763531e-06, "loss": 0.8163, "step": 250700 }, { "epoch": 1.6017147310989868, "grad_norm": 0.782647967338562, "learning_rate": 9.49865877921865e-06, "loss": 0.8367, "step": 250710 }, { "epoch": 1.6017786182487255, "grad_norm": 0.5226444005966187, "learning_rate": 9.49571664868552e-06, "loss": 0.8799, "step": 250720 }, { "epoch": 1.6018425053984642, "grad_norm": 4.664597511291504, "learning_rate": 9.492774926065579e-06, "loss": 1.1183, "step": 250730 }, { "epoch": 1.6019063925482029, "grad_norm": 1.443206787109375, "learning_rate": 9.489833611388428e-06, "loss": 0.8891, "step": 250740 }, { "epoch": 1.6019702796979416, "grad_norm": 0.7619695663452148, "learning_rate": 9.48689270468371e-06, "loss": 0.6385, "step": 250750 }, { "epoch": 1.6020341668476803, "grad_norm": 2.3720896244049072, "learning_rate": 9.483952205981018e-06, "loss": 0.9371, "step": 250760 }, { "epoch": 1.602098053997419, "grad_norm": 2.431199312210083, "learning_rate": 9.481012115309989e-06, "loss": 0.764, "step": 250770 }, { "epoch": 1.6021619411471577, "grad_norm": 1.5024423599243164, "learning_rate": 9.478072432700208e-06, "loss": 0.6849, "step": 250780 }, { "epoch": 1.6022258282968964, "grad_norm": 1.117741584777832, "learning_rate": 9.47513315818131e-06, "loss": 0.7878, "step": 250790 }, { "epoch": 1.6022897154466351, "grad_norm": 1.0036662817001343, "learning_rate": 9.472194291782871e-06, "loss": 0.803, "step": 250800 }, { "epoch": 1.6023536025963736, "grad_norm": 0.7195963263511658, "learning_rate": 9.469255833534501e-06, "loss": 0.9983, "step": 250810 }, { "epoch": 1.6024174897461125, "grad_norm": 1.221651554107666, "learning_rate": 9.466317783465784e-06, "loss": 0.9196, "step": 250820 }, { "epoch": 1.602481376895851, "grad_norm": 1.0164096355438232, "learning_rate": 9.463380141606321e-06, "loss": 1.0127, "step": 250830 }, { "epoch": 1.60254526404559, "grad_norm": 1.8335058689117432, "learning_rate": 9.460442907985679e-06, "loss": 0.8599, "step": 250840 }, { "epoch": 1.6026091511953284, "grad_norm": 1.6345186233520508, "learning_rate": 9.45750608263346e-06, "loss": 0.8847, "step": 250850 }, { "epoch": 1.6026730383450674, "grad_norm": 1.392560362815857, "learning_rate": 9.454569665579221e-06, "loss": 0.9483, "step": 250860 }, { "epoch": 1.6027369254948058, "grad_norm": 2.1398732662200928, "learning_rate": 9.45163365685255e-06, "loss": 0.7336, "step": 250870 }, { "epoch": 1.6028008126445448, "grad_norm": 1.1836103200912476, "learning_rate": 9.448698056482996e-06, "loss": 0.9999, "step": 250880 }, { "epoch": 1.6028646997942833, "grad_norm": 0.7985469102859497, "learning_rate": 9.445762864500147e-06, "loss": 1.2659, "step": 250890 }, { "epoch": 1.6029285869440222, "grad_norm": 1.1316717863082886, "learning_rate": 9.442828080933536e-06, "loss": 0.8242, "step": 250900 }, { "epoch": 1.6029924740937607, "grad_norm": 1.4258652925491333, "learning_rate": 9.439893705812736e-06, "loss": 0.8395, "step": 250910 }, { "epoch": 1.6030563612434996, "grad_norm": 1.2066962718963623, "learning_rate": 9.436959739167305e-06, "loss": 0.9081, "step": 250920 }, { "epoch": 1.603120248393238, "grad_norm": 1.2025872468948364, "learning_rate": 9.434026181026773e-06, "loss": 0.7896, "step": 250930 }, { "epoch": 1.603184135542977, "grad_norm": 1.226680874824524, "learning_rate": 9.431093031420702e-06, "loss": 0.7066, "step": 250940 }, { "epoch": 1.6032480226927155, "grad_norm": 0.9295899271965027, "learning_rate": 9.428160290378606e-06, "loss": 0.7791, "step": 250950 }, { "epoch": 1.6033119098424544, "grad_norm": 0.9173290133476257, "learning_rate": 9.425227957930055e-06, "loss": 0.7236, "step": 250960 }, { "epoch": 1.603375796992193, "grad_norm": 1.2388783693313599, "learning_rate": 9.422296034104544e-06, "loss": 0.8935, "step": 250970 }, { "epoch": 1.6034396841419318, "grad_norm": 1.0881764888763428, "learning_rate": 9.419364518931633e-06, "loss": 0.9492, "step": 250980 }, { "epoch": 1.6035035712916703, "grad_norm": 1.2922098636627197, "learning_rate": 9.416433412440812e-06, "loss": 1.0869, "step": 250990 }, { "epoch": 1.6035674584414092, "grad_norm": 1.176303744316101, "learning_rate": 9.413502714661626e-06, "loss": 0.9365, "step": 251000 }, { "epoch": 1.6036313455911477, "grad_norm": 1.007675290107727, "learning_rate": 9.41057242562357e-06, "loss": 1.02, "step": 251010 }, { "epoch": 1.6036952327408867, "grad_norm": 0.736621618270874, "learning_rate": 9.407642545356182e-06, "loss": 0.8256, "step": 251020 }, { "epoch": 1.6037591198906251, "grad_norm": 0.718731164932251, "learning_rate": 9.404713073888933e-06, "loss": 0.9013, "step": 251030 }, { "epoch": 1.6038230070403638, "grad_norm": 0.7369511127471924, "learning_rate": 9.401784011251357e-06, "loss": 0.8759, "step": 251040 }, { "epoch": 1.6038868941901026, "grad_norm": 1.6651430130004883, "learning_rate": 9.398855357472924e-06, "loss": 0.5682, "step": 251050 }, { "epoch": 1.6039507813398413, "grad_norm": 1.296785831451416, "learning_rate": 9.395927112583159e-06, "loss": 1.0505, "step": 251060 }, { "epoch": 1.60401466848958, "grad_norm": 0.8208275437355042, "learning_rate": 9.392999276611537e-06, "loss": 0.9532, "step": 251070 }, { "epoch": 1.6040785556393187, "grad_norm": 0.9491279721260071, "learning_rate": 9.390071849587523e-06, "loss": 0.615, "step": 251080 }, { "epoch": 1.6041424427890574, "grad_norm": 1.2566066980361938, "learning_rate": 9.387144831540634e-06, "loss": 0.7125, "step": 251090 }, { "epoch": 1.604206329938796, "grad_norm": 1.3850597143173218, "learning_rate": 9.384218222500324e-06, "loss": 1.1517, "step": 251100 }, { "epoch": 1.6042702170885348, "grad_norm": 0.8714288473129272, "learning_rate": 9.38129202249608e-06, "loss": 0.9129, "step": 251110 }, { "epoch": 1.6043341042382735, "grad_norm": 1.3241082429885864, "learning_rate": 9.378366231557356e-06, "loss": 0.75, "step": 251120 }, { "epoch": 1.6043979913880122, "grad_norm": 0.9380171895027161, "learning_rate": 9.375440849713635e-06, "loss": 0.8114, "step": 251130 }, { "epoch": 1.604461878537751, "grad_norm": 1.197385549545288, "learning_rate": 9.372515876994364e-06, "loss": 0.7724, "step": 251140 }, { "epoch": 1.6045257656874896, "grad_norm": 1.5692874193191528, "learning_rate": 9.369591313429016e-06, "loss": 0.6484, "step": 251150 }, { "epoch": 1.6045896528372283, "grad_norm": 1.0515121221542358, "learning_rate": 9.366667159047022e-06, "loss": 1.0717, "step": 251160 }, { "epoch": 1.604653539986967, "grad_norm": 1.019773244857788, "learning_rate": 9.363743413877851e-06, "loss": 0.7979, "step": 251170 }, { "epoch": 1.6047174271367057, "grad_norm": 0.8589492440223694, "learning_rate": 9.360820077950928e-06, "loss": 0.9352, "step": 251180 }, { "epoch": 1.6047813142864444, "grad_norm": 0.8133339881896973, "learning_rate": 9.35789715129572e-06, "loss": 0.7797, "step": 251190 }, { "epoch": 1.6048452014361831, "grad_norm": 0.7571247816085815, "learning_rate": 9.354974633941633e-06, "loss": 0.7098, "step": 251200 }, { "epoch": 1.6049090885859219, "grad_norm": 0.987298309803009, "learning_rate": 9.352052525918126e-06, "loss": 0.8463, "step": 251210 }, { "epoch": 1.6049729757356606, "grad_norm": 0.6550473570823669, "learning_rate": 9.349130827254605e-06, "loss": 0.7963, "step": 251220 }, { "epoch": 1.6050368628853993, "grad_norm": 0.8027055263519287, "learning_rate": 9.346209537980505e-06, "loss": 0.8266, "step": 251230 }, { "epoch": 1.605100750035138, "grad_norm": 1.0942720174789429, "learning_rate": 9.34328865812526e-06, "loss": 0.8502, "step": 251240 }, { "epoch": 1.6051646371848767, "grad_norm": 1.5014644861221313, "learning_rate": 9.340368187718256e-06, "loss": 0.8505, "step": 251250 }, { "epoch": 1.6052285243346154, "grad_norm": 0.6291337609291077, "learning_rate": 9.337448126788927e-06, "loss": 0.8417, "step": 251260 }, { "epoch": 1.605292411484354, "grad_norm": 0.4529359042644501, "learning_rate": 9.334528475366672e-06, "loss": 1.0095, "step": 251270 }, { "epoch": 1.6053562986340928, "grad_norm": 1.283196210861206, "learning_rate": 9.331609233480898e-06, "loss": 1.0089, "step": 251280 }, { "epoch": 1.6054201857838315, "grad_norm": 1.5335444211959839, "learning_rate": 9.328690401161e-06, "loss": 0.9119, "step": 251290 }, { "epoch": 1.60548407293357, "grad_norm": 1.4574180841445923, "learning_rate": 9.325771978436382e-06, "loss": 0.6515, "step": 251300 }, { "epoch": 1.605547960083309, "grad_norm": 1.1264441013336182, "learning_rate": 9.322853965336414e-06, "loss": 1.1423, "step": 251310 }, { "epoch": 1.6056118472330474, "grad_norm": 1.130892038345337, "learning_rate": 9.319936361890514e-06, "loss": 1.0827, "step": 251320 }, { "epoch": 1.6056757343827863, "grad_norm": 1.047214388847351, "learning_rate": 9.317019168128033e-06, "loss": 0.7339, "step": 251330 }, { "epoch": 1.6057396215325248, "grad_norm": 1.3307965993881226, "learning_rate": 9.314102384078377e-06, "loss": 0.6577, "step": 251340 }, { "epoch": 1.6058035086822637, "grad_norm": 1.187129020690918, "learning_rate": 9.311186009770901e-06, "loss": 0.8338, "step": 251350 }, { "epoch": 1.6058673958320022, "grad_norm": 2.011436700820923, "learning_rate": 9.30827004523499e-06, "loss": 1.0097, "step": 251360 }, { "epoch": 1.6059312829817411, "grad_norm": 0.9869546294212341, "learning_rate": 9.30535449049999e-06, "loss": 0.9975, "step": 251370 }, { "epoch": 1.6059951701314796, "grad_norm": 0.7743905186653137, "learning_rate": 9.302439345595293e-06, "loss": 0.8767, "step": 251380 }, { "epoch": 1.6060590572812186, "grad_norm": 1.3220921754837036, "learning_rate": 9.29952461055022e-06, "loss": 0.8059, "step": 251390 }, { "epoch": 1.606122944430957, "grad_norm": 1.1547019481658936, "learning_rate": 9.296610285394164e-06, "loss": 0.6196, "step": 251400 }, { "epoch": 1.606186831580696, "grad_norm": 1.0041553974151611, "learning_rate": 9.29369637015644e-06, "loss": 0.8884, "step": 251410 }, { "epoch": 1.6062507187304345, "grad_norm": 0.7838250994682312, "learning_rate": 9.290782864866426e-06, "loss": 1.0101, "step": 251420 }, { "epoch": 1.6063146058801734, "grad_norm": 1.1419521570205688, "learning_rate": 9.28786976955343e-06, "loss": 0.8721, "step": 251430 }, { "epoch": 1.6063784930299119, "grad_norm": 0.8693103194236755, "learning_rate": 9.284957084246814e-06, "loss": 1.0617, "step": 251440 }, { "epoch": 1.6064423801796508, "grad_norm": 1.3104223012924194, "learning_rate": 9.28204480897591e-06, "loss": 1.0841, "step": 251450 }, { "epoch": 1.6065062673293893, "grad_norm": 1.1727479696273804, "learning_rate": 9.27913294377003e-06, "loss": 1.0063, "step": 251460 }, { "epoch": 1.6065701544791282, "grad_norm": 0.9226076602935791, "learning_rate": 9.27622148865852e-06, "loss": 0.9742, "step": 251470 }, { "epoch": 1.6066340416288667, "grad_norm": 0.9547157883644104, "learning_rate": 9.27331044367068e-06, "loss": 0.8409, "step": 251480 }, { "epoch": 1.6066979287786056, "grad_norm": 1.7603591680526733, "learning_rate": 9.27039980883585e-06, "loss": 1.0406, "step": 251490 }, { "epoch": 1.606761815928344, "grad_norm": 0.7404820322990417, "learning_rate": 9.26748958418332e-06, "loss": 0.7947, "step": 251500 }, { "epoch": 1.606825703078083, "grad_norm": 1.1479718685150146, "learning_rate": 9.264579769742416e-06, "loss": 0.705, "step": 251510 }, { "epoch": 1.6068895902278215, "grad_norm": 0.8657917380332947, "learning_rate": 9.261670365542424e-06, "loss": 0.927, "step": 251520 }, { "epoch": 1.6069534773775602, "grad_norm": 1.3002824783325195, "learning_rate": 9.258761371612668e-06, "loss": 0.9777, "step": 251530 }, { "epoch": 1.607017364527299, "grad_norm": 0.9150441884994507, "learning_rate": 9.255852787982422e-06, "loss": 0.8219, "step": 251540 }, { "epoch": 1.6070812516770376, "grad_norm": 0.8327670693397522, "learning_rate": 9.252944614680998e-06, "loss": 1.3298, "step": 251550 }, { "epoch": 1.6071451388267763, "grad_norm": 0.8424513936042786, "learning_rate": 9.25003685173767e-06, "loss": 0.6958, "step": 251560 }, { "epoch": 1.607209025976515, "grad_norm": 0.8646684288978577, "learning_rate": 9.247129499181711e-06, "loss": 1.1269, "step": 251570 }, { "epoch": 1.6072729131262538, "grad_norm": 0.9485357999801636, "learning_rate": 9.244222557042431e-06, "loss": 0.8929, "step": 251580 }, { "epoch": 1.6073368002759925, "grad_norm": 0.7912691235542297, "learning_rate": 9.241316025349073e-06, "loss": 1.036, "step": 251590 }, { "epoch": 1.6074006874257312, "grad_norm": 1.385108232498169, "learning_rate": 9.238409904130941e-06, "loss": 0.744, "step": 251600 }, { "epoch": 1.6074645745754699, "grad_norm": 1.1460243463516235, "learning_rate": 9.235504193417266e-06, "loss": 1.1109, "step": 251610 }, { "epoch": 1.6075284617252086, "grad_norm": 0.646547794342041, "learning_rate": 9.23259889323735e-06, "loss": 1.2331, "step": 251620 }, { "epoch": 1.6075923488749473, "grad_norm": 1.1643790006637573, "learning_rate": 9.229694003620415e-06, "loss": 0.9083, "step": 251630 }, { "epoch": 1.607656236024686, "grad_norm": 0.8957083821296692, "learning_rate": 9.226789524595747e-06, "loss": 0.6786, "step": 251640 }, { "epoch": 1.6077201231744247, "grad_norm": 1.0356974601745605, "learning_rate": 9.223885456192566e-06, "loss": 0.9857, "step": 251650 }, { "epoch": 1.6077840103241634, "grad_norm": 1.0247944593429565, "learning_rate": 9.220981798440148e-06, "loss": 0.7348, "step": 251660 }, { "epoch": 1.607847897473902, "grad_norm": 0.7964886426925659, "learning_rate": 9.218078551367715e-06, "loss": 0.809, "step": 251670 }, { "epoch": 1.6079117846236408, "grad_norm": 0.8133732676506042, "learning_rate": 9.21517571500452e-06, "loss": 0.7862, "step": 251680 }, { "epoch": 1.6079756717733795, "grad_norm": 1.3863354921340942, "learning_rate": 9.212563513458184e-06, "loss": 0.8938, "step": 251690 }, { "epoch": 1.6080395589231182, "grad_norm": 0.682494580745697, "learning_rate": 9.209661457523062e-06, "loss": 0.709, "step": 251700 }, { "epoch": 1.608103446072857, "grad_norm": 0.5610239505767822, "learning_rate": 9.206759812381938e-06, "loss": 0.6273, "step": 251710 }, { "epoch": 1.6081673332225956, "grad_norm": 0.6274298429489136, "learning_rate": 9.203858578064028e-06, "loss": 0.7385, "step": 251720 }, { "epoch": 1.6082312203723343, "grad_norm": 1.206041932106018, "learning_rate": 9.20095775459856e-06, "loss": 0.9551, "step": 251730 }, { "epoch": 1.608295107522073, "grad_norm": 1.270914077758789, "learning_rate": 9.198057342014738e-06, "loss": 0.9709, "step": 251740 }, { "epoch": 1.6083589946718118, "grad_norm": 0.9437939524650574, "learning_rate": 9.195157340341787e-06, "loss": 0.8078, "step": 251750 }, { "epoch": 1.6084228818215505, "grad_norm": 1.5694162845611572, "learning_rate": 9.19225774960889e-06, "loss": 0.9904, "step": 251760 }, { "epoch": 1.608486768971289, "grad_norm": 1.7031606435775757, "learning_rate": 9.189358569845275e-06, "loss": 0.972, "step": 251770 }, { "epoch": 1.6085506561210279, "grad_norm": 2.132373094558716, "learning_rate": 9.186459801080111e-06, "loss": 0.8312, "step": 251780 }, { "epoch": 1.6086145432707664, "grad_norm": 1.3549762964248657, "learning_rate": 9.18356144334262e-06, "loss": 1.0376, "step": 251790 }, { "epoch": 1.6086784304205053, "grad_norm": 0.9008809328079224, "learning_rate": 9.180663496661962e-06, "loss": 0.8303, "step": 251800 }, { "epoch": 1.6087423175702438, "grad_norm": 0.8437301516532898, "learning_rate": 9.177765961067348e-06, "loss": 0.7478, "step": 251810 }, { "epoch": 1.6088062047199827, "grad_norm": 1.0403225421905518, "learning_rate": 9.174868836587942e-06, "loss": 0.9228, "step": 251820 }, { "epoch": 1.6088700918697212, "grad_norm": 1.1173384189605713, "learning_rate": 9.171972123252931e-06, "loss": 0.7614, "step": 251830 }, { "epoch": 1.6089339790194601, "grad_norm": 0.7167786955833435, "learning_rate": 9.169075821091472e-06, "loss": 0.951, "step": 251840 }, { "epoch": 1.6089978661691986, "grad_norm": 0.8369988203048706, "learning_rate": 9.16617993013275e-06, "loss": 1.0164, "step": 251850 }, { "epoch": 1.6090617533189375, "grad_norm": 1.0024378299713135, "learning_rate": 9.163284450405918e-06, "loss": 0.7347, "step": 251860 }, { "epoch": 1.609125640468676, "grad_norm": 0.9501249194145203, "learning_rate": 9.160389381940137e-06, "loss": 0.6731, "step": 251870 }, { "epoch": 1.609189527618415, "grad_norm": 0.7685287594795227, "learning_rate": 9.157494724764577e-06, "loss": 1.06, "step": 251880 }, { "epoch": 1.6092534147681534, "grad_norm": 0.8647333979606628, "learning_rate": 9.154600478908365e-06, "loss": 0.803, "step": 251890 }, { "epoch": 1.6093173019178924, "grad_norm": 0.7192227244377136, "learning_rate": 9.151706644400681e-06, "loss": 0.949, "step": 251900 }, { "epoch": 1.6093811890676308, "grad_norm": 0.7441559433937073, "learning_rate": 9.148813221270635e-06, "loss": 0.9322, "step": 251910 }, { "epoch": 1.6094450762173698, "grad_norm": 2.0405662059783936, "learning_rate": 9.145920209547392e-06, "loss": 1.0214, "step": 251920 }, { "epoch": 1.6095089633671082, "grad_norm": 1.1048287153244019, "learning_rate": 9.143027609260063e-06, "loss": 0.7865, "step": 251930 }, { "epoch": 1.6095728505168472, "grad_norm": 0.8086538910865784, "learning_rate": 9.140135420437812e-06, "loss": 0.7849, "step": 251940 }, { "epoch": 1.6096367376665857, "grad_norm": 1.294543743133545, "learning_rate": 9.137243643109722e-06, "loss": 0.9705, "step": 251950 }, { "epoch": 1.6097006248163246, "grad_norm": 1.7661151885986328, "learning_rate": 9.134352277304964e-06, "loss": 0.744, "step": 251960 }, { "epoch": 1.609764511966063, "grad_norm": 0.9390109777450562, "learning_rate": 9.131461323052615e-06, "loss": 0.7945, "step": 251970 }, { "epoch": 1.609828399115802, "grad_norm": 0.8471163511276245, "learning_rate": 9.128570780381817e-06, "loss": 0.7913, "step": 251980 }, { "epoch": 1.6098922862655405, "grad_norm": 0.7456984519958496, "learning_rate": 9.125680649321661e-06, "loss": 0.8629, "step": 251990 }, { "epoch": 1.6099561734152794, "grad_norm": 0.7650848627090454, "learning_rate": 9.12279092990127e-06, "loss": 0.8887, "step": 252000 }, { "epoch": 1.610020060565018, "grad_norm": 0.687843382358551, "learning_rate": 9.11990162214973e-06, "loss": 0.6964, "step": 252010 }, { "epoch": 1.6100839477147566, "grad_norm": 1.4888943433761597, "learning_rate": 9.117012726096158e-06, "loss": 1.0631, "step": 252020 }, { "epoch": 1.6101478348644953, "grad_norm": 0.8628354072570801, "learning_rate": 9.114124241769622e-06, "loss": 0.8178, "step": 252030 }, { "epoch": 1.610211722014234, "grad_norm": 0.6701124906539917, "learning_rate": 9.111236169199245e-06, "loss": 0.6124, "step": 252040 }, { "epoch": 1.6102756091639727, "grad_norm": 0.9853718280792236, "learning_rate": 9.108348508414078e-06, "loss": 0.7542, "step": 252050 }, { "epoch": 1.6103394963137114, "grad_norm": 0.8621380925178528, "learning_rate": 9.105461259443227e-06, "loss": 0.6891, "step": 252060 }, { "epoch": 1.6104033834634501, "grad_norm": 1.3660041093826294, "learning_rate": 9.102574422315752e-06, "loss": 0.7933, "step": 252070 }, { "epoch": 1.6104672706131888, "grad_norm": 1.1363906860351562, "learning_rate": 9.09968799706073e-06, "loss": 0.9381, "step": 252080 }, { "epoch": 1.6105311577629275, "grad_norm": 1.0103809833526611, "learning_rate": 9.096801983707248e-06, "loss": 0.9705, "step": 252090 }, { "epoch": 1.6105950449126663, "grad_norm": 1.2093793153762817, "learning_rate": 9.093916382284346e-06, "loss": 0.7687, "step": 252100 }, { "epoch": 1.610658932062405, "grad_norm": 0.9786253571510315, "learning_rate": 9.091031192821104e-06, "loss": 1.1084, "step": 252110 }, { "epoch": 1.6107228192121437, "grad_norm": 0.9100025296211243, "learning_rate": 9.088146415346554e-06, "loss": 0.804, "step": 252120 }, { "epoch": 1.6107867063618824, "grad_norm": 4.617587089538574, "learning_rate": 9.085262049889782e-06, "loss": 0.8656, "step": 252130 }, { "epoch": 1.610850593511621, "grad_norm": 1.6649965047836304, "learning_rate": 9.082378096479805e-06, "loss": 1.04, "step": 252140 }, { "epoch": 1.6109144806613598, "grad_norm": 3.090346097946167, "learning_rate": 9.079494555145684e-06, "loss": 1.0039, "step": 252150 }, { "epoch": 1.6109783678110985, "grad_norm": 0.8761329650878906, "learning_rate": 9.076611425916449e-06, "loss": 0.7212, "step": 252160 }, { "epoch": 1.6110422549608372, "grad_norm": 0.6641421318054199, "learning_rate": 9.07372870882115e-06, "loss": 0.8803, "step": 252170 }, { "epoch": 1.611106142110576, "grad_norm": 1.0108219385147095, "learning_rate": 9.0708464038888e-06, "loss": 0.883, "step": 252180 }, { "epoch": 1.6111700292603146, "grad_norm": 0.725486695766449, "learning_rate": 9.067964511148458e-06, "loss": 0.9893, "step": 252190 }, { "epoch": 1.6112339164100533, "grad_norm": 0.5850199460983276, "learning_rate": 9.065083030629102e-06, "loss": 0.7496, "step": 252200 }, { "epoch": 1.611297803559792, "grad_norm": 0.5066903829574585, "learning_rate": 9.062201962359773e-06, "loss": 0.6822, "step": 252210 }, { "epoch": 1.6113616907095307, "grad_norm": 1.204042673110962, "learning_rate": 9.059321306369495e-06, "loss": 0.8696, "step": 252220 }, { "epoch": 1.6114255778592694, "grad_norm": 0.8453181385993958, "learning_rate": 9.056441062687259e-06, "loss": 0.7135, "step": 252230 }, { "epoch": 1.6114894650090081, "grad_norm": 0.9459882378578186, "learning_rate": 9.0535612313421e-06, "loss": 0.645, "step": 252240 }, { "epoch": 1.6115533521587468, "grad_norm": 0.8971336483955383, "learning_rate": 9.050681812362982e-06, "loss": 0.9969, "step": 252250 }, { "epoch": 1.6116172393084853, "grad_norm": 0.8886403441429138, "learning_rate": 9.047802805778948e-06, "loss": 0.8814, "step": 252260 }, { "epoch": 1.6116811264582243, "grad_norm": 1.397530436515808, "learning_rate": 9.044924211618948e-06, "loss": 0.8373, "step": 252270 }, { "epoch": 1.6117450136079627, "grad_norm": 0.8331180214881897, "learning_rate": 9.042046029912005e-06, "loss": 0.7237, "step": 252280 }, { "epoch": 1.6118089007577017, "grad_norm": 0.912804365158081, "learning_rate": 9.03916826068708e-06, "loss": 0.9643, "step": 252290 }, { "epoch": 1.6118727879074402, "grad_norm": 0.8552295565605164, "learning_rate": 9.036290903973183e-06, "loss": 1.1771, "step": 252300 }, { "epoch": 1.611936675057179, "grad_norm": 1.0347743034362793, "learning_rate": 9.033413959799258e-06, "loss": 0.842, "step": 252310 }, { "epoch": 1.6120005622069176, "grad_norm": 0.9951327443122864, "learning_rate": 9.030537428194314e-06, "loss": 0.7409, "step": 252320 }, { "epoch": 1.6120644493566565, "grad_norm": 0.7869225144386292, "learning_rate": 9.027661309187285e-06, "loss": 0.8458, "step": 252330 }, { "epoch": 1.612128336506395, "grad_norm": 0.9086364507675171, "learning_rate": 9.024785602807168e-06, "loss": 0.7975, "step": 252340 }, { "epoch": 1.612192223656134, "grad_norm": 0.7900458574295044, "learning_rate": 9.021910309082893e-06, "loss": 0.7442, "step": 252350 }, { "epoch": 1.6122561108058724, "grad_norm": 0.7535526752471924, "learning_rate": 9.019035428043443e-06, "loss": 0.8918, "step": 252360 }, { "epoch": 1.6123199979556113, "grad_norm": 0.651496410369873, "learning_rate": 9.01616095971775e-06, "loss": 1.081, "step": 252370 }, { "epoch": 1.6123838851053498, "grad_norm": 1.4786691665649414, "learning_rate": 9.013286904134788e-06, "loss": 0.8141, "step": 252380 }, { "epoch": 1.6124477722550887, "grad_norm": 0.8722392320632935, "learning_rate": 9.010413261323469e-06, "loss": 0.7453, "step": 252390 }, { "epoch": 1.6125116594048272, "grad_norm": 0.6708277463912964, "learning_rate": 9.00754003131275e-06, "loss": 1.1094, "step": 252400 }, { "epoch": 1.6125755465545661, "grad_norm": 1.2941380739212036, "learning_rate": 9.00466721413158e-06, "loss": 0.8299, "step": 252410 }, { "epoch": 1.6126394337043046, "grad_norm": 1.2671501636505127, "learning_rate": 9.00179480980886e-06, "loss": 0.7799, "step": 252420 }, { "epoch": 1.6127033208540436, "grad_norm": 0.8946260213851929, "learning_rate": 8.998922818373557e-06, "loss": 0.971, "step": 252430 }, { "epoch": 1.612767208003782, "grad_norm": 1.0225296020507812, "learning_rate": 8.996051239854553e-06, "loss": 1.3356, "step": 252440 }, { "epoch": 1.612831095153521, "grad_norm": 1.6290887594223022, "learning_rate": 8.993180074280799e-06, "loss": 0.6761, "step": 252450 }, { "epoch": 1.6128949823032595, "grad_norm": 1.9371238946914673, "learning_rate": 8.990309321681184e-06, "loss": 1.1749, "step": 252460 }, { "epoch": 1.6129588694529984, "grad_norm": 0.8040488362312317, "learning_rate": 8.98743898208465e-06, "loss": 0.9965, "step": 252470 }, { "epoch": 1.6130227566027369, "grad_norm": 0.9329996705055237, "learning_rate": 8.98456905552007e-06, "loss": 0.7557, "step": 252480 }, { "epoch": 1.6130866437524758, "grad_norm": 0.7548239231109619, "learning_rate": 8.981699542016376e-06, "loss": 0.9107, "step": 252490 }, { "epoch": 1.6131505309022143, "grad_norm": 1.1711938381195068, "learning_rate": 8.978830441602443e-06, "loss": 0.9201, "step": 252500 }, { "epoch": 1.613214418051953, "grad_norm": 1.1052647829055786, "learning_rate": 8.975961754307183e-06, "loss": 0.7716, "step": 252510 }, { "epoch": 1.6132783052016917, "grad_norm": 0.6817499399185181, "learning_rate": 8.973093480159472e-06, "loss": 0.8824, "step": 252520 }, { "epoch": 1.6133421923514304, "grad_norm": 1.1585290431976318, "learning_rate": 8.970225619188216e-06, "loss": 0.9834, "step": 252530 }, { "epoch": 1.613406079501169, "grad_norm": 0.9899934530258179, "learning_rate": 8.967358171422269e-06, "loss": 0.8478, "step": 252540 }, { "epoch": 1.6134699666509078, "grad_norm": 0.9996746778488159, "learning_rate": 8.964491136890535e-06, "loss": 0.6712, "step": 252550 }, { "epoch": 1.6135338538006465, "grad_norm": 1.0687540769577026, "learning_rate": 8.961624515621864e-06, "loss": 0.8051, "step": 252560 }, { "epoch": 1.6135977409503852, "grad_norm": 0.8011711239814758, "learning_rate": 8.958758307645148e-06, "loss": 0.7865, "step": 252570 }, { "epoch": 1.613661628100124, "grad_norm": 0.9394097924232483, "learning_rate": 8.955892512989233e-06, "loss": 0.6621, "step": 252580 }, { "epoch": 1.6137255152498626, "grad_norm": 1.0367693901062012, "learning_rate": 8.953027131683e-06, "loss": 0.9656, "step": 252590 }, { "epoch": 1.6137894023996013, "grad_norm": 1.1489052772521973, "learning_rate": 8.950162163755283e-06, "loss": 0.8708, "step": 252600 }, { "epoch": 1.61385328954934, "grad_norm": 2.150038003921509, "learning_rate": 8.947297609234944e-06, "loss": 0.8722, "step": 252610 }, { "epoch": 1.6139171766990787, "grad_norm": 0.9468705058097839, "learning_rate": 8.944433468150848e-06, "loss": 1.1167, "step": 252620 }, { "epoch": 1.6139810638488175, "grad_norm": 0.9067215323448181, "learning_rate": 8.941569740531808e-06, "loss": 0.876, "step": 252630 }, { "epoch": 1.6140449509985562, "grad_norm": 0.7629149556159973, "learning_rate": 8.938706426406702e-06, "loss": 0.8063, "step": 252640 }, { "epoch": 1.6141088381482949, "grad_norm": 1.2172737121582031, "learning_rate": 8.935843525804328e-06, "loss": 0.8134, "step": 252650 }, { "epoch": 1.6141727252980336, "grad_norm": 1.2737256288528442, "learning_rate": 8.932981038753547e-06, "loss": 0.714, "step": 252660 }, { "epoch": 1.6142366124477723, "grad_norm": 0.5625755190849304, "learning_rate": 8.930118965283174e-06, "loss": 0.6967, "step": 252670 }, { "epoch": 1.614300499597511, "grad_norm": 1.1674059629440308, "learning_rate": 8.927257305422038e-06, "loss": 0.9341, "step": 252680 }, { "epoch": 1.6143643867472497, "grad_norm": 1.0258337259292603, "learning_rate": 8.924396059198936e-06, "loss": 1.0119, "step": 252690 }, { "epoch": 1.6144282738969884, "grad_norm": 1.1566420793533325, "learning_rate": 8.921535226642718e-06, "loss": 0.9388, "step": 252700 }, { "epoch": 1.614492161046727, "grad_norm": 0.6528016328811646, "learning_rate": 8.918674807782163e-06, "loss": 0.9135, "step": 252710 }, { "epoch": 1.6145560481964658, "grad_norm": 1.0167995691299438, "learning_rate": 8.915814802646093e-06, "loss": 0.9045, "step": 252720 }, { "epoch": 1.6146199353462045, "grad_norm": 1.4415538311004639, "learning_rate": 8.912955211263323e-06, "loss": 0.971, "step": 252730 }, { "epoch": 1.6146838224959432, "grad_norm": 1.1365299224853516, "learning_rate": 8.91009603366263e-06, "loss": 1.3696, "step": 252740 }, { "epoch": 1.6147477096456817, "grad_norm": 0.9816776514053345, "learning_rate": 8.907237269872831e-06, "loss": 0.7537, "step": 252750 }, { "epoch": 1.6148115967954206, "grad_norm": 0.7047487497329712, "learning_rate": 8.904378919922684e-06, "loss": 1.2247, "step": 252760 }, { "epoch": 1.6148754839451591, "grad_norm": 1.297702670097351, "learning_rate": 8.901520983841017e-06, "loss": 0.9188, "step": 252770 }, { "epoch": 1.614939371094898, "grad_norm": 0.8933472037315369, "learning_rate": 8.89866346165657e-06, "loss": 0.8977, "step": 252780 }, { "epoch": 1.6150032582446365, "grad_norm": 0.910576343536377, "learning_rate": 8.895806353398151e-06, "loss": 0.8402, "step": 252790 }, { "epoch": 1.6150671453943755, "grad_norm": 0.6891723871231079, "learning_rate": 8.892949659094513e-06, "loss": 1.0497, "step": 252800 }, { "epoch": 1.615131032544114, "grad_norm": 0.7324727177619934, "learning_rate": 8.890093378774439e-06, "loss": 0.8316, "step": 252810 }, { "epoch": 1.6151949196938529, "grad_norm": 1.0393435955047607, "learning_rate": 8.887237512466685e-06, "loss": 1.2601, "step": 252820 }, { "epoch": 1.6152588068435914, "grad_norm": 0.8941654562950134, "learning_rate": 8.88438206020003e-06, "loss": 1.1747, "step": 252830 }, { "epoch": 1.6153226939933303, "grad_norm": 0.872191846370697, "learning_rate": 8.8815270220032e-06, "loss": 0.6791, "step": 252840 }, { "epoch": 1.6153865811430688, "grad_norm": 0.8596194386482239, "learning_rate": 8.878672397904986e-06, "loss": 0.7369, "step": 252850 }, { "epoch": 1.6154504682928077, "grad_norm": 1.0066167116165161, "learning_rate": 8.875818187934098e-06, "loss": 0.6986, "step": 252860 }, { "epoch": 1.6155143554425462, "grad_norm": 0.8400135040283203, "learning_rate": 8.87296439211931e-06, "loss": 0.8633, "step": 252870 }, { "epoch": 1.615578242592285, "grad_norm": 1.0368555784225464, "learning_rate": 8.870111010489341e-06, "loss": 1.041, "step": 252880 }, { "epoch": 1.6156421297420236, "grad_norm": 0.6796692609786987, "learning_rate": 8.867258043072946e-06, "loss": 0.9529, "step": 252890 }, { "epoch": 1.6157060168917625, "grad_norm": 0.8098213076591492, "learning_rate": 8.864405489898837e-06, "loss": 0.9878, "step": 252900 }, { "epoch": 1.615769904041501, "grad_norm": 1.901564121246338, "learning_rate": 8.86155335099576e-06, "loss": 0.7924, "step": 252910 }, { "epoch": 1.61583379119124, "grad_norm": 0.9435287117958069, "learning_rate": 8.858701626392425e-06, "loss": 1.1102, "step": 252920 }, { "epoch": 1.6158976783409784, "grad_norm": 1.3926451206207275, "learning_rate": 8.855850316117554e-06, "loss": 1.0823, "step": 252930 }, { "epoch": 1.6159615654907173, "grad_norm": 0.9484646916389465, "learning_rate": 8.852999420199876e-06, "loss": 1.1486, "step": 252940 }, { "epoch": 1.6160254526404558, "grad_norm": 1.0296976566314697, "learning_rate": 8.85014893866808e-06, "loss": 1.0762, "step": 252950 }, { "epoch": 1.6160893397901948, "grad_norm": 0.8114399909973145, "learning_rate": 8.847298871550896e-06, "loss": 1.0435, "step": 252960 }, { "epoch": 1.6161532269399332, "grad_norm": 1.1355493068695068, "learning_rate": 8.844449218877005e-06, "loss": 0.9153, "step": 252970 }, { "epoch": 1.6162171140896722, "grad_norm": 0.7851929068565369, "learning_rate": 8.841599980675125e-06, "loss": 0.6855, "step": 252980 }, { "epoch": 1.6162810012394107, "grad_norm": 1.2407293319702148, "learning_rate": 8.838751156973923e-06, "loss": 0.9023, "step": 252990 }, { "epoch": 1.6163448883891494, "grad_norm": 1.2262955904006958, "learning_rate": 8.835902747802128e-06, "loss": 1.0251, "step": 253000 }, { "epoch": 1.616408775538888, "grad_norm": 1.3506430387496948, "learning_rate": 8.833054753188386e-06, "loss": 0.8255, "step": 253010 }, { "epoch": 1.6164726626886268, "grad_norm": 0.7670350670814514, "learning_rate": 8.830207173161408e-06, "loss": 0.7006, "step": 253020 }, { "epoch": 1.6165365498383655, "grad_norm": 0.9348157644271851, "learning_rate": 8.827360007749852e-06, "loss": 0.8525, "step": 253030 }, { "epoch": 1.6166004369881042, "grad_norm": 1.010741949081421, "learning_rate": 8.824513256982414e-06, "loss": 0.9768, "step": 253040 }, { "epoch": 1.616664324137843, "grad_norm": 0.9472290277481079, "learning_rate": 8.821666920887733e-06, "loss": 0.846, "step": 253050 }, { "epoch": 1.6167282112875816, "grad_norm": 1.4080737829208374, "learning_rate": 8.818820999494504e-06, "loss": 1.032, "step": 253060 }, { "epoch": 1.6167920984373203, "grad_norm": 0.8792275190353394, "learning_rate": 8.815975492831363e-06, "loss": 0.8813, "step": 253070 }, { "epoch": 1.616855985587059, "grad_norm": 2.1530280113220215, "learning_rate": 8.813130400926988e-06, "loss": 0.8588, "step": 253080 }, { "epoch": 1.6169198727367977, "grad_norm": 0.9053194522857666, "learning_rate": 8.810285723810013e-06, "loss": 0.7985, "step": 253090 }, { "epoch": 1.6169837598865364, "grad_norm": 0.6816472411155701, "learning_rate": 8.807441461509108e-06, "loss": 1.087, "step": 253100 }, { "epoch": 1.6170476470362751, "grad_norm": 1.0651800632476807, "learning_rate": 8.804597614052885e-06, "loss": 0.908, "step": 253110 }, { "epoch": 1.6171115341860138, "grad_norm": 1.0918779373168945, "learning_rate": 8.801754181470023e-06, "loss": 1.0176, "step": 253120 }, { "epoch": 1.6171754213357525, "grad_norm": 2.096322774887085, "learning_rate": 8.79891116378912e-06, "loss": 0.938, "step": 253130 }, { "epoch": 1.6172393084854912, "grad_norm": 1.3487486839294434, "learning_rate": 8.796068561038828e-06, "loss": 0.9764, "step": 253140 }, { "epoch": 1.61730319563523, "grad_norm": 0.8517560362815857, "learning_rate": 8.793226373247787e-06, "loss": 0.828, "step": 253150 }, { "epoch": 1.6173670827849687, "grad_norm": 0.764380931854248, "learning_rate": 8.790384600444601e-06, "loss": 0.8548, "step": 253160 }, { "epoch": 1.6174309699347074, "grad_norm": 1.2008692026138306, "learning_rate": 8.787543242657891e-06, "loss": 0.9342, "step": 253170 }, { "epoch": 1.617494857084446, "grad_norm": 0.9357091188430786, "learning_rate": 8.78470229991627e-06, "loss": 0.6565, "step": 253180 }, { "epoch": 1.6175587442341848, "grad_norm": 1.0450602769851685, "learning_rate": 8.781861772248362e-06, "loss": 0.8956, "step": 253190 }, { "epoch": 1.6176226313839235, "grad_norm": 0.9648509621620178, "learning_rate": 8.779021659682752e-06, "loss": 1.0464, "step": 253200 }, { "epoch": 1.6176865185336622, "grad_norm": 0.9442372918128967, "learning_rate": 8.776181962248065e-06, "loss": 0.9629, "step": 253210 }, { "epoch": 1.617750405683401, "grad_norm": 1.1622222661972046, "learning_rate": 8.773342679972879e-06, "loss": 1.0001, "step": 253220 }, { "epoch": 1.6178142928331396, "grad_norm": 0.7686667442321777, "learning_rate": 8.77050381288581e-06, "loss": 0.9153, "step": 253230 }, { "epoch": 1.617878179982878, "grad_norm": 0.7704346179962158, "learning_rate": 8.767665361015425e-06, "loss": 0.7974, "step": 253240 }, { "epoch": 1.617942067132617, "grad_norm": 0.8347066640853882, "learning_rate": 8.764827324390324e-06, "loss": 1.2218, "step": 253250 }, { "epoch": 1.6180059542823555, "grad_norm": 3.1967313289642334, "learning_rate": 8.761989703039091e-06, "loss": 0.9429, "step": 253260 }, { "epoch": 1.6180698414320944, "grad_norm": 0.7391064763069153, "learning_rate": 8.759152496990291e-06, "loss": 0.8333, "step": 253270 }, { "epoch": 1.618133728581833, "grad_norm": 1.1434670686721802, "learning_rate": 8.756315706272516e-06, "loss": 0.885, "step": 253280 }, { "epoch": 1.6181976157315718, "grad_norm": 1.1375341415405273, "learning_rate": 8.753479330914305e-06, "loss": 0.9347, "step": 253290 }, { "epoch": 1.6182615028813103, "grad_norm": 0.8434621691703796, "learning_rate": 8.750643370944256e-06, "loss": 0.7543, "step": 253300 }, { "epoch": 1.6183253900310492, "grad_norm": 1.2263952493667603, "learning_rate": 8.747807826390902e-06, "loss": 0.7379, "step": 253310 }, { "epoch": 1.6183892771807877, "grad_norm": 0.9305620789527893, "learning_rate": 8.744972697282821e-06, "loss": 0.8767, "step": 253320 }, { "epoch": 1.6184531643305267, "grad_norm": 0.9405209422111511, "learning_rate": 8.742137983648552e-06, "loss": 0.9911, "step": 253330 }, { "epoch": 1.6185170514802651, "grad_norm": 0.964889407157898, "learning_rate": 8.739303685516647e-06, "loss": 0.9723, "step": 253340 }, { "epoch": 1.618580938630004, "grad_norm": 1.0919055938720703, "learning_rate": 8.736469802915648e-06, "loss": 0.8663, "step": 253350 }, { "epoch": 1.6186448257797426, "grad_norm": 0.9384715557098389, "learning_rate": 8.7336363358741e-06, "loss": 0.9946, "step": 253360 }, { "epoch": 1.6187087129294815, "grad_norm": 0.8242424726486206, "learning_rate": 8.730803284420524e-06, "loss": 1.0228, "step": 253370 }, { "epoch": 1.61877260007922, "grad_norm": 1.8711493015289307, "learning_rate": 8.727970648583478e-06, "loss": 0.943, "step": 253380 }, { "epoch": 1.618836487228959, "grad_norm": 1.430071234703064, "learning_rate": 8.725138428391461e-06, "loss": 0.8167, "step": 253390 }, { "epoch": 1.6189003743786974, "grad_norm": 0.7001949548721313, "learning_rate": 8.722306623873016e-06, "loss": 0.7138, "step": 253400 }, { "epoch": 1.6189642615284363, "grad_norm": 0.9516452550888062, "learning_rate": 8.71947523505664e-06, "loss": 0.8375, "step": 253410 }, { "epoch": 1.6190281486781748, "grad_norm": 0.8716029524803162, "learning_rate": 8.71664426197088e-06, "loss": 0.7683, "step": 253420 }, { "epoch": 1.6190920358279137, "grad_norm": 1.038964033126831, "learning_rate": 8.713813704644208e-06, "loss": 1.0598, "step": 253430 }, { "epoch": 1.6191559229776522, "grad_norm": 0.9941467046737671, "learning_rate": 8.71098356310517e-06, "loss": 0.7832, "step": 253440 }, { "epoch": 1.6192198101273911, "grad_norm": 1.290126085281372, "learning_rate": 8.708153837382227e-06, "loss": 0.9266, "step": 253450 }, { "epoch": 1.6192836972771296, "grad_norm": 0.6855711936950684, "learning_rate": 8.7053245275039e-06, "loss": 1.1036, "step": 253460 }, { "epoch": 1.6193475844268683, "grad_norm": 0.8235642910003662, "learning_rate": 8.702495633498697e-06, "loss": 0.8896, "step": 253470 }, { "epoch": 1.619411471576607, "grad_norm": 0.8104490637779236, "learning_rate": 8.699667155395074e-06, "loss": 0.7131, "step": 253480 }, { "epoch": 1.6194753587263457, "grad_norm": 0.9141734838485718, "learning_rate": 8.696839093221542e-06, "loss": 0.6927, "step": 253490 }, { "epoch": 1.6195392458760844, "grad_norm": 0.9874205589294434, "learning_rate": 8.694011447006568e-06, "loss": 0.8266, "step": 253500 }, { "epoch": 1.6196031330258231, "grad_norm": 2.374424457550049, "learning_rate": 8.691184216778642e-06, "loss": 0.7966, "step": 253510 }, { "epoch": 1.6196670201755619, "grad_norm": 1.5083518028259277, "learning_rate": 8.68835740256621e-06, "loss": 1.0701, "step": 253520 }, { "epoch": 1.6197309073253006, "grad_norm": 0.8272247314453125, "learning_rate": 8.68553100439778e-06, "loss": 0.7651, "step": 253530 }, { "epoch": 1.6197947944750393, "grad_norm": 1.3498271703720093, "learning_rate": 8.682705022301779e-06, "loss": 0.8375, "step": 253540 }, { "epoch": 1.619858681624778, "grad_norm": 0.7562444806098938, "learning_rate": 8.679879456306695e-06, "loss": 0.952, "step": 253550 }, { "epoch": 1.6199225687745167, "grad_norm": 0.9159499406814575, "learning_rate": 8.677054306440956e-06, "loss": 0.9591, "step": 253560 }, { "epoch": 1.6199864559242554, "grad_norm": 0.5267454385757446, "learning_rate": 8.67422957273305e-06, "loss": 0.7717, "step": 253570 }, { "epoch": 1.620050343073994, "grad_norm": 1.2697906494140625, "learning_rate": 8.671405255211384e-06, "loss": 1.0003, "step": 253580 }, { "epoch": 1.6201142302237328, "grad_norm": 0.8089349865913391, "learning_rate": 8.668581353904436e-06, "loss": 0.6609, "step": 253590 }, { "epoch": 1.6201781173734715, "grad_norm": 0.5568481087684631, "learning_rate": 8.665757868840624e-06, "loss": 0.6861, "step": 253600 }, { "epoch": 1.6202420045232102, "grad_norm": 1.1049556732177734, "learning_rate": 8.662934800048395e-06, "loss": 0.7879, "step": 253610 }, { "epoch": 1.620305891672949, "grad_norm": 1.5867961645126343, "learning_rate": 8.660112147556165e-06, "loss": 0.5971, "step": 253620 }, { "epoch": 1.6203697788226876, "grad_norm": 0.8453254103660583, "learning_rate": 8.65728991139238e-06, "loss": 0.7813, "step": 253630 }, { "epoch": 1.6204336659724263, "grad_norm": 0.6466057896614075, "learning_rate": 8.654468091585455e-06, "loss": 0.6963, "step": 253640 }, { "epoch": 1.620497553122165, "grad_norm": 0.8569933176040649, "learning_rate": 8.65164668816379e-06, "loss": 0.9115, "step": 253650 }, { "epoch": 1.6205614402719037, "grad_norm": 1.1944561004638672, "learning_rate": 8.648825701155828e-06, "loss": 0.8411, "step": 253660 }, { "epoch": 1.6206253274216424, "grad_norm": 1.0112055540084839, "learning_rate": 8.646005130589951e-06, "loss": 0.7752, "step": 253670 }, { "epoch": 1.6206892145713812, "grad_norm": 0.755969762802124, "learning_rate": 8.643184976494595e-06, "loss": 0.918, "step": 253680 }, { "epoch": 1.6207531017211199, "grad_norm": 1.1352064609527588, "learning_rate": 8.64036523889813e-06, "loss": 0.7852, "step": 253690 }, { "epoch": 1.6208169888708586, "grad_norm": 1.1978332996368408, "learning_rate": 8.637545917828977e-06, "loss": 0.9255, "step": 253700 }, { "epoch": 1.6208808760205973, "grad_norm": 0.9351130127906799, "learning_rate": 8.634727013315513e-06, "loss": 1.0426, "step": 253710 }, { "epoch": 1.620944763170336, "grad_norm": 1.1238881349563599, "learning_rate": 8.631908525386146e-06, "loss": 1.0592, "step": 253720 }, { "epoch": 1.6210086503200745, "grad_norm": 0.7413579821586609, "learning_rate": 8.629090454069233e-06, "loss": 0.9098, "step": 253730 }, { "epoch": 1.6210725374698134, "grad_norm": 1.8136285543441772, "learning_rate": 8.626272799393188e-06, "loss": 0.807, "step": 253740 }, { "epoch": 1.6211364246195519, "grad_norm": 1.440377950668335, "learning_rate": 8.623455561386351e-06, "loss": 0.8329, "step": 253750 }, { "epoch": 1.6212003117692908, "grad_norm": 1.3558531999588013, "learning_rate": 8.620638740077125e-06, "loss": 1.0271, "step": 253760 }, { "epoch": 1.6212641989190293, "grad_norm": 1.0401147603988647, "learning_rate": 8.617822335493858e-06, "loss": 0.7169, "step": 253770 }, { "epoch": 1.6213280860687682, "grad_norm": 1.2938861846923828, "learning_rate": 8.615006347664917e-06, "loss": 1.0175, "step": 253780 }, { "epoch": 1.6213919732185067, "grad_norm": 1.347000241279602, "learning_rate": 8.612190776618678e-06, "loss": 1.2466, "step": 253790 }, { "epoch": 1.6214558603682456, "grad_norm": 1.1044390201568604, "learning_rate": 8.60937562238347e-06, "loss": 0.7113, "step": 253800 }, { "epoch": 1.6215197475179841, "grad_norm": 0.8815792202949524, "learning_rate": 8.606560884987674e-06, "loss": 0.9855, "step": 253810 }, { "epoch": 1.621583634667723, "grad_norm": 0.9015599489212036, "learning_rate": 8.603746564459603e-06, "loss": 0.8028, "step": 253820 }, { "epoch": 1.6216475218174615, "grad_norm": 0.8410263061523438, "learning_rate": 8.600932660827631e-06, "loss": 0.6916, "step": 253830 }, { "epoch": 1.6217114089672005, "grad_norm": 0.9172081351280212, "learning_rate": 8.598119174120072e-06, "loss": 0.8827, "step": 253840 }, { "epoch": 1.621775296116939, "grad_norm": 0.9500551819801331, "learning_rate": 8.595306104365281e-06, "loss": 0.8207, "step": 253850 }, { "epoch": 1.6218391832666779, "grad_norm": 1.037736177444458, "learning_rate": 8.592493451591566e-06, "loss": 0.6628, "step": 253860 }, { "epoch": 1.6219030704164163, "grad_norm": 0.6005664467811584, "learning_rate": 8.589681215827278e-06, "loss": 1.1371, "step": 253870 }, { "epoch": 1.6219669575661553, "grad_norm": 2.2810683250427246, "learning_rate": 8.58686939710071e-06, "loss": 0.8308, "step": 253880 }, { "epoch": 1.6220308447158938, "grad_norm": 0.8344201445579529, "learning_rate": 8.58405799544021e-06, "loss": 0.818, "step": 253890 }, { "epoch": 1.6220947318656327, "grad_norm": 1.553041696548462, "learning_rate": 8.58124701087406e-06, "loss": 0.7736, "step": 253900 }, { "epoch": 1.6221586190153712, "grad_norm": 0.7343198657035828, "learning_rate": 8.578436443430599e-06, "loss": 0.8195, "step": 253910 }, { "epoch": 1.62222250616511, "grad_norm": 0.99444979429245, "learning_rate": 8.575626293138105e-06, "loss": 0.7379, "step": 253920 }, { "epoch": 1.6222863933148486, "grad_norm": 1.0453051328659058, "learning_rate": 8.572816560024904e-06, "loss": 1.2449, "step": 253930 }, { "epoch": 1.6223502804645875, "grad_norm": 1.0699635744094849, "learning_rate": 8.570007244119271e-06, "loss": 0.7713, "step": 253940 }, { "epoch": 1.622414167614326, "grad_norm": 0.913071870803833, "learning_rate": 8.567198345449517e-06, "loss": 0.7638, "step": 253950 }, { "epoch": 1.6224780547640647, "grad_norm": 0.2786030173301697, "learning_rate": 8.564389864043909e-06, "loss": 0.684, "step": 253960 }, { "epoch": 1.6225419419138034, "grad_norm": 0.9149852395057678, "learning_rate": 8.561581799930752e-06, "loss": 0.7624, "step": 253970 }, { "epoch": 1.6226058290635421, "grad_norm": 0.7636930346488953, "learning_rate": 8.558774153138304e-06, "loss": 0.7989, "step": 253980 }, { "epoch": 1.6226697162132808, "grad_norm": 1.0884915590286255, "learning_rate": 8.555966923694848e-06, "loss": 0.9634, "step": 253990 }, { "epoch": 1.6227336033630195, "grad_norm": 0.7749305963516235, "learning_rate": 8.553160111628677e-06, "loss": 0.7077, "step": 254000 }, { "epoch": 1.6227974905127582, "grad_norm": 0.9463643431663513, "learning_rate": 8.55035371696803e-06, "loss": 0.8166, "step": 254010 }, { "epoch": 1.622861377662497, "grad_norm": 1.131610631942749, "learning_rate": 8.547547739741186e-06, "loss": 1.0242, "step": 254020 }, { "epoch": 1.6229252648122356, "grad_norm": 0.9619300961494446, "learning_rate": 8.54474217997639e-06, "loss": 0.9237, "step": 254030 }, { "epoch": 1.6229891519619744, "grad_norm": 1.0643653869628906, "learning_rate": 8.541937037701914e-06, "loss": 0.7191, "step": 254040 }, { "epoch": 1.623053039111713, "grad_norm": 0.9629873633384705, "learning_rate": 8.539132312945985e-06, "loss": 0.6617, "step": 254050 }, { "epoch": 1.6231169262614518, "grad_norm": 0.9147536754608154, "learning_rate": 8.536328005736876e-06, "loss": 0.7178, "step": 254060 }, { "epoch": 1.6231808134111905, "grad_norm": 1.4298889636993408, "learning_rate": 8.533804486274533e-06, "loss": 1.0717, "step": 254070 }, { "epoch": 1.6232447005609292, "grad_norm": 1.483441948890686, "learning_rate": 8.531000972482162e-06, "loss": 0.6687, "step": 254080 }, { "epoch": 1.6233085877106679, "grad_norm": 0.825210690498352, "learning_rate": 8.528197876318472e-06, "loss": 1.025, "step": 254090 }, { "epoch": 1.6233724748604066, "grad_norm": 1.3113638162612915, "learning_rate": 8.525395197811703e-06, "loss": 0.8856, "step": 254100 }, { "epoch": 1.6234363620101453, "grad_norm": 1.4372659921646118, "learning_rate": 8.522592936990103e-06, "loss": 0.8735, "step": 254110 }, { "epoch": 1.623500249159884, "grad_norm": 0.9195582270622253, "learning_rate": 8.519791093881862e-06, "loss": 0.7764, "step": 254120 }, { "epoch": 1.6235641363096227, "grad_norm": 0.6057882308959961, "learning_rate": 8.516989668515224e-06, "loss": 0.7851, "step": 254130 }, { "epoch": 1.6236280234593614, "grad_norm": 0.7561864256858826, "learning_rate": 8.514188660918377e-06, "loss": 0.8902, "step": 254140 }, { "epoch": 1.6236919106091001, "grad_norm": 1.9797289371490479, "learning_rate": 8.511388071119548e-06, "loss": 0.9164, "step": 254150 }, { "epoch": 1.6237557977588388, "grad_norm": 0.8142422437667847, "learning_rate": 8.50858789914692e-06, "loss": 0.7603, "step": 254160 }, { "epoch": 1.6238196849085775, "grad_norm": 1.075189232826233, "learning_rate": 8.505788145028725e-06, "loss": 0.8073, "step": 254170 }, { "epoch": 1.6238835720583162, "grad_norm": 0.8327805399894714, "learning_rate": 8.502988808793127e-06, "loss": 1.0442, "step": 254180 }, { "epoch": 1.623947459208055, "grad_norm": 0.9066655039787292, "learning_rate": 8.500189890468341e-06, "loss": 1.0807, "step": 254190 }, { "epoch": 1.6240113463577934, "grad_norm": 0.7944930791854858, "learning_rate": 8.497391390082538e-06, "loss": 1.1007, "step": 254200 }, { "epoch": 1.6240752335075324, "grad_norm": 1.1014735698699951, "learning_rate": 8.494593307663917e-06, "loss": 0.9846, "step": 254210 }, { "epoch": 1.6241391206572708, "grad_norm": 1.0590271949768066, "learning_rate": 8.491795643240635e-06, "loss": 1.0084, "step": 254220 }, { "epoch": 1.6242030078070098, "grad_norm": 1.487004041671753, "learning_rate": 8.488998396840896e-06, "loss": 0.8395, "step": 254230 }, { "epoch": 1.6242668949567483, "grad_norm": 0.5390102863311768, "learning_rate": 8.48620156849284e-06, "loss": 0.8143, "step": 254240 }, { "epoch": 1.6243307821064872, "grad_norm": 1.1597110033035278, "learning_rate": 8.483405158224666e-06, "loss": 0.8888, "step": 254250 }, { "epoch": 1.6243946692562257, "grad_norm": 1.2700620889663696, "learning_rate": 8.480609166064502e-06, "loss": 0.7846, "step": 254260 }, { "epoch": 1.6244585564059646, "grad_norm": 1.114956021308899, "learning_rate": 8.47781359204054e-06, "loss": 0.8657, "step": 254270 }, { "epoch": 1.624522443555703, "grad_norm": 1.4616273641586304, "learning_rate": 8.475018436180914e-06, "loss": 0.8768, "step": 254280 }, { "epoch": 1.624586330705442, "grad_norm": 1.03916597366333, "learning_rate": 8.472223698513765e-06, "loss": 1.1322, "step": 254290 }, { "epoch": 1.6246502178551805, "grad_norm": 0.8728145360946655, "learning_rate": 8.469429379067263e-06, "loss": 1.0256, "step": 254300 }, { "epoch": 1.6247141050049194, "grad_norm": 0.6270018815994263, "learning_rate": 8.466635477869523e-06, "loss": 0.9038, "step": 254310 }, { "epoch": 1.624777992154658, "grad_norm": 1.0088562965393066, "learning_rate": 8.463841994948707e-06, "loss": 1.1522, "step": 254320 }, { "epoch": 1.6248418793043968, "grad_norm": 1.4455088376998901, "learning_rate": 8.461048930332927e-06, "loss": 0.7958, "step": 254330 }, { "epoch": 1.6249057664541353, "grad_norm": 1.4557523727416992, "learning_rate": 8.458256284050325e-06, "loss": 0.7086, "step": 254340 }, { "epoch": 1.6249696536038742, "grad_norm": 1.1543890237808228, "learning_rate": 8.455464056129015e-06, "loss": 0.7853, "step": 254350 }, { "epoch": 1.6250335407536127, "grad_norm": 0.8756847977638245, "learning_rate": 8.452672246597132e-06, "loss": 0.9691, "step": 254360 }, { "epoch": 1.6250974279033517, "grad_norm": 0.8379682898521423, "learning_rate": 8.449880855482772e-06, "loss": 0.9012, "step": 254370 }, { "epoch": 1.6251613150530901, "grad_norm": 0.8428141474723816, "learning_rate": 8.447089882814074e-06, "loss": 0.7197, "step": 254380 }, { "epoch": 1.625225202202829, "grad_norm": 1.1772140264511108, "learning_rate": 8.444299328619116e-06, "loss": 1.1891, "step": 254390 }, { "epoch": 1.6252890893525676, "grad_norm": 1.0148288011550903, "learning_rate": 8.441509192926023e-06, "loss": 1.0552, "step": 254400 }, { "epoch": 1.6253529765023065, "grad_norm": 1.1200981140136719, "learning_rate": 8.438719475762873e-06, "loss": 0.9241, "step": 254410 }, { "epoch": 1.625416863652045, "grad_norm": 1.1166496276855469, "learning_rate": 8.435930177157775e-06, "loss": 0.6754, "step": 254420 }, { "epoch": 1.625480750801784, "grad_norm": 1.1475327014923096, "learning_rate": 8.43314129713883e-06, "loss": 0.8859, "step": 254430 }, { "epoch": 1.6255446379515224, "grad_norm": 1.413521647453308, "learning_rate": 8.4303528357341e-06, "loss": 0.942, "step": 254440 }, { "epoch": 1.625608525101261, "grad_norm": 1.0335079431533813, "learning_rate": 8.427564792971698e-06, "loss": 1.0567, "step": 254450 }, { "epoch": 1.6256724122509998, "grad_norm": 0.9358431696891785, "learning_rate": 8.424777168879667e-06, "loss": 0.9045, "step": 254460 }, { "epoch": 1.6257362994007385, "grad_norm": 1.4438246488571167, "learning_rate": 8.42198996348611e-06, "loss": 0.8445, "step": 254470 }, { "epoch": 1.6258001865504772, "grad_norm": 0.9652209877967834, "learning_rate": 8.419203176819068e-06, "loss": 0.868, "step": 254480 }, { "epoch": 1.625864073700216, "grad_norm": 0.8575952053070068, "learning_rate": 8.41641680890664e-06, "loss": 0.8968, "step": 254490 }, { "epoch": 1.6259279608499546, "grad_norm": 1.0472744703292847, "learning_rate": 8.413630859776855e-06, "loss": 0.6678, "step": 254500 }, { "epoch": 1.6259918479996933, "grad_norm": 0.7651621103286743, "learning_rate": 8.4108453294578e-06, "loss": 0.8726, "step": 254510 }, { "epoch": 1.626055735149432, "grad_norm": 0.8643214106559753, "learning_rate": 8.408060217977499e-06, "loss": 0.8884, "step": 254520 }, { "epoch": 1.6261196222991707, "grad_norm": 0.9992806911468506, "learning_rate": 8.40527552536402e-06, "loss": 0.8474, "step": 254530 }, { "epoch": 1.6261835094489094, "grad_norm": 0.8906753659248352, "learning_rate": 8.402491251645394e-06, "loss": 0.8043, "step": 254540 }, { "epoch": 1.6262473965986481, "grad_norm": 1.187233567237854, "learning_rate": 8.399707396849682e-06, "loss": 1.0656, "step": 254550 }, { "epoch": 1.6263112837483868, "grad_norm": 1.2150403261184692, "learning_rate": 8.396923961004888e-06, "loss": 0.76, "step": 254560 }, { "epoch": 1.6263751708981256, "grad_norm": 1.0672495365142822, "learning_rate": 8.394140944139079e-06, "loss": 0.9527, "step": 254570 }, { "epoch": 1.6264390580478643, "grad_norm": 0.9496098160743713, "learning_rate": 8.391358346280253e-06, "loss": 0.924, "step": 254580 }, { "epoch": 1.626502945197603, "grad_norm": 0.8727953433990479, "learning_rate": 8.388576167456453e-06, "loss": 1.0533, "step": 254590 }, { "epoch": 1.6265668323473417, "grad_norm": 1.1882212162017822, "learning_rate": 8.385794407695679e-06, "loss": 0.8036, "step": 254600 }, { "epoch": 1.6266307194970804, "grad_norm": 0.7046560645103455, "learning_rate": 8.38301306702597e-06, "loss": 0.6862, "step": 254610 }, { "epoch": 1.626694606646819, "grad_norm": 0.7783632874488831, "learning_rate": 8.380232145475314e-06, "loss": 0.923, "step": 254620 }, { "epoch": 1.6267584937965578, "grad_norm": 1.1140450239181519, "learning_rate": 8.377451643071722e-06, "loss": 0.8692, "step": 254630 }, { "epoch": 1.6268223809462965, "grad_norm": 0.5317877531051636, "learning_rate": 8.374671559843211e-06, "loss": 0.8846, "step": 254640 }, { "epoch": 1.6268862680960352, "grad_norm": 1.6173325777053833, "learning_rate": 8.371891895817763e-06, "loss": 1.2716, "step": 254650 }, { "epoch": 1.626950155245774, "grad_norm": 0.8460519909858704, "learning_rate": 8.369112651023386e-06, "loss": 0.8159, "step": 254660 }, { "epoch": 1.6270140423955126, "grad_norm": 0.9344705939292908, "learning_rate": 8.366333825488048e-06, "loss": 0.6481, "step": 254670 }, { "epoch": 1.6270779295452513, "grad_norm": 1.0719621181488037, "learning_rate": 8.363555419239754e-06, "loss": 0.9804, "step": 254680 }, { "epoch": 1.6271418166949898, "grad_norm": 0.5989803671836853, "learning_rate": 8.360777432306472e-06, "loss": 0.7973, "step": 254690 }, { "epoch": 1.6272057038447287, "grad_norm": 0.5963758826255798, "learning_rate": 8.357999864716192e-06, "loss": 0.9673, "step": 254700 }, { "epoch": 1.6272695909944672, "grad_norm": 1.047027349472046, "learning_rate": 8.355222716496874e-06, "loss": 0.8392, "step": 254710 }, { "epoch": 1.6273334781442061, "grad_norm": 1.8303179740905762, "learning_rate": 8.352445987676493e-06, "loss": 1.0897, "step": 254720 }, { "epoch": 1.6273973652939446, "grad_norm": 1.0254693031311035, "learning_rate": 8.349669678283006e-06, "loss": 1.1549, "step": 254730 }, { "epoch": 1.6274612524436836, "grad_norm": 0.9034429788589478, "learning_rate": 8.34689378834439e-06, "loss": 1.0297, "step": 254740 }, { "epoch": 1.627525139593422, "grad_norm": 0.9026135206222534, "learning_rate": 8.344118317888578e-06, "loss": 0.8806, "step": 254750 }, { "epoch": 1.627589026743161, "grad_norm": 0.9114247560501099, "learning_rate": 8.341343266943541e-06, "loss": 1.0349, "step": 254760 }, { "epoch": 1.6276529138928995, "grad_norm": 1.8976101875305176, "learning_rate": 8.338568635537214e-06, "loss": 1.2793, "step": 254770 }, { "epoch": 1.6277168010426384, "grad_norm": 0.6016636490821838, "learning_rate": 8.335794423697535e-06, "loss": 0.8066, "step": 254780 }, { "epoch": 1.6277806881923769, "grad_norm": 0.5638062357902527, "learning_rate": 8.333020631452465e-06, "loss": 0.6787, "step": 254790 }, { "epoch": 1.6278445753421158, "grad_norm": 0.8801025152206421, "learning_rate": 8.33024725882991e-06, "loss": 0.8708, "step": 254800 }, { "epoch": 1.6279084624918543, "grad_norm": 0.8659884333610535, "learning_rate": 8.327474305857824e-06, "loss": 0.9343, "step": 254810 }, { "epoch": 1.6279723496415932, "grad_norm": 1.0816649198532104, "learning_rate": 8.324701772564114e-06, "loss": 0.8127, "step": 254820 }, { "epoch": 1.6280362367913317, "grad_norm": 0.8168279528617859, "learning_rate": 8.321929658976724e-06, "loss": 0.8289, "step": 254830 }, { "epoch": 1.6281001239410706, "grad_norm": 1.38945734500885, "learning_rate": 8.319157965123542e-06, "loss": 0.6386, "step": 254840 }, { "epoch": 1.628164011090809, "grad_norm": 2.2664241790771484, "learning_rate": 8.316386691032518e-06, "loss": 0.9428, "step": 254850 }, { "epoch": 1.628227898240548, "grad_norm": 0.9218431115150452, "learning_rate": 8.313615836731525e-06, "loss": 0.7923, "step": 254860 }, { "epoch": 1.6282917853902865, "grad_norm": 1.1074163913726807, "learning_rate": 8.310845402248496e-06, "loss": 0.8064, "step": 254870 }, { "epoch": 1.6283556725400254, "grad_norm": 0.8705098628997803, "learning_rate": 8.308075387611309e-06, "loss": 0.6641, "step": 254880 }, { "epoch": 1.628419559689764, "grad_norm": 0.6592504382133484, "learning_rate": 8.305305792847884e-06, "loss": 0.8826, "step": 254890 }, { "epoch": 1.6284834468395029, "grad_norm": 0.715520441532135, "learning_rate": 8.30253661798609e-06, "loss": 0.7623, "step": 254900 }, { "epoch": 1.6285473339892413, "grad_norm": 1.0568970441818237, "learning_rate": 8.299767863053836e-06, "loss": 0.7528, "step": 254910 }, { "epoch": 1.6286112211389803, "grad_norm": 0.5976938009262085, "learning_rate": 8.296999528078985e-06, "loss": 0.8017, "step": 254920 }, { "epoch": 1.6286751082887188, "grad_norm": 1.312427282333374, "learning_rate": 8.294231613089438e-06, "loss": 1.0423, "step": 254930 }, { "epoch": 1.6287389954384575, "grad_norm": 0.9450312256813049, "learning_rate": 8.291464118113046e-06, "loss": 0.7212, "step": 254940 }, { "epoch": 1.6288028825881962, "grad_norm": 0.9719014763832092, "learning_rate": 8.288697043177695e-06, "loss": 0.8867, "step": 254950 }, { "epoch": 1.6288667697379349, "grad_norm": 2.3406286239624023, "learning_rate": 8.285930388311259e-06, "loss": 0.8943, "step": 254960 }, { "epoch": 1.6289306568876736, "grad_norm": 1.1877886056900024, "learning_rate": 8.283164153541583e-06, "loss": 0.8398, "step": 254970 }, { "epoch": 1.6289945440374123, "grad_norm": 1.5753027200698853, "learning_rate": 8.280398338896545e-06, "loss": 0.7947, "step": 254980 }, { "epoch": 1.629058431187151, "grad_norm": 1.7172194719314575, "learning_rate": 8.27763294440398e-06, "loss": 0.81, "step": 254990 }, { "epoch": 1.6291223183368897, "grad_norm": 1.424993872642517, "learning_rate": 8.274867970091755e-06, "loss": 1.0839, "step": 255000 }, { "epoch": 1.6291862054866284, "grad_norm": 1.0607999563217163, "learning_rate": 8.272103415987692e-06, "loss": 0.8039, "step": 255010 }, { "epoch": 1.629250092636367, "grad_norm": 1.1941611766815186, "learning_rate": 8.269339282119665e-06, "loss": 0.9033, "step": 255020 }, { "epoch": 1.6293139797861058, "grad_norm": 0.8026540875434875, "learning_rate": 8.266575568515478e-06, "loss": 0.7958, "step": 255030 }, { "epoch": 1.6293778669358445, "grad_norm": 1.5615814924240112, "learning_rate": 8.263812275202992e-06, "loss": 0.9509, "step": 255040 }, { "epoch": 1.6294417540855832, "grad_norm": 1.3261250257492065, "learning_rate": 8.261049402210014e-06, "loss": 0.8111, "step": 255050 }, { "epoch": 1.629505641235322, "grad_norm": 2.028870105743408, "learning_rate": 8.258286949564387e-06, "loss": 1.0351, "step": 255060 }, { "epoch": 1.6295695283850606, "grad_norm": 0.7149475812911987, "learning_rate": 8.255524917293912e-06, "loss": 0.8323, "step": 255070 }, { "epoch": 1.6296334155347993, "grad_norm": 0.812483549118042, "learning_rate": 8.252763305426425e-06, "loss": 0.7807, "step": 255080 }, { "epoch": 1.629697302684538, "grad_norm": 3.0065112113952637, "learning_rate": 8.250002113989712e-06, "loss": 0.9642, "step": 255090 }, { "epoch": 1.6297611898342768, "grad_norm": 1.015610933303833, "learning_rate": 8.24724134301162e-06, "loss": 0.8268, "step": 255100 }, { "epoch": 1.6298250769840155, "grad_norm": 0.6669619083404541, "learning_rate": 8.24448099251991e-06, "loss": 0.8502, "step": 255110 }, { "epoch": 1.6298889641337542, "grad_norm": 1.0304489135742188, "learning_rate": 8.241721062542413e-06, "loss": 0.7631, "step": 255120 }, { "epoch": 1.6299528512834929, "grad_norm": 0.6376091837882996, "learning_rate": 8.238961553106894e-06, "loss": 0.793, "step": 255130 }, { "epoch": 1.6300167384332316, "grad_norm": 1.7186837196350098, "learning_rate": 8.236202464241177e-06, "loss": 0.9544, "step": 255140 }, { "epoch": 1.6300806255829703, "grad_norm": 0.6518387794494629, "learning_rate": 8.233443795973023e-06, "loss": 0.7596, "step": 255150 }, { "epoch": 1.630144512732709, "grad_norm": 0.9873846769332886, "learning_rate": 8.230685548330219e-06, "loss": 1.0806, "step": 255160 }, { "epoch": 1.6302083998824477, "grad_norm": 0.8769903779029846, "learning_rate": 8.227927721340561e-06, "loss": 0.8805, "step": 255170 }, { "epoch": 1.6302722870321862, "grad_norm": 1.1980665922164917, "learning_rate": 8.225170315031794e-06, "loss": 0.7595, "step": 255180 }, { "epoch": 1.6303361741819251, "grad_norm": 0.5990476608276367, "learning_rate": 8.222413329431721e-06, "loss": 0.8834, "step": 255190 }, { "epoch": 1.6304000613316636, "grad_norm": 0.7708885669708252, "learning_rate": 8.219656764568067e-06, "loss": 0.6584, "step": 255200 }, { "epoch": 1.6304639484814025, "grad_norm": 1.4800968170166016, "learning_rate": 8.216900620468636e-06, "loss": 0.7687, "step": 255210 }, { "epoch": 1.630527835631141, "grad_norm": 1.000148057937622, "learning_rate": 8.214144897161147e-06, "loss": 0.8093, "step": 255220 }, { "epoch": 1.63059172278088, "grad_norm": 1.8174539804458618, "learning_rate": 8.21138959467338e-06, "loss": 0.7309, "step": 255230 }, { "epoch": 1.6306556099306184, "grad_norm": 0.9420324563980103, "learning_rate": 8.20863471303306e-06, "loss": 0.8949, "step": 255240 }, { "epoch": 1.6307194970803573, "grad_norm": 1.1732988357543945, "learning_rate": 8.205880252267966e-06, "loss": 0.7893, "step": 255250 }, { "epoch": 1.6307833842300958, "grad_norm": 1.243893027305603, "learning_rate": 8.2031262124058e-06, "loss": 0.8272, "step": 255260 }, { "epoch": 1.6308472713798348, "grad_norm": 0.9962188005447388, "learning_rate": 8.200372593474304e-06, "loss": 0.8322, "step": 255270 }, { "epoch": 1.6309111585295732, "grad_norm": 1.157586693763733, "learning_rate": 8.197619395501228e-06, "loss": 0.9957, "step": 255280 }, { "epoch": 1.6309750456793122, "grad_norm": 0.7285836338996887, "learning_rate": 8.19486661851428e-06, "loss": 0.7998, "step": 255290 }, { "epoch": 1.6310389328290507, "grad_norm": 1.6524395942687988, "learning_rate": 8.192114262541207e-06, "loss": 0.8012, "step": 255300 }, { "epoch": 1.6311028199787896, "grad_norm": 1.0645508766174316, "learning_rate": 8.189362327609695e-06, "loss": 0.8827, "step": 255310 }, { "epoch": 1.631166707128528, "grad_norm": 0.747715950012207, "learning_rate": 8.186610813747491e-06, "loss": 0.819, "step": 255320 }, { "epoch": 1.631230594278267, "grad_norm": 0.8673967123031616, "learning_rate": 8.183859720982274e-06, "loss": 0.9304, "step": 255330 }, { "epoch": 1.6312944814280055, "grad_norm": 1.3902219533920288, "learning_rate": 8.181109049341783e-06, "loss": 0.9032, "step": 255340 }, { "epoch": 1.6313583685777444, "grad_norm": 1.0869730710983276, "learning_rate": 8.178358798853686e-06, "loss": 1.0941, "step": 255350 }, { "epoch": 1.631422255727483, "grad_norm": 0.6030356287956238, "learning_rate": 8.175608969545711e-06, "loss": 0.9028, "step": 255360 }, { "epoch": 1.6314861428772218, "grad_norm": 0.7987911105155945, "learning_rate": 8.172859561445524e-06, "loss": 0.8724, "step": 255370 }, { "epoch": 1.6315500300269603, "grad_norm": 1.0909885168075562, "learning_rate": 8.170110574580841e-06, "loss": 0.9267, "step": 255380 }, { "epoch": 1.6316139171766992, "grad_norm": 1.3036298751831055, "learning_rate": 8.167362008979319e-06, "loss": 0.7328, "step": 255390 }, { "epoch": 1.6316778043264377, "grad_norm": 0.6408089995384216, "learning_rate": 8.164613864668663e-06, "loss": 0.6808, "step": 255400 }, { "epoch": 1.6317416914761766, "grad_norm": 1.4903781414031982, "learning_rate": 8.161866141676527e-06, "loss": 1.0693, "step": 255410 }, { "epoch": 1.6318055786259151, "grad_norm": 0.9320021867752075, "learning_rate": 8.159118840030606e-06, "loss": 0.8584, "step": 255420 }, { "epoch": 1.6318694657756538, "grad_norm": 0.4931786358356476, "learning_rate": 8.156371959758546e-06, "loss": 0.8167, "step": 255430 }, { "epoch": 1.6319333529253925, "grad_norm": 0.9139940738677979, "learning_rate": 8.153625500888028e-06, "loss": 1.234, "step": 255440 }, { "epoch": 1.6319972400751313, "grad_norm": 0.8138971924781799, "learning_rate": 8.150879463446692e-06, "loss": 0.8459, "step": 255450 }, { "epoch": 1.63206112722487, "grad_norm": 1.0304619073867798, "learning_rate": 8.14813384746222e-06, "loss": 0.7986, "step": 255460 }, { "epoch": 1.6321250143746087, "grad_norm": 1.0934109687805176, "learning_rate": 8.145388652962233e-06, "loss": 1.1236, "step": 255470 }, { "epoch": 1.6321889015243474, "grad_norm": 0.9255013465881348, "learning_rate": 8.142643879974394e-06, "loss": 0.9883, "step": 255480 }, { "epoch": 1.632252788674086, "grad_norm": 0.954413890838623, "learning_rate": 8.139899528526352e-06, "loss": 0.9778, "step": 255490 }, { "epoch": 1.6323166758238248, "grad_norm": 1.3113750219345093, "learning_rate": 8.137155598645724e-06, "loss": 0.8375, "step": 255500 }, { "epoch": 1.6323805629735635, "grad_norm": 1.5685514211654663, "learning_rate": 8.134412090360166e-06, "loss": 0.8376, "step": 255510 }, { "epoch": 1.6324444501233022, "grad_norm": 0.8365868330001831, "learning_rate": 8.13166900369729e-06, "loss": 0.6379, "step": 255520 }, { "epoch": 1.632508337273041, "grad_norm": 1.8244489431381226, "learning_rate": 8.128926338684734e-06, "loss": 1.0256, "step": 255530 }, { "epoch": 1.6325722244227796, "grad_norm": 1.310700535774231, "learning_rate": 8.126184095350109e-06, "loss": 1.0009, "step": 255540 }, { "epoch": 1.6326361115725183, "grad_norm": 0.7904416918754578, "learning_rate": 8.123442273721044e-06, "loss": 0.9701, "step": 255550 }, { "epoch": 1.632699998722257, "grad_norm": 0.6500728130340576, "learning_rate": 8.120700873825133e-06, "loss": 0.7332, "step": 255560 }, { "epoch": 1.6327638858719957, "grad_norm": 0.8715521693229675, "learning_rate": 8.117959895690003e-06, "loss": 0.8498, "step": 255570 }, { "epoch": 1.6328277730217344, "grad_norm": 0.9831506609916687, "learning_rate": 8.115219339343244e-06, "loss": 0.9128, "step": 255580 }, { "epoch": 1.6328916601714731, "grad_norm": 0.7139176726341248, "learning_rate": 8.112479204812468e-06, "loss": 0.9402, "step": 255590 }, { "epoch": 1.6329555473212118, "grad_norm": 0.9015781879425049, "learning_rate": 8.109739492125256e-06, "loss": 1.0385, "step": 255600 }, { "epoch": 1.6330194344709505, "grad_norm": 0.904238224029541, "learning_rate": 8.107000201309217e-06, "loss": 0.6729, "step": 255610 }, { "epoch": 1.6330833216206893, "grad_norm": 0.9031368494033813, "learning_rate": 8.104261332391922e-06, "loss": 1.0791, "step": 255620 }, { "epoch": 1.633147208770428, "grad_norm": 1.1262279748916626, "learning_rate": 8.101522885400964e-06, "loss": 1.0155, "step": 255630 }, { "epoch": 1.6332110959201667, "grad_norm": 1.1558489799499512, "learning_rate": 8.098784860363912e-06, "loss": 0.6596, "step": 255640 }, { "epoch": 1.6332749830699054, "grad_norm": 1.087398886680603, "learning_rate": 8.096047257308354e-06, "loss": 0.7462, "step": 255650 }, { "epoch": 1.633338870219644, "grad_norm": 1.2112863063812256, "learning_rate": 8.093310076261846e-06, "loss": 1.0399, "step": 255660 }, { "epoch": 1.6334027573693826, "grad_norm": 2.303938150405884, "learning_rate": 8.090573317251965e-06, "loss": 0.9191, "step": 255670 }, { "epoch": 1.6334666445191215, "grad_norm": 0.8003772497177124, "learning_rate": 8.087836980306262e-06, "loss": 0.6613, "step": 255680 }, { "epoch": 1.63353053166886, "grad_norm": 1.2863084077835083, "learning_rate": 8.085101065452293e-06, "loss": 0.9107, "step": 255690 }, { "epoch": 1.633594418818599, "grad_norm": 1.1990734338760376, "learning_rate": 8.082365572717638e-06, "loss": 0.6576, "step": 255700 }, { "epoch": 1.6336583059683374, "grad_norm": 1.2158446311950684, "learning_rate": 8.07963050212981e-06, "loss": 0.6629, "step": 255710 }, { "epoch": 1.6337221931180763, "grad_norm": 1.5338866710662842, "learning_rate": 8.076895853716381e-06, "loss": 0.9001, "step": 255720 }, { "epoch": 1.6337860802678148, "grad_norm": 1.4597996473312378, "learning_rate": 8.074161627504879e-06, "loss": 0.9187, "step": 255730 }, { "epoch": 1.6338499674175537, "grad_norm": 0.7120456099510193, "learning_rate": 8.071427823522837e-06, "loss": 0.7856, "step": 255740 }, { "epoch": 1.6339138545672922, "grad_norm": 0.7377516627311707, "learning_rate": 8.06869444179778e-06, "loss": 0.9781, "step": 255750 }, { "epoch": 1.6339777417170311, "grad_norm": 1.3690109252929688, "learning_rate": 8.065961482357264e-06, "loss": 0.8921, "step": 255760 }, { "epoch": 1.6340416288667696, "grad_norm": 0.9299448728561401, "learning_rate": 8.063228945228773e-06, "loss": 0.8308, "step": 255770 }, { "epoch": 1.6341055160165086, "grad_norm": 0.7954294085502625, "learning_rate": 8.060496830439867e-06, "loss": 0.8905, "step": 255780 }, { "epoch": 1.634169403166247, "grad_norm": 1.089705228805542, "learning_rate": 8.057765138018025e-06, "loss": 0.8922, "step": 255790 }, { "epoch": 1.634233290315986, "grad_norm": 1.4746631383895874, "learning_rate": 8.055033867990774e-06, "loss": 0.8673, "step": 255800 }, { "epoch": 1.6342971774657244, "grad_norm": 0.9421790242195129, "learning_rate": 8.052303020385632e-06, "loss": 0.7981, "step": 255810 }, { "epoch": 1.6343610646154634, "grad_norm": 0.873547375202179, "learning_rate": 8.049572595230071e-06, "loss": 0.5999, "step": 255820 }, { "epoch": 1.6344249517652019, "grad_norm": 0.8159425258636475, "learning_rate": 8.046842592551623e-06, "loss": 0.7421, "step": 255830 }, { "epoch": 1.6344888389149408, "grad_norm": 0.6584420800209045, "learning_rate": 8.044113012377752e-06, "loss": 0.7277, "step": 255840 }, { "epoch": 1.6345527260646793, "grad_norm": 1.2918460369110107, "learning_rate": 8.041383854735972e-06, "loss": 0.876, "step": 255850 }, { "epoch": 1.6346166132144182, "grad_norm": 1.1654072999954224, "learning_rate": 8.038655119653748e-06, "loss": 1.0023, "step": 255860 }, { "epoch": 1.6346805003641567, "grad_norm": 0.7087494134902954, "learning_rate": 8.035926807158573e-06, "loss": 0.9122, "step": 255870 }, { "epoch": 1.6347443875138956, "grad_norm": 0.6799043416976929, "learning_rate": 8.033198917277912e-06, "loss": 0.7897, "step": 255880 }, { "epoch": 1.634808274663634, "grad_norm": 1.0854226350784302, "learning_rate": 8.030471450039257e-06, "loss": 0.6717, "step": 255890 }, { "epoch": 1.6348721618133728, "grad_norm": 0.6955246329307556, "learning_rate": 8.02774440547005e-06, "loss": 0.9762, "step": 255900 }, { "epoch": 1.6349360489631115, "grad_norm": 0.7711451053619385, "learning_rate": 8.025017783597777e-06, "loss": 0.7392, "step": 255910 }, { "epoch": 1.6349999361128502, "grad_norm": 0.8822253942489624, "learning_rate": 8.02229158444988e-06, "loss": 0.8686, "step": 255920 }, { "epoch": 1.635063823262589, "grad_norm": 0.7996523976325989, "learning_rate": 8.019565808053836e-06, "loss": 0.8066, "step": 255930 }, { "epoch": 1.6351277104123276, "grad_norm": 1.005338191986084, "learning_rate": 8.016840454437063e-06, "loss": 1.337, "step": 255940 }, { "epoch": 1.6351915975620663, "grad_norm": 0.9166159629821777, "learning_rate": 8.014115523627046e-06, "loss": 1.0206, "step": 255950 }, { "epoch": 1.635255484711805, "grad_norm": 0.864742636680603, "learning_rate": 8.011391015651198e-06, "loss": 0.8222, "step": 255960 }, { "epoch": 1.6353193718615437, "grad_norm": 1.0380345582962036, "learning_rate": 8.008666930536972e-06, "loss": 0.8661, "step": 255970 }, { "epoch": 1.6353832590112825, "grad_norm": 0.8901452422142029, "learning_rate": 8.005943268311794e-06, "loss": 1.0178, "step": 255980 }, { "epoch": 1.6354471461610212, "grad_norm": 1.2338999509811401, "learning_rate": 8.003220029003106e-06, "loss": 0.7705, "step": 255990 }, { "epoch": 1.6355110333107599, "grad_norm": 0.8710522651672363, "learning_rate": 8.000497212638313e-06, "loss": 0.9058, "step": 256000 }, { "epoch": 1.6355749204604986, "grad_norm": 1.0031273365020752, "learning_rate": 7.997774819244846e-06, "loss": 0.9033, "step": 256010 }, { "epoch": 1.6356388076102373, "grad_norm": 1.399172067642212, "learning_rate": 7.995052848850137e-06, "loss": 0.8117, "step": 256020 }, { "epoch": 1.635702694759976, "grad_norm": 0.9795224070549011, "learning_rate": 7.992331301481575e-06, "loss": 0.9511, "step": 256030 }, { "epoch": 1.6357665819097147, "grad_norm": 0.6004918813705444, "learning_rate": 7.98961017716659e-06, "loss": 0.6345, "step": 256040 }, { "epoch": 1.6358304690594534, "grad_norm": 0.7056857943534851, "learning_rate": 7.986889475932558e-06, "loss": 0.9637, "step": 256050 }, { "epoch": 1.635894356209192, "grad_norm": 0.8062849640846252, "learning_rate": 7.984169197806912e-06, "loss": 1.091, "step": 256060 }, { "epoch": 1.6359582433589308, "grad_norm": 0.780800998210907, "learning_rate": 7.981449342817021e-06, "loss": 0.9206, "step": 256070 }, { "epoch": 1.6360221305086695, "grad_norm": 1.4013501405715942, "learning_rate": 7.979001835129851e-06, "loss": 0.9148, "step": 256080 }, { "epoch": 1.6360860176584082, "grad_norm": 1.1285284757614136, "learning_rate": 7.976282784173366e-06, "loss": 0.6931, "step": 256090 }, { "epoch": 1.636149904808147, "grad_norm": 0.704509437084198, "learning_rate": 7.97356415643209e-06, "loss": 0.8667, "step": 256100 }, { "epoch": 1.6362137919578856, "grad_norm": 1.3260267972946167, "learning_rate": 7.970845951933365e-06, "loss": 0.9538, "step": 256110 }, { "epoch": 1.6362776791076243, "grad_norm": 0.758438766002655, "learning_rate": 7.96812817070458e-06, "loss": 0.86, "step": 256120 }, { "epoch": 1.636341566257363, "grad_norm": 0.8738319873809814, "learning_rate": 7.965410812773122e-06, "loss": 0.7856, "step": 256130 }, { "epoch": 1.6364054534071018, "grad_norm": 0.633561372756958, "learning_rate": 7.962693878166328e-06, "loss": 0.8174, "step": 256140 }, { "epoch": 1.6364693405568405, "grad_norm": 0.9667708277702332, "learning_rate": 7.959977366911586e-06, "loss": 0.812, "step": 256150 }, { "epoch": 1.636533227706579, "grad_norm": 0.9830121994018555, "learning_rate": 7.957261279036227e-06, "loss": 1.0059, "step": 256160 }, { "epoch": 1.6365971148563179, "grad_norm": 0.9154394268989563, "learning_rate": 7.954545614567633e-06, "loss": 1.0548, "step": 256170 }, { "epoch": 1.6366610020060564, "grad_norm": 0.8336869478225708, "learning_rate": 7.951830373533132e-06, "loss": 0.7661, "step": 256180 }, { "epoch": 1.6367248891557953, "grad_norm": 0.7483879327774048, "learning_rate": 7.949115555960084e-06, "loss": 0.9512, "step": 256190 }, { "epoch": 1.6367887763055338, "grad_norm": 1.3023123741149902, "learning_rate": 7.946401161875811e-06, "loss": 1.2298, "step": 256200 }, { "epoch": 1.6368526634552727, "grad_norm": 0.709023654460907, "learning_rate": 7.943687191307669e-06, "loss": 1.3336, "step": 256210 }, { "epoch": 1.6369165506050112, "grad_norm": 2.153777837753296, "learning_rate": 7.940973644282967e-06, "loss": 0.85, "step": 256220 }, { "epoch": 1.63698043775475, "grad_norm": 0.7116622924804688, "learning_rate": 7.938260520829065e-06, "loss": 0.834, "step": 256230 }, { "epoch": 1.6370443249044886, "grad_norm": 2.8475935459136963, "learning_rate": 7.935547820973254e-06, "loss": 0.9491, "step": 256240 }, { "epoch": 1.6371082120542275, "grad_norm": 0.6584477424621582, "learning_rate": 7.932835544742877e-06, "loss": 0.9856, "step": 256250 }, { "epoch": 1.637172099203966, "grad_norm": 1.0373663902282715, "learning_rate": 7.930123692165231e-06, "loss": 0.7778, "step": 256260 }, { "epoch": 1.637235986353705, "grad_norm": 0.5286368727684021, "learning_rate": 7.927412263267641e-06, "loss": 0.8536, "step": 256270 }, { "epoch": 1.6372998735034434, "grad_norm": 1.3653309345245361, "learning_rate": 7.9247012580774e-06, "loss": 0.7506, "step": 256280 }, { "epoch": 1.6373637606531823, "grad_norm": 0.7942734956741333, "learning_rate": 7.921990676621832e-06, "loss": 1.1046, "step": 256290 }, { "epoch": 1.6374276478029208, "grad_norm": 0.7212092876434326, "learning_rate": 7.919280518928207e-06, "loss": 0.8769, "step": 256300 }, { "epoch": 1.6374915349526598, "grad_norm": 0.9169106483459473, "learning_rate": 7.916570785023841e-06, "loss": 0.6814, "step": 256310 }, { "epoch": 1.6375554221023982, "grad_norm": 0.7443297505378723, "learning_rate": 7.913861474936002e-06, "loss": 0.827, "step": 256320 }, { "epoch": 1.6376193092521372, "grad_norm": 1.0695321559906006, "learning_rate": 7.911152588691995e-06, "loss": 0.8919, "step": 256330 }, { "epoch": 1.6376831964018757, "grad_norm": 1.0362414121627808, "learning_rate": 7.908444126319098e-06, "loss": 0.8526, "step": 256340 }, { "epoch": 1.6377470835516146, "grad_norm": 0.7101940512657166, "learning_rate": 7.905736087844574e-06, "loss": 1.0854, "step": 256350 }, { "epoch": 1.637810970701353, "grad_norm": 1.1458754539489746, "learning_rate": 7.903028473295714e-06, "loss": 1.0137, "step": 256360 }, { "epoch": 1.637874857851092, "grad_norm": 0.9790529012680054, "learning_rate": 7.900321282699779e-06, "loss": 0.6549, "step": 256370 }, { "epoch": 1.6379387450008305, "grad_norm": 0.736199140548706, "learning_rate": 7.89761451608403e-06, "loss": 0.8473, "step": 256380 }, { "epoch": 1.6380026321505692, "grad_norm": 0.7104125618934631, "learning_rate": 7.894908173475712e-06, "loss": 0.9423, "step": 256390 }, { "epoch": 1.6380665193003079, "grad_norm": 1.0521243810653687, "learning_rate": 7.892202254902108e-06, "loss": 1.0501, "step": 256400 }, { "epoch": 1.6381304064500466, "grad_norm": 1.024814486503601, "learning_rate": 7.889496760390447e-06, "loss": 0.9319, "step": 256410 }, { "epoch": 1.6381942935997853, "grad_norm": 0.6387311816215515, "learning_rate": 7.886791689967993e-06, "loss": 0.8884, "step": 256420 }, { "epoch": 1.638258180749524, "grad_norm": 0.835491955280304, "learning_rate": 7.884087043661969e-06, "loss": 0.9697, "step": 256430 }, { "epoch": 1.6383220678992627, "grad_norm": 0.8031365871429443, "learning_rate": 7.881382821499621e-06, "loss": 0.811, "step": 256440 }, { "epoch": 1.6383859550490014, "grad_norm": 0.6929246187210083, "learning_rate": 7.8786790235082e-06, "loss": 0.9724, "step": 256450 }, { "epoch": 1.6384498421987401, "grad_norm": 0.7216389775276184, "learning_rate": 7.875975649714912e-06, "loss": 0.9006, "step": 256460 }, { "epoch": 1.6385137293484788, "grad_norm": 1.058194875717163, "learning_rate": 7.873272700147e-06, "loss": 0.7515, "step": 256470 }, { "epoch": 1.6385776164982175, "grad_norm": 0.9971916079521179, "learning_rate": 7.870570174831664e-06, "loss": 0.9915, "step": 256480 }, { "epoch": 1.6386415036479562, "grad_norm": 0.5907204747200012, "learning_rate": 7.867868073796154e-06, "loss": 0.8927, "step": 256490 }, { "epoch": 1.638705390797695, "grad_norm": 0.6885647177696228, "learning_rate": 7.865166397067642e-06, "loss": 0.722, "step": 256500 }, { "epoch": 1.6387692779474337, "grad_norm": 0.8568047285079956, "learning_rate": 7.86246514467337e-06, "loss": 0.8133, "step": 256510 }, { "epoch": 1.6388331650971724, "grad_norm": 1.0300815105438232, "learning_rate": 7.859764316640516e-06, "loss": 1.0747, "step": 256520 }, { "epoch": 1.638897052246911, "grad_norm": 0.8222336769104004, "learning_rate": 7.857063912996304e-06, "loss": 0.7396, "step": 256530 }, { "epoch": 1.6389609393966498, "grad_norm": 1.097979187965393, "learning_rate": 7.854363933767906e-06, "loss": 0.8752, "step": 256540 }, { "epoch": 1.6390248265463885, "grad_norm": 1.208530068397522, "learning_rate": 7.851664378982532e-06, "loss": 1.0752, "step": 256550 }, { "epoch": 1.6390887136961272, "grad_norm": 0.7187122702598572, "learning_rate": 7.84896524866735e-06, "loss": 0.7706, "step": 256560 }, { "epoch": 1.639152600845866, "grad_norm": 0.8158242106437683, "learning_rate": 7.846266542849574e-06, "loss": 0.994, "step": 256570 }, { "epoch": 1.6392164879956046, "grad_norm": 1.7641271352767944, "learning_rate": 7.843568261556339e-06, "loss": 0.8622, "step": 256580 }, { "epoch": 1.6392803751453433, "grad_norm": 1.0310348272323608, "learning_rate": 7.84087040481486e-06, "loss": 0.8935, "step": 256590 }, { "epoch": 1.639344262295082, "grad_norm": 1.2308247089385986, "learning_rate": 7.83817297265228e-06, "loss": 0.9992, "step": 256600 }, { "epoch": 1.6394081494448207, "grad_norm": 2.2752044200897217, "learning_rate": 7.835475965095779e-06, "loss": 0.9188, "step": 256610 }, { "epoch": 1.6394720365945594, "grad_norm": 0.6786547899246216, "learning_rate": 7.832779382172506e-06, "loss": 0.9933, "step": 256620 }, { "epoch": 1.639535923744298, "grad_norm": 1.5297602415084839, "learning_rate": 7.830083223909629e-06, "loss": 0.9466, "step": 256630 }, { "epoch": 1.6395998108940368, "grad_norm": 3.73483943939209, "learning_rate": 7.827387490334293e-06, "loss": 1.0265, "step": 256640 }, { "epoch": 1.6396636980437753, "grad_norm": 0.8407526016235352, "learning_rate": 7.824692181473642e-06, "loss": 1.0092, "step": 256650 }, { "epoch": 1.6397275851935142, "grad_norm": 1.1145086288452148, "learning_rate": 7.821997297354844e-06, "loss": 1.0603, "step": 256660 }, { "epoch": 1.6397914723432527, "grad_norm": 0.9100845456123352, "learning_rate": 7.819302838005011e-06, "loss": 1.0177, "step": 256670 }, { "epoch": 1.6398553594929917, "grad_norm": 1.0881621837615967, "learning_rate": 7.816608803451297e-06, "loss": 0.856, "step": 256680 }, { "epoch": 1.6399192466427301, "grad_norm": 1.6409400701522827, "learning_rate": 7.813915193720817e-06, "loss": 0.7758, "step": 256690 }, { "epoch": 1.639983133792469, "grad_norm": 0.9504512548446655, "learning_rate": 7.81122200884072e-06, "loss": 1.2167, "step": 256700 }, { "epoch": 1.6400470209422076, "grad_norm": 1.1367648839950562, "learning_rate": 7.808529248838103e-06, "loss": 0.7544, "step": 256710 }, { "epoch": 1.6401109080919465, "grad_norm": 1.4266903400421143, "learning_rate": 7.805836913740111e-06, "loss": 0.8999, "step": 256720 }, { "epoch": 1.640174795241685, "grad_norm": 0.9106956720352173, "learning_rate": 7.803145003573832e-06, "loss": 0.8518, "step": 256730 }, { "epoch": 1.640238682391424, "grad_norm": 0.6638666391372681, "learning_rate": 7.800453518366397e-06, "loss": 0.6891, "step": 256740 }, { "epoch": 1.6403025695411624, "grad_norm": 1.59250807762146, "learning_rate": 7.797762458144891e-06, "loss": 0.8653, "step": 256750 }, { "epoch": 1.6403664566909013, "grad_norm": 0.8077670931816101, "learning_rate": 7.795071822936446e-06, "loss": 0.8013, "step": 256760 }, { "epoch": 1.6404303438406398, "grad_norm": 1.235260009765625, "learning_rate": 7.792381612768123e-06, "loss": 0.955, "step": 256770 }, { "epoch": 1.6404942309903787, "grad_norm": 0.8226930499076843, "learning_rate": 7.789691827667045e-06, "loss": 0.945, "step": 256780 }, { "epoch": 1.6405581181401172, "grad_norm": 1.6601190567016602, "learning_rate": 7.78700246766027e-06, "loss": 0.6711, "step": 256790 }, { "epoch": 1.6406220052898561, "grad_norm": 1.1035475730895996, "learning_rate": 7.784313532774918e-06, "loss": 0.9102, "step": 256800 }, { "epoch": 1.6406858924395946, "grad_norm": 1.2023992538452148, "learning_rate": 7.781625023038036e-06, "loss": 0.7553, "step": 256810 }, { "epoch": 1.6407497795893335, "grad_norm": 0.8770223259925842, "learning_rate": 7.778936938476728e-06, "loss": 0.8356, "step": 256820 }, { "epoch": 1.640813666739072, "grad_norm": 1.4096797704696655, "learning_rate": 7.776249279118042e-06, "loss": 0.908, "step": 256830 }, { "epoch": 1.640877553888811, "grad_norm": 2.869856595993042, "learning_rate": 7.773562044989063e-06, "loss": 1.0478, "step": 256840 }, { "epoch": 1.6409414410385494, "grad_norm": 1.1328037977218628, "learning_rate": 7.770875236116843e-06, "loss": 0.7729, "step": 256850 }, { "epoch": 1.6410053281882884, "grad_norm": 0.991386890411377, "learning_rate": 7.768188852528434e-06, "loss": 0.8065, "step": 256860 }, { "epoch": 1.6410692153380269, "grad_norm": 0.6845223307609558, "learning_rate": 7.765502894250908e-06, "loss": 0.9615, "step": 256870 }, { "epoch": 1.6411331024877656, "grad_norm": 1.3230713605880737, "learning_rate": 7.762817361311298e-06, "loss": 0.9169, "step": 256880 }, { "epoch": 1.6411969896375043, "grad_norm": 0.6882030963897705, "learning_rate": 7.760132253736668e-06, "loss": 0.6916, "step": 256890 }, { "epoch": 1.641260876787243, "grad_norm": 1.0747628211975098, "learning_rate": 7.757447571554033e-06, "loss": 0.867, "step": 256900 }, { "epoch": 1.6413247639369817, "grad_norm": 1.1972609758377075, "learning_rate": 7.754763314790463e-06, "loss": 1.1478, "step": 256910 }, { "epoch": 1.6413886510867204, "grad_norm": 0.8115911483764648, "learning_rate": 7.752079483472958e-06, "loss": 0.8082, "step": 256920 }, { "epoch": 1.641452538236459, "grad_norm": 0.9861070513725281, "learning_rate": 7.749396077628579e-06, "loss": 0.7608, "step": 256930 }, { "epoch": 1.6415164253861978, "grad_norm": 1.068723440170288, "learning_rate": 7.74671309728432e-06, "loss": 0.8506, "step": 256940 }, { "epoch": 1.6415803125359365, "grad_norm": 7.141610145568848, "learning_rate": 7.744030542467222e-06, "loss": 0.7723, "step": 256950 }, { "epoch": 1.6416441996856752, "grad_norm": 1.6269599199295044, "learning_rate": 7.741348413204286e-06, "loss": 0.8175, "step": 256960 }, { "epoch": 1.641708086835414, "grad_norm": 1.107198715209961, "learning_rate": 7.73866670952253e-06, "loss": 0.968, "step": 256970 }, { "epoch": 1.6417719739851526, "grad_norm": 1.0261207818984985, "learning_rate": 7.735985431448972e-06, "loss": 0.9184, "step": 256980 }, { "epoch": 1.6418358611348913, "grad_norm": 0.8513238430023193, "learning_rate": 7.733304579010591e-06, "loss": 0.7687, "step": 256990 }, { "epoch": 1.64189974828463, "grad_norm": 1.1189079284667969, "learning_rate": 7.730624152234412e-06, "loss": 0.8642, "step": 257000 }, { "epoch": 1.6419636354343687, "grad_norm": 1.1294742822647095, "learning_rate": 7.727944151147403e-06, "loss": 0.9542, "step": 257010 }, { "epoch": 1.6420275225841074, "grad_norm": 1.1503225564956665, "learning_rate": 7.72526457577658e-06, "loss": 0.9426, "step": 257020 }, { "epoch": 1.6420914097338462, "grad_norm": 2.0296530723571777, "learning_rate": 7.722585426148903e-06, "loss": 0.8177, "step": 257030 }, { "epoch": 1.6421552968835849, "grad_norm": 0.9002004265785217, "learning_rate": 7.71990670229138e-06, "loss": 0.8094, "step": 257040 }, { "epoch": 1.6422191840333236, "grad_norm": 1.0578171014785767, "learning_rate": 7.717228404230964e-06, "loss": 0.7348, "step": 257050 }, { "epoch": 1.6422830711830623, "grad_norm": 1.2272627353668213, "learning_rate": 7.71455053199464e-06, "loss": 0.8838, "step": 257060 }, { "epoch": 1.642346958332801, "grad_norm": 0.7876251935958862, "learning_rate": 7.71214081108384e-06, "loss": 0.9401, "step": 257070 }, { "epoch": 1.6424108454825397, "grad_norm": 1.100836157798767, "learning_rate": 7.709463747987582e-06, "loss": 0.7681, "step": 257080 }, { "epoch": 1.6424747326322784, "grad_norm": 1.7621210813522339, "learning_rate": 7.706787110793607e-06, "loss": 1.1007, "step": 257090 }, { "epoch": 1.642538619782017, "grad_norm": 0.7386590242385864, "learning_rate": 7.704110899528872e-06, "loss": 0.8787, "step": 257100 }, { "epoch": 1.6426025069317558, "grad_norm": 0.6226412057876587, "learning_rate": 7.701435114220346e-06, "loss": 0.8882, "step": 257110 }, { "epoch": 1.6426663940814943, "grad_norm": 1.0297595262527466, "learning_rate": 7.698759754894946e-06, "loss": 0.9987, "step": 257120 }, { "epoch": 1.6427302812312332, "grad_norm": 0.6230430603027344, "learning_rate": 7.696084821579641e-06, "loss": 0.8812, "step": 257130 }, { "epoch": 1.6427941683809717, "grad_norm": 0.5937321186065674, "learning_rate": 7.693410314301352e-06, "loss": 1.0652, "step": 257140 }, { "epoch": 1.6428580555307106, "grad_norm": 0.8128228783607483, "learning_rate": 7.690736233087032e-06, "loss": 0.9578, "step": 257150 }, { "epoch": 1.642921942680449, "grad_norm": 0.7910133004188538, "learning_rate": 7.68806257796359e-06, "loss": 0.9108, "step": 257160 }, { "epoch": 1.642985829830188, "grad_norm": 1.0798237323760986, "learning_rate": 7.685389348957978e-06, "loss": 0.8957, "step": 257170 }, { "epoch": 1.6430497169799265, "grad_norm": 1.373158574104309, "learning_rate": 7.682716546097085e-06, "loss": 0.7722, "step": 257180 }, { "epoch": 1.6431136041296655, "grad_norm": 0.7850404977798462, "learning_rate": 7.680044169407858e-06, "loss": 0.7864, "step": 257190 }, { "epoch": 1.643177491279404, "grad_norm": 1.0319653749465942, "learning_rate": 7.677372218917189e-06, "loss": 0.7347, "step": 257200 }, { "epoch": 1.6432413784291429, "grad_norm": 1.0518642663955688, "learning_rate": 7.674700694652004e-06, "loss": 0.9759, "step": 257210 }, { "epoch": 1.6433052655788813, "grad_norm": 1.550307035446167, "learning_rate": 7.672029596639191e-06, "loss": 0.9258, "step": 257220 }, { "epoch": 1.6433691527286203, "grad_norm": 1.0691750049591064, "learning_rate": 7.669358924905673e-06, "loss": 0.8578, "step": 257230 }, { "epoch": 1.6434330398783588, "grad_norm": 1.184849739074707, "learning_rate": 7.666688679478317e-06, "loss": 0.8363, "step": 257240 }, { "epoch": 1.6434969270280977, "grad_norm": 0.7209656834602356, "learning_rate": 7.664018860384042e-06, "loss": 0.7998, "step": 257250 }, { "epoch": 1.6435608141778362, "grad_norm": 0.9371286630630493, "learning_rate": 7.661349467649714e-06, "loss": 0.7666, "step": 257260 }, { "epoch": 1.643624701327575, "grad_norm": 0.8568039536476135, "learning_rate": 7.658680501302235e-06, "loss": 1.1363, "step": 257270 }, { "epoch": 1.6436885884773136, "grad_norm": 0.6117787957191467, "learning_rate": 7.656011961368459e-06, "loss": 0.837, "step": 257280 }, { "epoch": 1.6437524756270525, "grad_norm": 0.845210611820221, "learning_rate": 7.653343847875277e-06, "loss": 1.0475, "step": 257290 }, { "epoch": 1.643816362776791, "grad_norm": 0.9108684659004211, "learning_rate": 7.650676160849568e-06, "loss": 0.9906, "step": 257300 }, { "epoch": 1.64388024992653, "grad_norm": 0.7374324202537537, "learning_rate": 7.648008900318176e-06, "loss": 0.9109, "step": 257310 }, { "epoch": 1.6439441370762684, "grad_norm": 1.5589121580123901, "learning_rate": 7.645342066307986e-06, "loss": 0.8385, "step": 257320 }, { "epoch": 1.6440080242260073, "grad_norm": 1.1741704940795898, "learning_rate": 7.642675658845839e-06, "loss": 0.7737, "step": 257330 }, { "epoch": 1.6440719113757458, "grad_norm": 1.1971560716629028, "learning_rate": 7.640009677958592e-06, "loss": 1.0186, "step": 257340 }, { "epoch": 1.6441357985254847, "grad_norm": 0.8373765349388123, "learning_rate": 7.63734412367309e-06, "loss": 0.9073, "step": 257350 }, { "epoch": 1.6441996856752232, "grad_norm": 0.7890423536300659, "learning_rate": 7.634678996016193e-06, "loss": 0.718, "step": 257360 }, { "epoch": 1.644263572824962, "grad_norm": 1.0626322031021118, "learning_rate": 7.632014295014717e-06, "loss": 1.2763, "step": 257370 }, { "epoch": 1.6443274599747006, "grad_norm": 0.793804407119751, "learning_rate": 7.62935002069552e-06, "loss": 0.8366, "step": 257380 }, { "epoch": 1.6443913471244394, "grad_norm": 1.9477283954620361, "learning_rate": 7.626686173085412e-06, "loss": 0.8928, "step": 257390 }, { "epoch": 1.644455234274178, "grad_norm": 0.6621059775352478, "learning_rate": 7.624022752211246e-06, "loss": 0.9907, "step": 257400 }, { "epoch": 1.6445191214239168, "grad_norm": 1.115936517715454, "learning_rate": 7.621359758099822e-06, "loss": 0.7577, "step": 257410 }, { "epoch": 1.6445830085736555, "grad_norm": 1.2295455932617188, "learning_rate": 7.618697190777979e-06, "loss": 0.7741, "step": 257420 }, { "epoch": 1.6446468957233942, "grad_norm": 0.9931457042694092, "learning_rate": 7.616035050272508e-06, "loss": 0.8288, "step": 257430 }, { "epoch": 1.6447107828731329, "grad_norm": 0.5664223432540894, "learning_rate": 7.613373336610241e-06, "loss": 0.6466, "step": 257440 }, { "epoch": 1.6447746700228716, "grad_norm": 0.9523608684539795, "learning_rate": 7.610712049817964e-06, "loss": 0.7106, "step": 257450 }, { "epoch": 1.6448385571726103, "grad_norm": 0.7553440928459167, "learning_rate": 7.608051189922499e-06, "loss": 0.8361, "step": 257460 }, { "epoch": 1.644902444322349, "grad_norm": 1.1481449604034424, "learning_rate": 7.60539075695062e-06, "loss": 1.0126, "step": 257470 }, { "epoch": 1.6449663314720877, "grad_norm": 0.599646270275116, "learning_rate": 7.6027307509291486e-06, "loss": 0.9073, "step": 257480 }, { "epoch": 1.6450302186218264, "grad_norm": 1.2230756282806396, "learning_rate": 7.60007117188486e-06, "loss": 0.9898, "step": 257490 }, { "epoch": 1.6450941057715651, "grad_norm": 0.9773632287979126, "learning_rate": 7.597412019844513e-06, "loss": 1.0886, "step": 257500 }, { "epoch": 1.6451579929213038, "grad_norm": 1.1554090976715088, "learning_rate": 7.594753294834933e-06, "loss": 0.8617, "step": 257510 }, { "epoch": 1.6452218800710425, "grad_norm": 0.9558780193328857, "learning_rate": 7.592094996882854e-06, "loss": 0.8919, "step": 257520 }, { "epoch": 1.6452857672207812, "grad_norm": 0.7920116186141968, "learning_rate": 7.589437126015081e-06, "loss": 0.8181, "step": 257530 }, { "epoch": 1.64534965437052, "grad_norm": 0.9481412172317505, "learning_rate": 7.586779682258355e-06, "loss": 0.9225, "step": 257540 }, { "epoch": 1.6454135415202586, "grad_norm": 1.214705228805542, "learning_rate": 7.5841226656394645e-06, "loss": 0.8409, "step": 257550 }, { "epoch": 1.6454774286699974, "grad_norm": 0.7767091989517212, "learning_rate": 7.58146607618514e-06, "loss": 0.9165, "step": 257560 }, { "epoch": 1.645541315819736, "grad_norm": 0.6727650761604309, "learning_rate": 7.578809913922158e-06, "loss": 0.7626, "step": 257570 }, { "epoch": 1.6456052029694748, "grad_norm": 0.9648169279098511, "learning_rate": 7.576154178877248e-06, "loss": 0.9397, "step": 257580 }, { "epoch": 1.6456690901192135, "grad_norm": 1.433490514755249, "learning_rate": 7.5734988710771836e-06, "loss": 0.8868, "step": 257590 }, { "epoch": 1.6457329772689522, "grad_norm": 1.0735435485839844, "learning_rate": 7.570843990548676e-06, "loss": 0.9415, "step": 257600 }, { "epoch": 1.6457968644186907, "grad_norm": 0.9692497849464417, "learning_rate": 7.568189537318487e-06, "loss": 0.9294, "step": 257610 }, { "epoch": 1.6458607515684296, "grad_norm": 1.3035361766815186, "learning_rate": 7.5655355114133245e-06, "loss": 0.8981, "step": 257620 }, { "epoch": 1.645924638718168, "grad_norm": 1.2619551420211792, "learning_rate": 7.562881912859937e-06, "loss": 0.9399, "step": 257630 }, { "epoch": 1.645988525867907, "grad_norm": 3.850501775741577, "learning_rate": 7.560228741685049e-06, "loss": 0.7043, "step": 257640 }, { "epoch": 1.6460524130176455, "grad_norm": 0.9732416272163391, "learning_rate": 7.557575997915362e-06, "loss": 0.7152, "step": 257650 }, { "epoch": 1.6461163001673844, "grad_norm": 1.1814053058624268, "learning_rate": 7.554923681577614e-06, "loss": 0.8326, "step": 257660 }, { "epoch": 1.646180187317123, "grad_norm": 2.915731430053711, "learning_rate": 7.5522717926984895e-06, "loss": 0.8365, "step": 257670 }, { "epoch": 1.6462440744668618, "grad_norm": 0.9403936266899109, "learning_rate": 7.54962033130473e-06, "loss": 0.8559, "step": 257680 }, { "epoch": 1.6463079616166003, "grad_norm": 0.9900381565093994, "learning_rate": 7.546969297423001e-06, "loss": 0.9193, "step": 257690 }, { "epoch": 1.6463718487663392, "grad_norm": 1.1494392156600952, "learning_rate": 7.544318691080032e-06, "loss": 1.0322, "step": 257700 }, { "epoch": 1.6464357359160777, "grad_norm": 0.9025363326072693, "learning_rate": 7.541668512302491e-06, "loss": 0.8881, "step": 257710 }, { "epoch": 1.6464996230658167, "grad_norm": 1.2116295099258423, "learning_rate": 7.539018761117095e-06, "loss": 0.9982, "step": 257720 }, { "epoch": 1.6465635102155551, "grad_norm": 1.2075427770614624, "learning_rate": 7.5363694375505e-06, "loss": 0.8318, "step": 257730 }, { "epoch": 1.646627397365294, "grad_norm": 0.9334720969200134, "learning_rate": 7.5337205416294076e-06, "loss": 0.8711, "step": 257740 }, { "epoch": 1.6466912845150325, "grad_norm": 1.1920677423477173, "learning_rate": 7.53107207338048e-06, "loss": 0.9515, "step": 257750 }, { "epoch": 1.6467551716647715, "grad_norm": 0.9365494847297668, "learning_rate": 7.528424032830411e-06, "loss": 1.0222, "step": 257760 }, { "epoch": 1.64681905881451, "grad_norm": 1.1577626466751099, "learning_rate": 7.5257764200058424e-06, "loss": 0.9483, "step": 257770 }, { "epoch": 1.646882945964249, "grad_norm": 4.188989162445068, "learning_rate": 7.5231292349334625e-06, "loss": 0.8343, "step": 257780 }, { "epoch": 1.6469468331139874, "grad_norm": 1.5819982290267944, "learning_rate": 7.520482477639906e-06, "loss": 0.901, "step": 257790 }, { "epoch": 1.6470107202637263, "grad_norm": 0.8653442859649658, "learning_rate": 7.517836148151852e-06, "loss": 0.8159, "step": 257800 }, { "epoch": 1.6470746074134648, "grad_norm": 1.4531724452972412, "learning_rate": 7.5151902464959344e-06, "loss": 1.0052, "step": 257810 }, { "epoch": 1.6471384945632037, "grad_norm": 0.883104145526886, "learning_rate": 7.512544772698799e-06, "loss": 0.868, "step": 257820 }, { "epoch": 1.6472023817129422, "grad_norm": 0.8851460814476013, "learning_rate": 7.509899726787106e-06, "loss": 0.6053, "step": 257830 }, { "epoch": 1.6472662688626811, "grad_norm": 0.77079176902771, "learning_rate": 7.507255108787475e-06, "loss": 1.041, "step": 257840 }, { "epoch": 1.6473301560124196, "grad_norm": 0.7836570739746094, "learning_rate": 7.504610918726557e-06, "loss": 0.9428, "step": 257850 }, { "epoch": 1.6473940431621583, "grad_norm": 1.032909631729126, "learning_rate": 7.501967156630957e-06, "loss": 0.7965, "step": 257860 }, { "epoch": 1.647457930311897, "grad_norm": 0.612842321395874, "learning_rate": 7.499323822527327e-06, "loss": 0.9013, "step": 257870 }, { "epoch": 1.6475218174616357, "grad_norm": 1.1342487335205078, "learning_rate": 7.496680916442262e-06, "loss": 0.8627, "step": 257880 }, { "epoch": 1.6475857046113744, "grad_norm": 0.634815514087677, "learning_rate": 7.494038438402401e-06, "loss": 0.7458, "step": 257890 }, { "epoch": 1.6476495917611131, "grad_norm": 1.1310441493988037, "learning_rate": 7.491396388434336e-06, "loss": 0.8711, "step": 257900 }, { "epoch": 1.6477134789108518, "grad_norm": 1.735602855682373, "learning_rate": 7.488754766564698e-06, "loss": 0.7482, "step": 257910 }, { "epoch": 1.6477773660605906, "grad_norm": 1.2173813581466675, "learning_rate": 7.486113572820064e-06, "loss": 0.7555, "step": 257920 }, { "epoch": 1.6478412532103293, "grad_norm": 0.9211714267730713, "learning_rate": 7.48347280722706e-06, "loss": 0.9156, "step": 257930 }, { "epoch": 1.647905140360068, "grad_norm": 0.8173043131828308, "learning_rate": 7.480832469812249e-06, "loss": 0.8714, "step": 257940 }, { "epoch": 1.6479690275098067, "grad_norm": 1.0287995338439941, "learning_rate": 7.478192560602254e-06, "loss": 0.9726, "step": 257950 }, { "epoch": 1.6480329146595454, "grad_norm": 1.5631654262542725, "learning_rate": 7.475553079623637e-06, "loss": 1.0207, "step": 257960 }, { "epoch": 1.648096801809284, "grad_norm": 0.9244776964187622, "learning_rate": 7.472914026903e-06, "loss": 0.7689, "step": 257970 }, { "epoch": 1.6481606889590228, "grad_norm": 1.429133653640747, "learning_rate": 7.470275402466909e-06, "loss": 0.8569, "step": 257980 }, { "epoch": 1.6482245761087615, "grad_norm": 0.9924103021621704, "learning_rate": 7.467637206341927e-06, "loss": 0.7181, "step": 257990 }, { "epoch": 1.6482884632585002, "grad_norm": 1.2423604726791382, "learning_rate": 7.464999438554643e-06, "loss": 1.1156, "step": 258000 }, { "epoch": 1.648352350408239, "grad_norm": 1.2210172414779663, "learning_rate": 7.462362099131603e-06, "loss": 0.7794, "step": 258010 }, { "epoch": 1.6484162375579776, "grad_norm": 0.8788898587226868, "learning_rate": 7.459725188099387e-06, "loss": 0.6866, "step": 258020 }, { "epoch": 1.6484801247077163, "grad_norm": 1.631868839263916, "learning_rate": 7.457088705484533e-06, "loss": 1.0881, "step": 258030 }, { "epoch": 1.648544011857455, "grad_norm": 0.6207049489021301, "learning_rate": 7.4544526513136095e-06, "loss": 0.6963, "step": 258040 }, { "epoch": 1.6486078990071937, "grad_norm": 0.7716155648231506, "learning_rate": 7.451817025613145e-06, "loss": 0.7368, "step": 258050 }, { "epoch": 1.6486717861569324, "grad_norm": 0.9051589965820312, "learning_rate": 7.449181828409702e-06, "loss": 0.9487, "step": 258060 }, { "epoch": 1.6487356733066711, "grad_norm": 0.8805955648422241, "learning_rate": 7.446547059729802e-06, "loss": 0.759, "step": 258070 }, { "epoch": 1.6487995604564099, "grad_norm": 0.9993048906326294, "learning_rate": 7.443912719599993e-06, "loss": 0.8764, "step": 258080 }, { "epoch": 1.6488634476061486, "grad_norm": 0.953590989112854, "learning_rate": 7.441278808046792e-06, "loss": 0.885, "step": 258090 }, { "epoch": 1.648927334755887, "grad_norm": 0.9039053320884705, "learning_rate": 7.438645325096743e-06, "loss": 1.1734, "step": 258100 }, { "epoch": 1.648991221905626, "grad_norm": 0.8109670877456665, "learning_rate": 7.436012270776343e-06, "loss": 0.7155, "step": 258110 }, { "epoch": 1.6490551090553645, "grad_norm": 1.853824496269226, "learning_rate": 7.4333796451121374e-06, "loss": 0.8389, "step": 258120 }, { "epoch": 1.6491189962051034, "grad_norm": 1.172304630279541, "learning_rate": 7.430747448130609e-06, "loss": 0.9715, "step": 258130 }, { "epoch": 1.6491828833548419, "grad_norm": 0.904864490032196, "learning_rate": 7.428115679858283e-06, "loss": 0.8458, "step": 258140 }, { "epoch": 1.6492467705045808, "grad_norm": 1.2634391784667969, "learning_rate": 7.425484340321676e-06, "loss": 0.8386, "step": 258150 }, { "epoch": 1.6493106576543193, "grad_norm": 1.1437947750091553, "learning_rate": 7.422853429547255e-06, "loss": 0.8137, "step": 258160 }, { "epoch": 1.6493745448040582, "grad_norm": 1.268520474433899, "learning_rate": 7.420222947561556e-06, "loss": 1.2592, "step": 258170 }, { "epoch": 1.6494384319537967, "grad_norm": 0.8500333428382874, "learning_rate": 7.417592894391029e-06, "loss": 0.9249, "step": 258180 }, { "epoch": 1.6495023191035356, "grad_norm": 0.9268811345100403, "learning_rate": 7.414963270062198e-06, "loss": 0.8247, "step": 258190 }, { "epoch": 1.649566206253274, "grad_norm": 0.7710387110710144, "learning_rate": 7.412334074601513e-06, "loss": 0.8659, "step": 258200 }, { "epoch": 1.649630093403013, "grad_norm": 0.9998272061347961, "learning_rate": 7.409705308035475e-06, "loss": 0.9299, "step": 258210 }, { "epoch": 1.6496939805527515, "grad_norm": 1.1036978960037231, "learning_rate": 7.407076970390547e-06, "loss": 0.8578, "step": 258220 }, { "epoch": 1.6497578677024904, "grad_norm": 0.7306403517723083, "learning_rate": 7.404449061693208e-06, "loss": 0.738, "step": 258230 }, { "epoch": 1.649821754852229, "grad_norm": 0.7348700761795044, "learning_rate": 7.401821581969909e-06, "loss": 0.8739, "step": 258240 }, { "epoch": 1.6498856420019679, "grad_norm": 0.921229898929596, "learning_rate": 7.399194531247128e-06, "loss": 0.8938, "step": 258250 }, { "epoch": 1.6499495291517063, "grad_norm": 1.5429316759109497, "learning_rate": 7.396567909551305e-06, "loss": 0.8227, "step": 258260 }, { "epoch": 1.6500134163014453, "grad_norm": 0.7975358366966248, "learning_rate": 7.3939417169089085e-06, "loss": 0.7359, "step": 258270 }, { "epoch": 1.6500773034511838, "grad_norm": 0.6854422092437744, "learning_rate": 7.3913159533463675e-06, "loss": 0.7961, "step": 258280 }, { "epoch": 1.6501411906009227, "grad_norm": 1.200284719467163, "learning_rate": 7.388690618890148e-06, "loss": 0.8829, "step": 258290 }, { "epoch": 1.6502050777506612, "grad_norm": 0.825060248374939, "learning_rate": 7.38606571356667e-06, "loss": 1.0262, "step": 258300 }, { "epoch": 1.6502689649004, "grad_norm": 0.9735358357429504, "learning_rate": 7.383441237402383e-06, "loss": 0.9383, "step": 258310 }, { "epoch": 1.6503328520501386, "grad_norm": 1.1090470552444458, "learning_rate": 7.380817190423706e-06, "loss": 1.0839, "step": 258320 }, { "epoch": 1.6503967391998773, "grad_norm": 1.1171561479568481, "learning_rate": 7.378193572657077e-06, "loss": 0.8753, "step": 258330 }, { "epoch": 1.650460626349616, "grad_norm": 0.9593927264213562, "learning_rate": 7.375570384128905e-06, "loss": 0.8434, "step": 258340 }, { "epoch": 1.6505245134993547, "grad_norm": 0.9938549995422363, "learning_rate": 7.372947624865612e-06, "loss": 0.761, "step": 258350 }, { "epoch": 1.6505884006490934, "grad_norm": 0.8092223405838013, "learning_rate": 7.370325294893621e-06, "loss": 0.9039, "step": 258360 }, { "epoch": 1.650652287798832, "grad_norm": 2.7092342376708984, "learning_rate": 7.367703394239328e-06, "loss": 0.8425, "step": 258370 }, { "epoch": 1.6507161749485708, "grad_norm": 1.0019803047180176, "learning_rate": 7.3650819229291565e-06, "loss": 0.8753, "step": 258380 }, { "epoch": 1.6507800620983095, "grad_norm": 1.1598658561706543, "learning_rate": 7.36246088098948e-06, "loss": 0.9008, "step": 258390 }, { "epoch": 1.6508439492480482, "grad_norm": 1.3171591758728027, "learning_rate": 7.359840268446722e-06, "loss": 0.9998, "step": 258400 }, { "epoch": 1.650907836397787, "grad_norm": 0.9848918914794922, "learning_rate": 7.357220085327249e-06, "loss": 0.7861, "step": 258410 }, { "epoch": 1.6509717235475256, "grad_norm": 0.9984748363494873, "learning_rate": 7.354600331657468e-06, "loss": 0.8645, "step": 258420 }, { "epoch": 1.6510356106972643, "grad_norm": 1.6876024007797241, "learning_rate": 7.351981007463754e-06, "loss": 1.0606, "step": 258430 }, { "epoch": 1.651099497847003, "grad_norm": 0.5669913291931152, "learning_rate": 7.34936211277249e-06, "loss": 0.9474, "step": 258440 }, { "epoch": 1.6511633849967418, "grad_norm": 1.1290777921676636, "learning_rate": 7.346743647610038e-06, "loss": 0.9748, "step": 258450 }, { "epoch": 1.6512272721464805, "grad_norm": 0.7757238149642944, "learning_rate": 7.344125612002794e-06, "loss": 1.1126, "step": 258460 }, { "epoch": 1.6512911592962192, "grad_norm": 2.1162233352661133, "learning_rate": 7.341508005977104e-06, "loss": 0.787, "step": 258470 }, { "epoch": 1.6513550464459579, "grad_norm": 0.9174578785896301, "learning_rate": 7.338890829559319e-06, "loss": 0.6748, "step": 258480 }, { "epoch": 1.6514189335956966, "grad_norm": 1.0966631174087524, "learning_rate": 7.336274082775829e-06, "loss": 1.0263, "step": 258490 }, { "epoch": 1.6514828207454353, "grad_norm": 0.8177943229675293, "learning_rate": 7.333657765652951e-06, "loss": 1.035, "step": 258500 }, { "epoch": 1.651546707895174, "grad_norm": 0.8279902338981628, "learning_rate": 7.331041878217065e-06, "loss": 1.0372, "step": 258510 }, { "epoch": 1.6516105950449127, "grad_norm": 0.7911148071289062, "learning_rate": 7.328426420494488e-06, "loss": 1.1565, "step": 258520 }, { "epoch": 1.6516744821946514, "grad_norm": 0.9187765717506409, "learning_rate": 7.325811392511583e-06, "loss": 0.7033, "step": 258530 }, { "epoch": 1.6517383693443901, "grad_norm": 1.4142966270446777, "learning_rate": 7.323196794294668e-06, "loss": 0.9938, "step": 258540 }, { "epoch": 1.6518022564941288, "grad_norm": 2.9758827686309814, "learning_rate": 7.320582625870092e-06, "loss": 1.0167, "step": 258550 }, { "epoch": 1.6518661436438675, "grad_norm": 0.7503261566162109, "learning_rate": 7.3179688872641595e-06, "loss": 0.7175, "step": 258560 }, { "epoch": 1.651930030793606, "grad_norm": 0.5419626235961914, "learning_rate": 7.315355578503219e-06, "loss": 0.7395, "step": 258570 }, { "epoch": 1.651993917943345, "grad_norm": 0.9171911478042603, "learning_rate": 7.312742699613561e-06, "loss": 0.6511, "step": 258580 }, { "epoch": 1.6520578050930834, "grad_norm": 3.700895071029663, "learning_rate": 7.310130250621533e-06, "loss": 0.8161, "step": 258590 }, { "epoch": 1.6521216922428223, "grad_norm": 0.655987560749054, "learning_rate": 7.307518231553406e-06, "loss": 0.7787, "step": 258600 }, { "epoch": 1.6521855793925608, "grad_norm": 1.6462640762329102, "learning_rate": 7.304906642435516e-06, "loss": 1.0206, "step": 258610 }, { "epoch": 1.6522494665422998, "grad_norm": 0.9825352430343628, "learning_rate": 7.302295483294147e-06, "loss": 0.9565, "step": 258620 }, { "epoch": 1.6523133536920382, "grad_norm": 1.0422539710998535, "learning_rate": 7.299684754155606e-06, "loss": 1.143, "step": 258630 }, { "epoch": 1.6523772408417772, "grad_norm": 1.152791142463684, "learning_rate": 7.297074455046171e-06, "loss": 1.1539, "step": 258640 }, { "epoch": 1.6524411279915157, "grad_norm": 0.7904389500617981, "learning_rate": 7.294464585992156e-06, "loss": 0.8516, "step": 258650 }, { "epoch": 1.6525050151412546, "grad_norm": 1.0914205312728882, "learning_rate": 7.29185514701981e-06, "loss": 0.6568, "step": 258660 }, { "epoch": 1.652568902290993, "grad_norm": 0.9571302533149719, "learning_rate": 7.289246138155436e-06, "loss": 1.0887, "step": 258670 }, { "epoch": 1.652632789440732, "grad_norm": 0.8392852544784546, "learning_rate": 7.286637559425313e-06, "loss": 0.9472, "step": 258680 }, { "epoch": 1.6526966765904705, "grad_norm": 1.3355880975723267, "learning_rate": 7.284029410855686e-06, "loss": 0.7675, "step": 258690 }, { "epoch": 1.6527605637402094, "grad_norm": 0.7684211134910583, "learning_rate": 7.281421692472856e-06, "loss": 0.879, "step": 258700 }, { "epoch": 1.652824450889948, "grad_norm": 0.7331956028938293, "learning_rate": 7.278814404303047e-06, "loss": 0.7309, "step": 258710 }, { "epoch": 1.6528883380396868, "grad_norm": 0.8413065671920776, "learning_rate": 7.276207546372555e-06, "loss": 0.8249, "step": 258720 }, { "epoch": 1.6529522251894253, "grad_norm": 0.8433711528778076, "learning_rate": 7.273601118707601e-06, "loss": 1.0195, "step": 258730 }, { "epoch": 1.6530161123391642, "grad_norm": 1.394230842590332, "learning_rate": 7.270995121334456e-06, "loss": 0.882, "step": 258740 }, { "epoch": 1.6530799994889027, "grad_norm": 1.0308012962341309, "learning_rate": 7.268389554279348e-06, "loss": 0.7528, "step": 258750 }, { "epoch": 1.6531438866386416, "grad_norm": 0.8747785091400146, "learning_rate": 7.265784417568533e-06, "loss": 1.0698, "step": 258760 }, { "epoch": 1.6532077737883801, "grad_norm": 4.951910495758057, "learning_rate": 7.26317971122823e-06, "loss": 1.1276, "step": 258770 }, { "epoch": 1.653271660938119, "grad_norm": 1.2269554138183594, "learning_rate": 7.260575435284694e-06, "loss": 1.0913, "step": 258780 }, { "epoch": 1.6533355480878575, "grad_norm": 1.137904405593872, "learning_rate": 7.257971589764123e-06, "loss": 1.0261, "step": 258790 }, { "epoch": 1.6533994352375965, "grad_norm": 1.1804286241531372, "learning_rate": 7.255368174692767e-06, "loss": 0.7504, "step": 258800 }, { "epoch": 1.653463322387335, "grad_norm": 0.7149761915206909, "learning_rate": 7.252765190096822e-06, "loss": 0.8265, "step": 258810 }, { "epoch": 1.6535272095370737, "grad_norm": 1.0930120944976807, "learning_rate": 7.250162636002528e-06, "loss": 1.0783, "step": 258820 }, { "epoch": 1.6535910966868124, "grad_norm": 0.8766050934791565, "learning_rate": 7.247560512436063e-06, "loss": 0.8981, "step": 258830 }, { "epoch": 1.653654983836551, "grad_norm": 0.5357649326324463, "learning_rate": 7.244958819423664e-06, "loss": 0.7771, "step": 258840 }, { "epoch": 1.6537188709862898, "grad_norm": 0.979239284992218, "learning_rate": 7.242357556991508e-06, "loss": 0.8438, "step": 258850 }, { "epoch": 1.6537827581360285, "grad_norm": 0.8525903224945068, "learning_rate": 7.2397567251658086e-06, "loss": 0.8439, "step": 258860 }, { "epoch": 1.6538466452857672, "grad_norm": 0.9817124605178833, "learning_rate": 7.237156323972744e-06, "loss": 0.6514, "step": 258870 }, { "epoch": 1.653910532435506, "grad_norm": 1.5313221216201782, "learning_rate": 7.2345563534385095e-06, "loss": 0.6729, "step": 258880 }, { "epoch": 1.6539744195852446, "grad_norm": 0.5732386112213135, "learning_rate": 7.231956813589302e-06, "loss": 0.9224, "step": 258890 }, { "epoch": 1.6540383067349833, "grad_norm": 1.3414524793624878, "learning_rate": 7.2293577044512785e-06, "loss": 0.8311, "step": 258900 }, { "epoch": 1.654102193884722, "grad_norm": 0.8082197308540344, "learning_rate": 7.226759026050633e-06, "loss": 0.9462, "step": 258910 }, { "epoch": 1.6541660810344607, "grad_norm": 0.9954521656036377, "learning_rate": 7.224160778413519e-06, "loss": 0.9759, "step": 258920 }, { "epoch": 1.6542299681841994, "grad_norm": 1.4728593826293945, "learning_rate": 7.221562961566125e-06, "loss": 0.7694, "step": 258930 }, { "epoch": 1.6542938553339381, "grad_norm": 1.2984557151794434, "learning_rate": 7.218965575534597e-06, "loss": 0.9735, "step": 258940 }, { "epoch": 1.6543577424836768, "grad_norm": 1.1309173107147217, "learning_rate": 7.216368620345098e-06, "loss": 0.8834, "step": 258950 }, { "epoch": 1.6544216296334155, "grad_norm": 1.143385648727417, "learning_rate": 7.2137720960237685e-06, "loss": 0.8156, "step": 258960 }, { "epoch": 1.6544855167831543, "grad_norm": 0.8312119245529175, "learning_rate": 7.211176002596787e-06, "loss": 0.8482, "step": 258970 }, { "epoch": 1.654549403932893, "grad_norm": 1.2088533639907837, "learning_rate": 7.20858034009026e-06, "loss": 0.8226, "step": 258980 }, { "epoch": 1.6546132910826317, "grad_norm": 0.682985246181488, "learning_rate": 7.205985108530355e-06, "loss": 0.8264, "step": 258990 }, { "epoch": 1.6546771782323704, "grad_norm": 0.8336872458457947, "learning_rate": 7.2033903079432085e-06, "loss": 1.1374, "step": 259000 }, { "epoch": 1.654741065382109, "grad_norm": 1.0589526891708374, "learning_rate": 7.200795938354937e-06, "loss": 0.9523, "step": 259010 }, { "epoch": 1.6548049525318478, "grad_norm": 1.032314658164978, "learning_rate": 7.198201999791687e-06, "loss": 1.2553, "step": 259020 }, { "epoch": 1.6548688396815865, "grad_norm": 1.1617461442947388, "learning_rate": 7.195608492279565e-06, "loss": 1.1661, "step": 259030 }, { "epoch": 1.6549327268313252, "grad_norm": 0.8610371947288513, "learning_rate": 7.193015415844706e-06, "loss": 0.8, "step": 259040 }, { "epoch": 1.654996613981064, "grad_norm": 0.7629457712173462, "learning_rate": 7.1904227705131995e-06, "loss": 0.9533, "step": 259050 }, { "epoch": 1.6550605011308024, "grad_norm": 0.7997171878814697, "learning_rate": 7.187830556311187e-06, "loss": 0.844, "step": 259060 }, { "epoch": 1.6551243882805413, "grad_norm": 0.6464492678642273, "learning_rate": 7.185238773264746e-06, "loss": 0.8245, "step": 259070 }, { "epoch": 1.6551882754302798, "grad_norm": 1.1188616752624512, "learning_rate": 7.182647421400002e-06, "loss": 0.9905, "step": 259080 }, { "epoch": 1.6552521625800187, "grad_norm": 0.8652756214141846, "learning_rate": 7.180056500743032e-06, "loss": 0.9505, "step": 259090 }, { "epoch": 1.6553160497297572, "grad_norm": 0.8543888330459595, "learning_rate": 7.177466011319945e-06, "loss": 1.05, "step": 259100 }, { "epoch": 1.6553799368794961, "grad_norm": 1.304616093635559, "learning_rate": 7.174875953156812e-06, "loss": 0.9993, "step": 259110 }, { "epoch": 1.6554438240292346, "grad_norm": 0.7675788998603821, "learning_rate": 7.1722863262797365e-06, "loss": 0.9806, "step": 259120 }, { "epoch": 1.6555077111789736, "grad_norm": 1.1626311540603638, "learning_rate": 7.169697130714781e-06, "loss": 0.8677, "step": 259130 }, { "epoch": 1.655571598328712, "grad_norm": 3.422379970550537, "learning_rate": 7.16710836648804e-06, "loss": 0.7833, "step": 259140 }, { "epoch": 1.655635485478451, "grad_norm": 1.048319935798645, "learning_rate": 7.16452003362556e-06, "loss": 0.8893, "step": 259150 }, { "epoch": 1.6556993726281894, "grad_norm": 0.8422994613647461, "learning_rate": 7.161932132153432e-06, "loss": 0.9099, "step": 259160 }, { "epoch": 1.6557632597779284, "grad_norm": 0.6454704403877258, "learning_rate": 7.1593446620977e-06, "loss": 0.7903, "step": 259170 }, { "epoch": 1.6558271469276669, "grad_norm": 0.8068992495536804, "learning_rate": 7.156757623484433e-06, "loss": 0.8284, "step": 259180 }, { "epoch": 1.6558910340774058, "grad_norm": 1.7042864561080933, "learning_rate": 7.154171016339678e-06, "loss": 1.1194, "step": 259190 }, { "epoch": 1.6559549212271443, "grad_norm": 0.6754870414733887, "learning_rate": 7.151584840689485e-06, "loss": 0.8066, "step": 259200 }, { "epoch": 1.6560188083768832, "grad_norm": 0.5614364147186279, "learning_rate": 7.1489990965599165e-06, "loss": 0.6847, "step": 259210 }, { "epoch": 1.6560826955266217, "grad_norm": 0.9404881000518799, "learning_rate": 7.146413783976979e-06, "loss": 1.162, "step": 259220 }, { "epoch": 1.6561465826763606, "grad_norm": 0.7669598460197449, "learning_rate": 7.1438289029667485e-06, "loss": 0.7352, "step": 259230 }, { "epoch": 1.656210469826099, "grad_norm": 0.7307997345924377, "learning_rate": 7.141244453555224e-06, "loss": 0.8912, "step": 259240 }, { "epoch": 1.656274356975838, "grad_norm": 0.7206220626831055, "learning_rate": 7.138660435768452e-06, "loss": 1.2435, "step": 259250 }, { "epoch": 1.6563382441255765, "grad_norm": 0.9848304986953735, "learning_rate": 7.136076849632445e-06, "loss": 0.9284, "step": 259260 }, { "epoch": 1.6564021312753154, "grad_norm": 0.5905624628067017, "learning_rate": 7.133493695173233e-06, "loss": 0.8219, "step": 259270 }, { "epoch": 1.656466018425054, "grad_norm": 0.7588204145431519, "learning_rate": 7.130910972416816e-06, "loss": 0.7849, "step": 259280 }, { "epoch": 1.6565299055747928, "grad_norm": 0.9828800559043884, "learning_rate": 7.128328681389224e-06, "loss": 0.5819, "step": 259290 }, { "epoch": 1.6565937927245313, "grad_norm": 1.427322268486023, "learning_rate": 7.125746822116436e-06, "loss": 0.7216, "step": 259300 }, { "epoch": 1.65665767987427, "grad_norm": 0.6109748482704163, "learning_rate": 7.123165394624481e-06, "loss": 0.9879, "step": 259310 }, { "epoch": 1.6567215670240087, "grad_norm": 1.3023579120635986, "learning_rate": 7.12058439893934e-06, "loss": 0.9592, "step": 259320 }, { "epoch": 1.6567854541737475, "grad_norm": 1.2052748203277588, "learning_rate": 7.118003835087011e-06, "loss": 1.0689, "step": 259330 }, { "epoch": 1.6568493413234862, "grad_norm": 2.0568909645080566, "learning_rate": 7.115423703093477e-06, "loss": 0.8332, "step": 259340 }, { "epoch": 1.6569132284732249, "grad_norm": 0.9474199414253235, "learning_rate": 7.112844002984737e-06, "loss": 0.5583, "step": 259350 }, { "epoch": 1.6569771156229636, "grad_norm": 0.8970088958740234, "learning_rate": 7.110264734786748e-06, "loss": 0.8069, "step": 259360 }, { "epoch": 1.6570410027727023, "grad_norm": 0.5539265871047974, "learning_rate": 7.107685898525512e-06, "loss": 0.8496, "step": 259370 }, { "epoch": 1.657104889922441, "grad_norm": 0.9140658378601074, "learning_rate": 7.10510749422697e-06, "loss": 0.9984, "step": 259380 }, { "epoch": 1.6571687770721797, "grad_norm": 2.285615921020508, "learning_rate": 7.102529521917123e-06, "loss": 0.8285, "step": 259390 }, { "epoch": 1.6572326642219184, "grad_norm": 0.8295195698738098, "learning_rate": 7.099951981621899e-06, "loss": 0.6997, "step": 259400 }, { "epoch": 1.657296551371657, "grad_norm": 0.6087706685066223, "learning_rate": 7.097374873367274e-06, "loss": 0.7951, "step": 259410 }, { "epoch": 1.6573604385213958, "grad_norm": 1.149000644683838, "learning_rate": 7.094798197179209e-06, "loss": 1.0521, "step": 259420 }, { "epoch": 1.6574243256711345, "grad_norm": 0.9276943206787109, "learning_rate": 7.0922219530836495e-06, "loss": 1.0712, "step": 259430 }, { "epoch": 1.6574882128208732, "grad_norm": 0.8571450114250183, "learning_rate": 7.089646141106532e-06, "loss": 1.1182, "step": 259440 }, { "epoch": 1.657552099970612, "grad_norm": 0.8675596117973328, "learning_rate": 7.0870707612737894e-06, "loss": 0.8654, "step": 259450 }, { "epoch": 1.6576159871203506, "grad_norm": 0.7856946587562561, "learning_rate": 7.08449581361138e-06, "loss": 0.8388, "step": 259460 }, { "epoch": 1.6576798742700893, "grad_norm": 1.1640080213546753, "learning_rate": 7.081921298145217e-06, "loss": 0.7823, "step": 259470 }, { "epoch": 1.657743761419828, "grad_norm": 1.505099892616272, "learning_rate": 7.079347214901244e-06, "loss": 0.8802, "step": 259480 }, { "epoch": 1.6578076485695667, "grad_norm": 1.1108448505401611, "learning_rate": 7.076773563905364e-06, "loss": 0.9317, "step": 259490 }, { "epoch": 1.6578715357193055, "grad_norm": 0.66489177942276, "learning_rate": 7.074200345183518e-06, "loss": 1.0849, "step": 259500 }, { "epoch": 1.6579354228690442, "grad_norm": 2.0368494987487793, "learning_rate": 7.071627558761607e-06, "loss": 1.0492, "step": 259510 }, { "epoch": 1.6579993100187829, "grad_norm": 0.9006597399711609, "learning_rate": 7.069055204665537e-06, "loss": 1.1099, "step": 259520 }, { "epoch": 1.6580631971685216, "grad_norm": 1.1404061317443848, "learning_rate": 7.066483282921238e-06, "loss": 1.2387, "step": 259530 }, { "epoch": 1.6581270843182603, "grad_norm": 0.8864837884902954, "learning_rate": 7.063911793554584e-06, "loss": 0.9816, "step": 259540 }, { "epoch": 1.6581909714679988, "grad_norm": 1.1338231563568115, "learning_rate": 7.061340736591487e-06, "loss": 0.8627, "step": 259550 }, { "epoch": 1.6582548586177377, "grad_norm": 1.0094668865203857, "learning_rate": 7.058770112057833e-06, "loss": 0.8275, "step": 259560 }, { "epoch": 1.6583187457674762, "grad_norm": 0.8408057689666748, "learning_rate": 7.056199919979522e-06, "loss": 1.094, "step": 259570 }, { "epoch": 1.658382632917215, "grad_norm": 0.8108777403831482, "learning_rate": 7.053630160382418e-06, "loss": 0.9879, "step": 259580 }, { "epoch": 1.6584465200669536, "grad_norm": 0.8305297493934631, "learning_rate": 7.051060833292422e-06, "loss": 0.7306, "step": 259590 }, { "epoch": 1.6585104072166925, "grad_norm": 1.6006922721862793, "learning_rate": 7.048491938735391e-06, "loss": 0.7892, "step": 259600 }, { "epoch": 1.658574294366431, "grad_norm": 0.9414778351783752, "learning_rate": 7.045923476737215e-06, "loss": 0.8955, "step": 259610 }, { "epoch": 1.65863818151617, "grad_norm": 0.5770961046218872, "learning_rate": 7.043355447323735e-06, "loss": 0.9527, "step": 259620 }, { "epoch": 1.6587020686659084, "grad_norm": 1.4286727905273438, "learning_rate": 7.040787850520847e-06, "loss": 0.837, "step": 259630 }, { "epoch": 1.6587659558156473, "grad_norm": 0.8571046590805054, "learning_rate": 7.0382206863543745e-06, "loss": 1.1439, "step": 259640 }, { "epoch": 1.6588298429653858, "grad_norm": 0.9641390442848206, "learning_rate": 7.0356539548501965e-06, "loss": 0.7242, "step": 259650 }, { "epoch": 1.6588937301151248, "grad_norm": 0.7776396870613098, "learning_rate": 7.033087656034143e-06, "loss": 0.9499, "step": 259660 }, { "epoch": 1.6589576172648632, "grad_norm": 1.0999819040298462, "learning_rate": 7.030521789932082e-06, "loss": 0.9728, "step": 259670 }, { "epoch": 1.6590215044146022, "grad_norm": 0.7827914357185364, "learning_rate": 7.027956356569831e-06, "loss": 0.8949, "step": 259680 }, { "epoch": 1.6590853915643407, "grad_norm": 1.1689743995666504, "learning_rate": 7.025391355973243e-06, "loss": 0.8803, "step": 259690 }, { "epoch": 1.6591492787140796, "grad_norm": 0.9321467876434326, "learning_rate": 7.022826788168129e-06, "loss": 1.0456, "step": 259700 }, { "epoch": 1.659213165863818, "grad_norm": 2.064208984375, "learning_rate": 7.020262653180343e-06, "loss": 0.8112, "step": 259710 }, { "epoch": 1.659277053013557, "grad_norm": 1.093505620956421, "learning_rate": 7.017698951035684e-06, "loss": 0.7237, "step": 259720 }, { "epoch": 1.6593409401632955, "grad_norm": 0.7893190383911133, "learning_rate": 7.015135681759983e-06, "loss": 0.7623, "step": 259730 }, { "epoch": 1.6594048273130344, "grad_norm": 0.39939016103744507, "learning_rate": 7.012572845379062e-06, "loss": 1.0163, "step": 259740 }, { "epoch": 1.6594687144627729, "grad_norm": 0.834723711013794, "learning_rate": 7.010010441918713e-06, "loss": 0.8614, "step": 259750 }, { "epoch": 1.6595326016125118, "grad_norm": 1.1181209087371826, "learning_rate": 7.007448471404759e-06, "loss": 0.8843, "step": 259760 }, { "epoch": 1.6595964887622503, "grad_norm": 2.0455477237701416, "learning_rate": 7.004886933862986e-06, "loss": 1.0729, "step": 259770 }, { "epoch": 1.6596603759119892, "grad_norm": 1.9647599458694458, "learning_rate": 7.002325829319206e-06, "loss": 0.8421, "step": 259780 }, { "epoch": 1.6597242630617277, "grad_norm": 0.7878594994544983, "learning_rate": 6.9997651577991875e-06, "loss": 0.7208, "step": 259790 }, { "epoch": 1.6597881502114664, "grad_norm": 1.202373743057251, "learning_rate": 6.997204919328753e-06, "loss": 0.7398, "step": 259800 }, { "epoch": 1.6598520373612051, "grad_norm": 1.152909517288208, "learning_rate": 6.994645113933651e-06, "loss": 0.7517, "step": 259810 }, { "epoch": 1.6599159245109438, "grad_norm": 0.6811971664428711, "learning_rate": 6.992085741639692e-06, "loss": 0.8073, "step": 259820 }, { "epoch": 1.6599798116606825, "grad_norm": 1.4438114166259766, "learning_rate": 6.989526802472623e-06, "loss": 1.0148, "step": 259830 }, { "epoch": 1.6600436988104212, "grad_norm": 0.7701313495635986, "learning_rate": 6.986968296458241e-06, "loss": 0.886, "step": 259840 }, { "epoch": 1.66010758596016, "grad_norm": 0.7953954935073853, "learning_rate": 6.98441022362229e-06, "loss": 0.7475, "step": 259850 }, { "epoch": 1.6601714731098987, "grad_norm": 0.9012263417243958, "learning_rate": 6.9818525839905555e-06, "loss": 0.749, "step": 259860 }, { "epoch": 1.6602353602596374, "grad_norm": 0.8417377471923828, "learning_rate": 6.979295377588762e-06, "loss": 0.6071, "step": 259870 }, { "epoch": 1.660299247409376, "grad_norm": 0.8795028924942017, "learning_rate": 6.9767386044427e-06, "loss": 0.7728, "step": 259880 }, { "epoch": 1.6603631345591148, "grad_norm": 0.8333396911621094, "learning_rate": 6.974182264578088e-06, "loss": 0.9647, "step": 259890 }, { "epoch": 1.6604270217088535, "grad_norm": 0.7249652147293091, "learning_rate": 6.971626358020694e-06, "loss": 0.8005, "step": 259900 }, { "epoch": 1.6604909088585922, "grad_norm": 1.110545039176941, "learning_rate": 6.969070884796247e-06, "loss": 0.9663, "step": 259910 }, { "epoch": 1.660554796008331, "grad_norm": 0.9945274591445923, "learning_rate": 6.966515844930471e-06, "loss": 0.9558, "step": 259920 }, { "epoch": 1.6606186831580696, "grad_norm": 0.9578294157981873, "learning_rate": 6.963961238449124e-06, "loss": 0.7574, "step": 259930 }, { "epoch": 1.6606825703078083, "grad_norm": 1.060130000114441, "learning_rate": 6.961407065377906e-06, "loss": 1.1417, "step": 259940 }, { "epoch": 1.660746457457547, "grad_norm": 0.7615020275115967, "learning_rate": 6.9588533257425645e-06, "loss": 0.8906, "step": 259950 }, { "epoch": 1.6608103446072857, "grad_norm": 0.857650876045227, "learning_rate": 6.956300019568795e-06, "loss": 0.9114, "step": 259960 }, { "epoch": 1.6608742317570244, "grad_norm": 1.2820098400115967, "learning_rate": 6.953747146882328e-06, "loss": 0.9001, "step": 259970 }, { "epoch": 1.6609381189067631, "grad_norm": 1.1419647932052612, "learning_rate": 6.951194707708863e-06, "loss": 0.9179, "step": 259980 }, { "epoch": 1.6610020060565018, "grad_norm": 1.4320467710494995, "learning_rate": 6.94864270207412e-06, "loss": 0.9435, "step": 259990 }, { "epoch": 1.6610658932062405, "grad_norm": 1.4276831150054932, "learning_rate": 6.946091130003779e-06, "loss": 0.6951, "step": 260000 }, { "epoch": 1.6611297803559792, "grad_norm": 1.3144681453704834, "learning_rate": 6.9435399915235566e-06, "loss": 0.8405, "step": 260010 }, { "epoch": 1.661193667505718, "grad_norm": 0.8671199083328247, "learning_rate": 6.940989286659122e-06, "loss": 0.8414, "step": 260020 }, { "epoch": 1.6612575546554567, "grad_norm": 0.8773459792137146, "learning_rate": 6.938439015436193e-06, "loss": 0.9126, "step": 260030 }, { "epoch": 1.6613214418051951, "grad_norm": 1.2610617876052856, "learning_rate": 6.935889177880422e-06, "loss": 1.253, "step": 260040 }, { "epoch": 1.661385328954934, "grad_norm": 0.6040871143341064, "learning_rate": 6.9333397740175055e-06, "loss": 0.8628, "step": 260050 }, { "epoch": 1.6614492161046726, "grad_norm": 0.6715024709701538, "learning_rate": 6.930790803873122e-06, "loss": 0.6639, "step": 260060 }, { "epoch": 1.6615131032544115, "grad_norm": 0.5819202065467834, "learning_rate": 6.928242267472928e-06, "loss": 0.9687, "step": 260070 }, { "epoch": 1.66157699040415, "grad_norm": 1.6079118251800537, "learning_rate": 6.9256941648426055e-06, "loss": 0.8493, "step": 260080 }, { "epoch": 1.661640877553889, "grad_norm": 0.8289951086044312, "learning_rate": 6.9231464960077986e-06, "loss": 1.1034, "step": 260090 }, { "epoch": 1.6617047647036274, "grad_norm": 0.7515999674797058, "learning_rate": 6.920599260994182e-06, "loss": 1.0362, "step": 260100 }, { "epoch": 1.6617686518533663, "grad_norm": 0.8849010467529297, "learning_rate": 6.91805245982739e-06, "loss": 0.6695, "step": 260110 }, { "epoch": 1.6618325390031048, "grad_norm": 1.1140575408935547, "learning_rate": 6.915506092533092e-06, "loss": 0.8597, "step": 260120 }, { "epoch": 1.6618964261528437, "grad_norm": 1.2413811683654785, "learning_rate": 6.912960159136911e-06, "loss": 0.9088, "step": 260130 }, { "epoch": 1.6619603133025822, "grad_norm": 0.6170461177825928, "learning_rate": 6.910414659664505e-06, "loss": 1.1817, "step": 260140 }, { "epoch": 1.6620242004523211, "grad_norm": 0.8800461888313293, "learning_rate": 6.907869594141492e-06, "loss": 0.7515, "step": 260150 }, { "epoch": 1.6620880876020596, "grad_norm": 2.8232409954071045, "learning_rate": 6.905324962593523e-06, "loss": 0.6154, "step": 260160 }, { "epoch": 1.6621519747517985, "grad_norm": 1.2982561588287354, "learning_rate": 6.902780765046202e-06, "loss": 0.8484, "step": 260170 }, { "epoch": 1.662215861901537, "grad_norm": 1.3580363988876343, "learning_rate": 6.9002370015251785e-06, "loss": 0.7412, "step": 260180 }, { "epoch": 1.662279749051276, "grad_norm": 1.23239004611969, "learning_rate": 6.897693672056038e-06, "loss": 0.8696, "step": 260190 }, { "epoch": 1.6623436362010144, "grad_norm": 1.3502616882324219, "learning_rate": 6.895150776664428e-06, "loss": 0.8322, "step": 260200 }, { "epoch": 1.6624075233507534, "grad_norm": 2.1571125984191895, "learning_rate": 6.8926083153759256e-06, "loss": 0.8226, "step": 260210 }, { "epoch": 1.6624714105004919, "grad_norm": 1.4189776182174683, "learning_rate": 6.890066288216163e-06, "loss": 0.7431, "step": 260220 }, { "epoch": 1.6625352976502308, "grad_norm": 0.8198552131652832, "learning_rate": 6.887524695210712e-06, "loss": 1.0104, "step": 260230 }, { "epoch": 1.6625991847999693, "grad_norm": 2.3322837352752686, "learning_rate": 6.884983536385203e-06, "loss": 1.0087, "step": 260240 }, { "epoch": 1.6626630719497082, "grad_norm": 1.4671247005462646, "learning_rate": 6.882442811765194e-06, "loss": 0.9418, "step": 260250 }, { "epoch": 1.6627269590994467, "grad_norm": 1.3341039419174194, "learning_rate": 6.879902521376291e-06, "loss": 0.8353, "step": 260260 }, { "epoch": 1.6627908462491854, "grad_norm": 0.7063724994659424, "learning_rate": 6.877362665244086e-06, "loss": 1.0032, "step": 260270 }, { "epoch": 1.662854733398924, "grad_norm": 0.9785807132720947, "learning_rate": 6.87482324339413e-06, "loss": 0.7307, "step": 260280 }, { "epoch": 1.6629186205486628, "grad_norm": 0.6390813589096069, "learning_rate": 6.872284255852024e-06, "loss": 0.7543, "step": 260290 }, { "epoch": 1.6629825076984015, "grad_norm": 0.8422411680221558, "learning_rate": 6.869745702643321e-06, "loss": 0.9207, "step": 260300 }, { "epoch": 1.6630463948481402, "grad_norm": 0.996957540512085, "learning_rate": 6.867207583793595e-06, "loss": 0.7347, "step": 260310 }, { "epoch": 1.663110281997879, "grad_norm": 1.3174203634262085, "learning_rate": 6.864669899328396e-06, "loss": 0.8377, "step": 260320 }, { "epoch": 1.6631741691476176, "grad_norm": 1.2684632539749146, "learning_rate": 6.8621326492733e-06, "loss": 0.8836, "step": 260330 }, { "epoch": 1.6632380562973563, "grad_norm": 1.1714234352111816, "learning_rate": 6.8595958336538366e-06, "loss": 1.0765, "step": 260340 }, { "epoch": 1.663301943447095, "grad_norm": 0.5843120813369751, "learning_rate": 6.857059452495579e-06, "loss": 0.8196, "step": 260350 }, { "epoch": 1.6633658305968337, "grad_norm": 2.4564054012298584, "learning_rate": 6.8545235058240455e-06, "loss": 0.8696, "step": 260360 }, { "epoch": 1.6634297177465724, "grad_norm": 1.0437301397323608, "learning_rate": 6.8519879936647936e-06, "loss": 1.1825, "step": 260370 }, { "epoch": 1.6634936048963112, "grad_norm": 1.1650587320327759, "learning_rate": 6.849452916043342e-06, "loss": 0.9162, "step": 260380 }, { "epoch": 1.6635574920460499, "grad_norm": 0.844201922416687, "learning_rate": 6.846918272985236e-06, "loss": 0.811, "step": 260390 }, { "epoch": 1.6636213791957886, "grad_norm": 0.9441670179367065, "learning_rate": 6.844384064516002e-06, "loss": 0.835, "step": 260400 }, { "epoch": 1.6636852663455273, "grad_norm": 0.881705641746521, "learning_rate": 6.841850290661145e-06, "loss": 0.8547, "step": 260410 }, { "epoch": 1.663749153495266, "grad_norm": 1.6114307641983032, "learning_rate": 6.839316951446201e-06, "loss": 0.9447, "step": 260420 }, { "epoch": 1.6638130406450047, "grad_norm": 1.4205198287963867, "learning_rate": 6.83678404689666e-06, "loss": 1.0585, "step": 260430 }, { "epoch": 1.6638769277947434, "grad_norm": 1.1777595281600952, "learning_rate": 6.834251577038059e-06, "loss": 0.9045, "step": 260440 }, { "epoch": 1.663940814944482, "grad_norm": 0.6923563480377197, "learning_rate": 6.831719541895881e-06, "loss": 0.5997, "step": 260450 }, { "epoch": 1.6640047020942208, "grad_norm": 1.00764000415802, "learning_rate": 6.8291879414956404e-06, "loss": 0.8307, "step": 260460 }, { "epoch": 1.6640685892439595, "grad_norm": 0.7519059777259827, "learning_rate": 6.826656775862816e-06, "loss": 0.8285, "step": 260470 }, { "epoch": 1.6641324763936982, "grad_norm": 1.0380022525787354, "learning_rate": 6.8241260450229164e-06, "loss": 0.999, "step": 260480 }, { "epoch": 1.664196363543437, "grad_norm": 1.5414055585861206, "learning_rate": 6.8215957490014135e-06, "loss": 1.0058, "step": 260490 }, { "epoch": 1.6642602506931756, "grad_norm": 1.0284233093261719, "learning_rate": 6.8190658878238e-06, "loss": 0.7669, "step": 260500 }, { "epoch": 1.6643241378429143, "grad_norm": 0.8977694511413574, "learning_rate": 6.816536461515544e-06, "loss": 0.891, "step": 260510 }, { "epoch": 1.664388024992653, "grad_norm": 0.6301488280296326, "learning_rate": 6.814007470102135e-06, "loss": 0.8527, "step": 260520 }, { "epoch": 1.6644519121423915, "grad_norm": 0.8851107358932495, "learning_rate": 6.811478913609021e-06, "loss": 0.9336, "step": 260530 }, { "epoch": 1.6645157992921304, "grad_norm": 2.0833637714385986, "learning_rate": 6.808950792061692e-06, "loss": 0.9544, "step": 260540 }, { "epoch": 1.664579686441869, "grad_norm": 1.2819724082946777, "learning_rate": 6.806423105485577e-06, "loss": 0.9766, "step": 260550 }, { "epoch": 1.6646435735916079, "grad_norm": 0.8446447253227234, "learning_rate": 6.803895853906161e-06, "loss": 0.7945, "step": 260560 }, { "epoch": 1.6647074607413463, "grad_norm": 1.1862691640853882, "learning_rate": 6.801369037348876e-06, "loss": 0.8252, "step": 260570 }, { "epoch": 1.6647713478910853, "grad_norm": 0.799518883228302, "learning_rate": 6.798842655839177e-06, "loss": 0.7502, "step": 260580 }, { "epoch": 1.6648352350408238, "grad_norm": 0.642816424369812, "learning_rate": 6.796316709402517e-06, "loss": 0.6272, "step": 260590 }, { "epoch": 1.6648991221905627, "grad_norm": 0.8209549188613892, "learning_rate": 6.793791198064309e-06, "loss": 0.5957, "step": 260600 }, { "epoch": 1.6649630093403012, "grad_norm": 0.8683721423149109, "learning_rate": 6.79126612185002e-06, "loss": 0.7731, "step": 260610 }, { "epoch": 1.66502689649004, "grad_norm": 0.5981495380401611, "learning_rate": 6.788741480785049e-06, "loss": 0.9643, "step": 260620 }, { "epoch": 1.6650907836397786, "grad_norm": 0.8800461292266846, "learning_rate": 6.7862172748948405e-06, "loss": 0.9372, "step": 260630 }, { "epoch": 1.6651546707895175, "grad_norm": 2.0129380226135254, "learning_rate": 6.7836935042048025e-06, "loss": 0.8178, "step": 260640 }, { "epoch": 1.665218557939256, "grad_norm": 2.3390965461730957, "learning_rate": 6.781170168740369e-06, "loss": 0.826, "step": 260650 }, { "epoch": 1.665282445088995, "grad_norm": 0.812234103679657, "learning_rate": 6.778647268526933e-06, "loss": 0.8003, "step": 260660 }, { "epoch": 1.6653463322387334, "grad_norm": 1.0095510482788086, "learning_rate": 6.7761248035899175e-06, "loss": 0.9685, "step": 260670 }, { "epoch": 1.6654102193884723, "grad_norm": 0.8833625316619873, "learning_rate": 6.7736027739547125e-06, "loss": 0.8384, "step": 260680 }, { "epoch": 1.6654741065382108, "grad_norm": 1.3582264184951782, "learning_rate": 6.7710811796467265e-06, "loss": 1.0906, "step": 260690 }, { "epoch": 1.6655379936879497, "grad_norm": 0.8271639347076416, "learning_rate": 6.76856002069135e-06, "loss": 0.6488, "step": 260700 }, { "epoch": 1.6656018808376882, "grad_norm": 0.9180144667625427, "learning_rate": 6.76603929711398e-06, "loss": 1.0439, "step": 260710 }, { "epoch": 1.6656657679874272, "grad_norm": 1.2316609621047974, "learning_rate": 6.763519008939989e-06, "loss": 0.8008, "step": 260720 }, { "epoch": 1.6657296551371656, "grad_norm": 0.8312288522720337, "learning_rate": 6.760999156194775e-06, "loss": 0.6873, "step": 260730 }, { "epoch": 1.6657935422869046, "grad_norm": 0.6729956269264221, "learning_rate": 6.758479738903695e-06, "loss": 0.7072, "step": 260740 }, { "epoch": 1.665857429436643, "grad_norm": 0.7326833605766296, "learning_rate": 6.7559607570921445e-06, "loss": 1.0599, "step": 260750 }, { "epoch": 1.6659213165863818, "grad_norm": 0.9062512516975403, "learning_rate": 6.7534422107854715e-06, "loss": 0.8788, "step": 260760 }, { "epoch": 1.6659852037361205, "grad_norm": 1.1865432262420654, "learning_rate": 6.75092410000906e-06, "loss": 1.0603, "step": 260770 }, { "epoch": 1.6660490908858592, "grad_norm": 1.0734955072402954, "learning_rate": 6.748406424788251e-06, "loss": 0.8722, "step": 260780 }, { "epoch": 1.6661129780355979, "grad_norm": 1.5629147291183472, "learning_rate": 6.745889185148402e-06, "loss": 0.7382, "step": 260790 }, { "epoch": 1.6661768651853366, "grad_norm": 0.8913010954856873, "learning_rate": 6.7433723811148816e-06, "loss": 0.7195, "step": 260800 }, { "epoch": 1.6662407523350753, "grad_norm": 0.9528048038482666, "learning_rate": 6.74085601271301e-06, "loss": 0.8426, "step": 260810 }, { "epoch": 1.666304639484814, "grad_norm": 0.8141299486160278, "learning_rate": 6.738340079968158e-06, "loss": 1.0155, "step": 260820 }, { "epoch": 1.6663685266345527, "grad_norm": 0.9365982413291931, "learning_rate": 6.735824582905636e-06, "loss": 0.9692, "step": 260830 }, { "epoch": 1.6664324137842914, "grad_norm": 1.3122490644454956, "learning_rate": 6.7333095215507956e-06, "loss": 0.9352, "step": 260840 }, { "epoch": 1.6664963009340301, "grad_norm": 0.897006094455719, "learning_rate": 6.730794895928954e-06, "loss": 0.9062, "step": 260850 }, { "epoch": 1.6665601880837688, "grad_norm": 1.195637822151184, "learning_rate": 6.728280706065448e-06, "loss": 0.6769, "step": 260860 }, { "epoch": 1.6666240752335075, "grad_norm": 0.8795990943908691, "learning_rate": 6.725766951985585e-06, "loss": 0.8848, "step": 260870 }, { "epoch": 1.6666879623832462, "grad_norm": 1.1648136377334595, "learning_rate": 6.723253633714694e-06, "loss": 0.907, "step": 260880 }, { "epoch": 1.666751849532985, "grad_norm": 1.356160283088684, "learning_rate": 6.720740751278082e-06, "loss": 0.8372, "step": 260890 }, { "epoch": 1.6668157366827236, "grad_norm": 1.5002801418304443, "learning_rate": 6.7182283047010355e-06, "loss": 0.9494, "step": 260900 }, { "epoch": 1.6668796238324624, "grad_norm": 0.8991245627403259, "learning_rate": 6.715716294008889e-06, "loss": 0.7429, "step": 260910 }, { "epoch": 1.666943510982201, "grad_norm": 0.7147846221923828, "learning_rate": 6.713204719226912e-06, "loss": 1.0831, "step": 260920 }, { "epoch": 1.6670073981319398, "grad_norm": 1.274672508239746, "learning_rate": 6.710693580380423e-06, "loss": 1.1, "step": 260930 }, { "epoch": 1.6670712852816785, "grad_norm": 1.1722091436386108, "learning_rate": 6.70818287749469e-06, "loss": 1.0835, "step": 260940 }, { "epoch": 1.6671351724314172, "grad_norm": 0.9067403674125671, "learning_rate": 6.705672610595021e-06, "loss": 0.7682, "step": 260950 }, { "epoch": 1.6671990595811559, "grad_norm": 1.082120656967163, "learning_rate": 6.703162779706673e-06, "loss": 1.2327, "step": 260960 }, { "epoch": 1.6672629467308946, "grad_norm": 1.0921111106872559, "learning_rate": 6.700653384854944e-06, "loss": 0.8915, "step": 260970 }, { "epoch": 1.6673268338806333, "grad_norm": 1.0145679712295532, "learning_rate": 6.698144426065079e-06, "loss": 1.1276, "step": 260980 }, { "epoch": 1.667390721030372, "grad_norm": 0.8104844689369202, "learning_rate": 6.6956359033623795e-06, "loss": 0.7862, "step": 260990 }, { "epoch": 1.6674546081801105, "grad_norm": 1.0032187700271606, "learning_rate": 6.693127816772071e-06, "loss": 0.9296, "step": 261000 }, { "epoch": 1.6675184953298494, "grad_norm": 1.3268076181411743, "learning_rate": 6.6906201663194505e-06, "loss": 0.6579, "step": 261010 }, { "epoch": 1.667582382479588, "grad_norm": 0.8994929194450378, "learning_rate": 6.688112952029735e-06, "loss": 0.8517, "step": 261020 }, { "epoch": 1.6676462696293268, "grad_norm": 1.6113344430923462, "learning_rate": 6.685606173928205e-06, "loss": 0.742, "step": 261030 }, { "epoch": 1.6677101567790653, "grad_norm": 1.1202119588851929, "learning_rate": 6.68309983204008e-06, "loss": 0.6721, "step": 261040 }, { "epoch": 1.6677740439288042, "grad_norm": 1.158851146697998, "learning_rate": 6.680593926390627e-06, "loss": 0.9163, "step": 261050 }, { "epoch": 1.6678379310785427, "grad_norm": 0.9848284125328064, "learning_rate": 6.678088457005061e-06, "loss": 0.8478, "step": 261060 }, { "epoch": 1.6679018182282817, "grad_norm": 2.442725419998169, "learning_rate": 6.675583423908632e-06, "loss": 0.9834, "step": 261070 }, { "epoch": 1.6679657053780201, "grad_norm": 0.6988223195075989, "learning_rate": 6.67307882712655e-06, "loss": 0.7203, "step": 261080 }, { "epoch": 1.668029592527759, "grad_norm": 1.1047319173812866, "learning_rate": 6.670574666684054e-06, "loss": 0.8124, "step": 261090 }, { "epoch": 1.6680934796774975, "grad_norm": 0.8708413243293762, "learning_rate": 6.668070942606352e-06, "loss": 0.9876, "step": 261100 }, { "epoch": 1.6681573668272365, "grad_norm": 1.0053987503051758, "learning_rate": 6.665567654918659e-06, "loss": 0.8756, "step": 261110 }, { "epoch": 1.668221253976975, "grad_norm": 0.6905286908149719, "learning_rate": 6.6630648036462015e-06, "loss": 0.9977, "step": 261120 }, { "epoch": 1.6682851411267139, "grad_norm": 0.7959697246551514, "learning_rate": 6.660562388814162e-06, "loss": 0.8796, "step": 261130 }, { "epoch": 1.6683490282764524, "grad_norm": 0.7797834277153015, "learning_rate": 6.658060410447764e-06, "loss": 0.825, "step": 261140 }, { "epoch": 1.6684129154261913, "grad_norm": 0.5888381600379944, "learning_rate": 6.6555588685721815e-06, "loss": 0.6742, "step": 261150 }, { "epoch": 1.6684768025759298, "grad_norm": 1.089735984802246, "learning_rate": 6.653057763212628e-06, "loss": 1.0176, "step": 261160 }, { "epoch": 1.6685406897256687, "grad_norm": 0.662281334400177, "learning_rate": 6.650557094394278e-06, "loss": 0.7775, "step": 261170 }, { "epoch": 1.6686045768754072, "grad_norm": 0.7887861132621765, "learning_rate": 6.6480568621423265e-06, "loss": 0.6818, "step": 261180 }, { "epoch": 1.6686684640251461, "grad_norm": 1.110487937927246, "learning_rate": 6.645557066481939e-06, "loss": 0.7015, "step": 261190 }, { "epoch": 1.6687323511748846, "grad_norm": 0.8117868304252625, "learning_rate": 6.643057707438311e-06, "loss": 0.9294, "step": 261200 }, { "epoch": 1.6687962383246235, "grad_norm": 1.027706503868103, "learning_rate": 6.640558785036588e-06, "loss": 0.8158, "step": 261210 }, { "epoch": 1.668860125474362, "grad_norm": 1.0780733823776245, "learning_rate": 6.638060299301962e-06, "loss": 0.9441, "step": 261220 }, { "epoch": 1.668924012624101, "grad_norm": 0.6005793213844299, "learning_rate": 6.6355622502595696e-06, "loss": 0.7146, "step": 261230 }, { "epoch": 1.6689878997738394, "grad_norm": 1.7903164625167847, "learning_rate": 6.633064637934594e-06, "loss": 0.7896, "step": 261240 }, { "epoch": 1.6690517869235781, "grad_norm": 0.8301639556884766, "learning_rate": 6.630567462352161e-06, "loss": 0.9835, "step": 261250 }, { "epoch": 1.6691156740733168, "grad_norm": 0.9477317333221436, "learning_rate": 6.628070723537444e-06, "loss": 0.7894, "step": 261260 }, { "epoch": 1.6691795612230556, "grad_norm": 0.8038954734802246, "learning_rate": 6.625574421515568e-06, "loss": 1.0712, "step": 261270 }, { "epoch": 1.6692434483727943, "grad_norm": 0.5543741583824158, "learning_rate": 6.623078556311696e-06, "loss": 1.0573, "step": 261280 }, { "epoch": 1.669307335522533, "grad_norm": 0.5797288417816162, "learning_rate": 6.620583127950936e-06, "loss": 0.7646, "step": 261290 }, { "epoch": 1.6693712226722717, "grad_norm": 0.6953587532043457, "learning_rate": 6.618088136458428e-06, "loss": 0.8131, "step": 261300 }, { "epoch": 1.6694351098220104, "grad_norm": 0.9206896424293518, "learning_rate": 6.615593581859319e-06, "loss": 0.6902, "step": 261310 }, { "epoch": 1.669498996971749, "grad_norm": 0.7176907062530518, "learning_rate": 6.613348856284723e-06, "loss": 0.9896, "step": 261320 }, { "epoch": 1.6695628841214878, "grad_norm": 0.7699728012084961, "learning_rate": 6.61085513185225e-06, "loss": 0.9096, "step": 261330 }, { "epoch": 1.6696267712712265, "grad_norm": 1.0003939867019653, "learning_rate": 6.6083618443859906e-06, "loss": 1.0432, "step": 261340 }, { "epoch": 1.6696906584209652, "grad_norm": 0.6458075642585754, "learning_rate": 6.605868993911074e-06, "loss": 0.8929, "step": 261350 }, { "epoch": 1.669754545570704, "grad_norm": 0.8971933722496033, "learning_rate": 6.603376580452591e-06, "loss": 0.9198, "step": 261360 }, { "epoch": 1.6698184327204426, "grad_norm": 0.899749219417572, "learning_rate": 6.600884604035662e-06, "loss": 1.0874, "step": 261370 }, { "epoch": 1.6698823198701813, "grad_norm": 0.8684645295143127, "learning_rate": 6.598393064685354e-06, "loss": 0.9654, "step": 261380 }, { "epoch": 1.66994620701992, "grad_norm": 1.1220735311508179, "learning_rate": 6.595901962426793e-06, "loss": 0.9396, "step": 261390 }, { "epoch": 1.6700100941696587, "grad_norm": 0.7436129450798035, "learning_rate": 6.593411297285035e-06, "loss": 0.8479, "step": 261400 }, { "epoch": 1.6700739813193974, "grad_norm": 0.8839870095252991, "learning_rate": 6.590921069285188e-06, "loss": 0.8318, "step": 261410 }, { "epoch": 1.6701378684691361, "grad_norm": 1.568926453590393, "learning_rate": 6.588431278452312e-06, "loss": 0.6912, "step": 261420 }, { "epoch": 1.6702017556188749, "grad_norm": 0.7818752527236938, "learning_rate": 6.585941924811484e-06, "loss": 0.8117, "step": 261430 }, { "epoch": 1.6702656427686136, "grad_norm": 0.7253796458244324, "learning_rate": 6.583453008387797e-06, "loss": 0.8255, "step": 261440 }, { "epoch": 1.6703295299183523, "grad_norm": 0.8075445890426636, "learning_rate": 6.580964529206285e-06, "loss": 0.8221, "step": 261450 }, { "epoch": 1.670393417068091, "grad_norm": 1.1005500555038452, "learning_rate": 6.578476487292029e-06, "loss": 0.7249, "step": 261460 }, { "epoch": 1.6704573042178297, "grad_norm": 0.7692040205001831, "learning_rate": 6.575988882670075e-06, "loss": 0.815, "step": 261470 }, { "epoch": 1.6705211913675684, "grad_norm": 1.0020064115524292, "learning_rate": 6.573501715365487e-06, "loss": 0.9109, "step": 261480 }, { "epoch": 1.6705850785173069, "grad_norm": 0.8735017776489258, "learning_rate": 6.571014985403301e-06, "loss": 0.8558, "step": 261490 }, { "epoch": 1.6706489656670458, "grad_norm": 1.1462574005126953, "learning_rate": 6.568528692808568e-06, "loss": 1.2546, "step": 261500 }, { "epoch": 1.6707128528167843, "grad_norm": 0.6212501525878906, "learning_rate": 6.566042837606323e-06, "loss": 0.8025, "step": 261510 }, { "epoch": 1.6707767399665232, "grad_norm": 0.7799587249755859, "learning_rate": 6.563557419821614e-06, "loss": 0.8916, "step": 261520 }, { "epoch": 1.6708406271162617, "grad_norm": 1.1324352025985718, "learning_rate": 6.561072439479443e-06, "loss": 0.7127, "step": 261530 }, { "epoch": 1.6709045142660006, "grad_norm": 0.8433434367179871, "learning_rate": 6.558587896604856e-06, "loss": 0.7443, "step": 261540 }, { "epoch": 1.670968401415739, "grad_norm": 0.7592739462852478, "learning_rate": 6.556103791222879e-06, "loss": 0.7583, "step": 261550 }, { "epoch": 1.671032288565478, "grad_norm": 1.1327989101409912, "learning_rate": 6.553620123358506e-06, "loss": 0.9683, "step": 261560 }, { "epoch": 1.6710961757152165, "grad_norm": 0.8317406177520752, "learning_rate": 6.5511368930367855e-06, "loss": 0.6488, "step": 261570 }, { "epoch": 1.6711600628649554, "grad_norm": 1.7189500331878662, "learning_rate": 6.548654100282686e-06, "loss": 0.9956, "step": 261580 }, { "epoch": 1.671223950014694, "grad_norm": 0.8068856000900269, "learning_rate": 6.546171745121243e-06, "loss": 1.0227, "step": 261590 }, { "epoch": 1.6712878371644329, "grad_norm": 1.6268513202667236, "learning_rate": 6.543689827577432e-06, "loss": 0.8547, "step": 261600 }, { "epoch": 1.6713517243141713, "grad_norm": 1.141613483428955, "learning_rate": 6.541208347676275e-06, "loss": 0.7498, "step": 261610 }, { "epoch": 1.6714156114639103, "grad_norm": 0.9748889207839966, "learning_rate": 6.538727305442732e-06, "loss": 0.9626, "step": 261620 }, { "epoch": 1.6714794986136488, "grad_norm": 1.0086246728897095, "learning_rate": 6.5362467009018154e-06, "loss": 0.9017, "step": 261630 }, { "epoch": 1.6715433857633877, "grad_norm": 1.1621673107147217, "learning_rate": 6.5337665340784835e-06, "loss": 0.6427, "step": 261640 }, { "epoch": 1.6716072729131262, "grad_norm": 0.7008302807807922, "learning_rate": 6.5312868049977434e-06, "loss": 0.7493, "step": 261650 }, { "epoch": 1.671671160062865, "grad_norm": 1.4598950147628784, "learning_rate": 6.528807513684537e-06, "loss": 1.1208, "step": 261660 }, { "epoch": 1.6717350472126036, "grad_norm": 0.8973243832588196, "learning_rate": 6.526328660163855e-06, "loss": 0.9835, "step": 261670 }, { "epoch": 1.6717989343623425, "grad_norm": 1.616610050201416, "learning_rate": 6.523850244460644e-06, "loss": 0.9121, "step": 261680 }, { "epoch": 1.671862821512081, "grad_norm": 0.8376414179801941, "learning_rate": 6.521372266599885e-06, "loss": 0.8662, "step": 261690 }, { "epoch": 1.67192670866182, "grad_norm": 1.0430593490600586, "learning_rate": 6.518894726606511e-06, "loss": 0.813, "step": 261700 }, { "epoch": 1.6719905958115584, "grad_norm": 0.6665481328964233, "learning_rate": 6.516417624505494e-06, "loss": 0.7176, "step": 261710 }, { "epoch": 1.6720544829612973, "grad_norm": 0.7445017695426941, "learning_rate": 6.513940960321757e-06, "loss": 0.8957, "step": 261720 }, { "epoch": 1.6721183701110358, "grad_norm": 1.3266037702560425, "learning_rate": 6.5114647340802695e-06, "loss": 0.823, "step": 261730 }, { "epoch": 1.6721822572607745, "grad_norm": 0.736445963382721, "learning_rate": 6.508988945805944e-06, "loss": 0.9823, "step": 261740 }, { "epoch": 1.6722461444105132, "grad_norm": 0.9925602674484253, "learning_rate": 6.506513595523722e-06, "loss": 1.0593, "step": 261750 }, { "epoch": 1.672310031560252, "grad_norm": 0.9995372295379639, "learning_rate": 6.504038683258551e-06, "loss": 1.0638, "step": 261760 }, { "epoch": 1.6723739187099906, "grad_norm": 0.6971041560173035, "learning_rate": 6.501564209035327e-06, "loss": 1.1433, "step": 261770 }, { "epoch": 1.6724378058597293, "grad_norm": 0.8027698397636414, "learning_rate": 6.499090172878991e-06, "loss": 0.8928, "step": 261780 }, { "epoch": 1.672501693009468, "grad_norm": 0.8169634342193604, "learning_rate": 6.49661657481444e-06, "loss": 0.746, "step": 261790 }, { "epoch": 1.6725655801592068, "grad_norm": 0.7390976548194885, "learning_rate": 6.49414341486661e-06, "loss": 0.9419, "step": 261800 }, { "epoch": 1.6726294673089455, "grad_norm": 1.2797377109527588, "learning_rate": 6.491670693060381e-06, "loss": 1.0843, "step": 261810 }, { "epoch": 1.6726933544586842, "grad_norm": 0.6688075065612793, "learning_rate": 6.489198409420682e-06, "loss": 0.954, "step": 261820 }, { "epoch": 1.6727572416084229, "grad_norm": 0.9320153594017029, "learning_rate": 6.486726563972384e-06, "loss": 0.9742, "step": 261830 }, { "epoch": 1.6728211287581616, "grad_norm": 1.3801181316375732, "learning_rate": 6.4842551567404045e-06, "loss": 0.9538, "step": 261840 }, { "epoch": 1.6728850159079003, "grad_norm": 0.7845503091812134, "learning_rate": 6.481784187749612e-06, "loss": 0.7776, "step": 261850 }, { "epoch": 1.672948903057639, "grad_norm": 1.392819881439209, "learning_rate": 6.47931365702491e-06, "loss": 0.7982, "step": 261860 }, { "epoch": 1.6730127902073777, "grad_norm": 1.0314170122146606, "learning_rate": 6.476843564591162e-06, "loss": 0.8069, "step": 261870 }, { "epoch": 1.6730766773571164, "grad_norm": 0.861655592918396, "learning_rate": 6.474373910473258e-06, "loss": 0.59, "step": 261880 }, { "epoch": 1.673140564506855, "grad_norm": 0.8280917406082153, "learning_rate": 6.471904694696057e-06, "loss": 0.6813, "step": 261890 }, { "epoch": 1.6732044516565938, "grad_norm": 1.4525123834609985, "learning_rate": 6.469435917284445e-06, "loss": 0.6511, "step": 261900 }, { "epoch": 1.6732683388063325, "grad_norm": 1.097639799118042, "learning_rate": 6.466967578263261e-06, "loss": 0.8087, "step": 261910 }, { "epoch": 1.6733322259560712, "grad_norm": 1.2644404172897339, "learning_rate": 6.4644996776573815e-06, "loss": 0.8811, "step": 261920 }, { "epoch": 1.67339611310581, "grad_norm": 0.8408343195915222, "learning_rate": 6.462032215491648e-06, "loss": 0.8309, "step": 261930 }, { "epoch": 1.6734600002555486, "grad_norm": 1.4719483852386475, "learning_rate": 6.4595651917909225e-06, "loss": 0.7323, "step": 261940 }, { "epoch": 1.6735238874052873, "grad_norm": 1.1225919723510742, "learning_rate": 6.457098606580036e-06, "loss": 0.9404, "step": 261950 }, { "epoch": 1.673587774555026, "grad_norm": 1.05169677734375, "learning_rate": 6.454632459883836e-06, "loss": 0.7675, "step": 261960 }, { "epoch": 1.6736516617047648, "grad_norm": 1.0684877634048462, "learning_rate": 6.4521667517271736e-06, "loss": 0.9407, "step": 261970 }, { "epoch": 1.6737155488545032, "grad_norm": 1.4952465295791626, "learning_rate": 6.449701482134851e-06, "loss": 0.6827, "step": 261980 }, { "epoch": 1.6737794360042422, "grad_norm": 2.874864101409912, "learning_rate": 6.447236651131722e-06, "loss": 0.8218, "step": 261990 }, { "epoch": 1.6738433231539807, "grad_norm": 1.047302484512329, "learning_rate": 6.4447722587425985e-06, "loss": 0.7981, "step": 262000 }, { "epoch": 1.6739072103037196, "grad_norm": 1.5692522525787354, "learning_rate": 6.442308304992295e-06, "loss": 1.2054, "step": 262010 }, { "epoch": 1.673971097453458, "grad_norm": 1.0707038640975952, "learning_rate": 6.439844789905625e-06, "loss": 0.8025, "step": 262020 }, { "epoch": 1.674034984603197, "grad_norm": 0.7095916867256165, "learning_rate": 6.437381713507412e-06, "loss": 0.6055, "step": 262030 }, { "epoch": 1.6740988717529355, "grad_norm": 1.031847357749939, "learning_rate": 6.4349190758224375e-06, "loss": 0.8475, "step": 262040 }, { "epoch": 1.6741627589026744, "grad_norm": 0.6809356808662415, "learning_rate": 6.432456876875537e-06, "loss": 0.8638, "step": 262050 }, { "epoch": 1.674226646052413, "grad_norm": 1.3535555601119995, "learning_rate": 6.429995116691468e-06, "loss": 0.7976, "step": 262060 }, { "epoch": 1.6742905332021518, "grad_norm": 1.0233763456344604, "learning_rate": 6.427533795295043e-06, "loss": 1.1708, "step": 262070 }, { "epoch": 1.6743544203518903, "grad_norm": 1.219503402709961, "learning_rate": 6.425072912711061e-06, "loss": 0.8271, "step": 262080 }, { "epoch": 1.6744183075016292, "grad_norm": 1.8418071269989014, "learning_rate": 6.4226124689642784e-06, "loss": 0.9065, "step": 262090 }, { "epoch": 1.6744821946513677, "grad_norm": 1.0959925651550293, "learning_rate": 6.4201524640795045e-06, "loss": 1.0587, "step": 262100 }, { "epoch": 1.6745460818011066, "grad_norm": 0.8647409081459045, "learning_rate": 6.4176928980814785e-06, "loss": 0.9943, "step": 262110 }, { "epoch": 1.6746099689508451, "grad_norm": 1.121370553970337, "learning_rate": 6.415233770995005e-06, "loss": 1.0122, "step": 262120 }, { "epoch": 1.674673856100584, "grad_norm": 0.7462054491043091, "learning_rate": 6.412775082844824e-06, "loss": 0.8022, "step": 262130 }, { "epoch": 1.6747377432503225, "grad_norm": 1.5839956998825073, "learning_rate": 6.410316833655716e-06, "loss": 0.7529, "step": 262140 }, { "epoch": 1.6748016304000615, "grad_norm": 0.8902942538261414, "learning_rate": 6.4078590234524174e-06, "loss": 0.9492, "step": 262150 }, { "epoch": 1.6748655175498, "grad_norm": 1.2881721258163452, "learning_rate": 6.405401652259701e-06, "loss": 0.9617, "step": 262160 }, { "epoch": 1.6749294046995389, "grad_norm": 1.0738145112991333, "learning_rate": 6.402944720102294e-06, "loss": 0.869, "step": 262170 }, { "epoch": 1.6749932918492774, "grad_norm": 1.1131645441055298, "learning_rate": 6.400488227004958e-06, "loss": 0.7081, "step": 262180 }, { "epoch": 1.6750571789990163, "grad_norm": 0.7208460569381714, "learning_rate": 6.398032172992418e-06, "loss": 1.1058, "step": 262190 }, { "epoch": 1.6751210661487548, "grad_norm": 0.9222758412361145, "learning_rate": 6.395576558089428e-06, "loss": 0.8924, "step": 262200 }, { "epoch": 1.6751849532984937, "grad_norm": 1.1875065565109253, "learning_rate": 6.393121382320688e-06, "loss": 0.6993, "step": 262210 }, { "epoch": 1.6752488404482322, "grad_norm": 1.047171950340271, "learning_rate": 6.390666645710958e-06, "loss": 0.7424, "step": 262220 }, { "epoch": 1.675312727597971, "grad_norm": 0.8368127346038818, "learning_rate": 6.388212348284928e-06, "loss": 0.7994, "step": 262230 }, { "epoch": 1.6753766147477096, "grad_norm": 0.6694656014442444, "learning_rate": 6.38575849006734e-06, "loss": 0.8353, "step": 262240 }, { "epoch": 1.6754405018974483, "grad_norm": 1.1006664037704468, "learning_rate": 6.3833050710828845e-06, "loss": 0.7687, "step": 262250 }, { "epoch": 1.675504389047187, "grad_norm": 1.3615388870239258, "learning_rate": 6.380852091356293e-06, "loss": 0.7974, "step": 262260 }, { "epoch": 1.6755682761969257, "grad_norm": 1.3918992280960083, "learning_rate": 6.378399550912245e-06, "loss": 0.8418, "step": 262270 }, { "epoch": 1.6756321633466644, "grad_norm": 0.9409014582633972, "learning_rate": 6.375947449775449e-06, "loss": 0.7166, "step": 262280 }, { "epoch": 1.6756960504964031, "grad_norm": 0.5877164006233215, "learning_rate": 6.373495787970618e-06, "loss": 0.8951, "step": 262290 }, { "epoch": 1.6757599376461418, "grad_norm": 1.2059690952301025, "learning_rate": 6.371044565522416e-06, "loss": 1.0055, "step": 262300 }, { "epoch": 1.6758238247958805, "grad_norm": 1.808434009552002, "learning_rate": 6.368593782455545e-06, "loss": 0.9485, "step": 262310 }, { "epoch": 1.6758877119456193, "grad_norm": 0.8852072954177856, "learning_rate": 6.366143438794675e-06, "loss": 0.9596, "step": 262320 }, { "epoch": 1.675951599095358, "grad_norm": 1.7130441665649414, "learning_rate": 6.363693534564497e-06, "loss": 0.8242, "step": 262330 }, { "epoch": 1.6760154862450967, "grad_norm": 0.662137508392334, "learning_rate": 6.361244069789668e-06, "loss": 0.6081, "step": 262340 }, { "epoch": 1.6760793733948354, "grad_norm": 1.2962076663970947, "learning_rate": 6.358795044494875e-06, "loss": 0.8961, "step": 262350 }, { "epoch": 1.676143260544574, "grad_norm": 1.1145683526992798, "learning_rate": 6.3563464587047596e-06, "loss": 0.9144, "step": 262360 }, { "epoch": 1.6762071476943128, "grad_norm": 0.998020350933075, "learning_rate": 6.353898312444001e-06, "loss": 0.9555, "step": 262370 }, { "epoch": 1.6762710348440515, "grad_norm": 1.1781702041625977, "learning_rate": 6.351450605737236e-06, "loss": 1.0516, "step": 262380 }, { "epoch": 1.6763349219937902, "grad_norm": 0.9183396697044373, "learning_rate": 6.349003338609138e-06, "loss": 0.791, "step": 262390 }, { "epoch": 1.676398809143529, "grad_norm": 1.1460124254226685, "learning_rate": 6.346556511084329e-06, "loss": 0.8932, "step": 262400 }, { "epoch": 1.6764626962932676, "grad_norm": 0.7981510758399963, "learning_rate": 6.344110123187469e-06, "loss": 0.94, "step": 262410 }, { "epoch": 1.6765265834430063, "grad_norm": 0.9553250074386597, "learning_rate": 6.341664174943179e-06, "loss": 1.0921, "step": 262420 }, { "epoch": 1.676590470592745, "grad_norm": 0.48150333762168884, "learning_rate": 6.339218666376106e-06, "loss": 0.5992, "step": 262430 }, { "epoch": 1.6766543577424837, "grad_norm": 1.119012475013733, "learning_rate": 6.33677359751087e-06, "loss": 1.0035, "step": 262440 }, { "epoch": 1.6767182448922224, "grad_norm": 1.050771951675415, "learning_rate": 6.3343289683721055e-06, "loss": 0.9006, "step": 262450 }, { "epoch": 1.6767821320419611, "grad_norm": 0.6806208491325378, "learning_rate": 6.331884778984415e-06, "loss": 0.9743, "step": 262460 }, { "epoch": 1.6768460191916996, "grad_norm": 0.9381636381149292, "learning_rate": 6.329441029372435e-06, "loss": 0.861, "step": 262470 }, { "epoch": 1.6769099063414385, "grad_norm": 1.048666000366211, "learning_rate": 6.326997719560751e-06, "loss": 0.9453, "step": 262480 }, { "epoch": 1.676973793491177, "grad_norm": 0.7955247163772583, "learning_rate": 6.324554849573994e-06, "loss": 0.8905, "step": 262490 }, { "epoch": 1.677037680640916, "grad_norm": 0.6810150742530823, "learning_rate": 6.322112419436754e-06, "loss": 0.6696, "step": 262500 }, { "epoch": 1.6771015677906544, "grad_norm": 1.108661413192749, "learning_rate": 6.319670429173613e-06, "loss": 0.7696, "step": 262510 }, { "epoch": 1.6771654549403934, "grad_norm": 0.9814617037773132, "learning_rate": 6.3172288788092e-06, "loss": 0.8261, "step": 262520 }, { "epoch": 1.6772293420901319, "grad_norm": 1.04248046875, "learning_rate": 6.3147877683680635e-06, "loss": 0.8631, "step": 262530 }, { "epoch": 1.6772932292398708, "grad_norm": 1.0217331647872925, "learning_rate": 6.312347097874821e-06, "loss": 0.8714, "step": 262540 }, { "epoch": 1.6773571163896093, "grad_norm": 1.6767487525939941, "learning_rate": 6.309906867354032e-06, "loss": 1.0093, "step": 262550 }, { "epoch": 1.6774210035393482, "grad_norm": 1.2772984504699707, "learning_rate": 6.307467076830287e-06, "loss": 0.9749, "step": 262560 }, { "epoch": 1.6774848906890867, "grad_norm": 1.5580352544784546, "learning_rate": 6.3050277263281336e-06, "loss": 0.9933, "step": 262570 }, { "epoch": 1.6775487778388256, "grad_norm": 0.8894296884536743, "learning_rate": 6.302588815872168e-06, "loss": 0.6818, "step": 262580 }, { "epoch": 1.677612664988564, "grad_norm": 0.7048232555389404, "learning_rate": 6.300150345486921e-06, "loss": 0.8798, "step": 262590 }, { "epoch": 1.677676552138303, "grad_norm": 0.7692033052444458, "learning_rate": 6.297712315196969e-06, "loss": 0.6859, "step": 262600 }, { "epoch": 1.6777404392880415, "grad_norm": 0.8606083989143372, "learning_rate": 6.295274725026873e-06, "loss": 1.0156, "step": 262610 }, { "epoch": 1.6778043264377804, "grad_norm": 1.0161243677139282, "learning_rate": 6.292837575001159e-06, "loss": 0.8682, "step": 262620 }, { "epoch": 1.677868213587519, "grad_norm": 1.0023568868637085, "learning_rate": 6.290400865144391e-06, "loss": 1.0135, "step": 262630 }, { "epoch": 1.6779321007372578, "grad_norm": 1.5799769163131714, "learning_rate": 6.287964595481094e-06, "loss": 0.8239, "step": 262640 }, { "epoch": 1.6779959878869963, "grad_norm": 1.00106942653656, "learning_rate": 6.285528766035814e-06, "loss": 0.83, "step": 262650 }, { "epoch": 1.6780598750367353, "grad_norm": 0.7219306230545044, "learning_rate": 6.283093376833071e-06, "loss": 0.8355, "step": 262660 }, { "epoch": 1.6781237621864737, "grad_norm": 0.874301552772522, "learning_rate": 6.280658427897413e-06, "loss": 0.7951, "step": 262670 }, { "epoch": 1.6781876493362127, "grad_norm": 0.7081124782562256, "learning_rate": 6.278223919253334e-06, "loss": 0.8836, "step": 262680 }, { "epoch": 1.6782515364859512, "grad_norm": 0.8233230113983154, "learning_rate": 6.275789850925373e-06, "loss": 0.7395, "step": 262690 }, { "epoch": 1.6783154236356899, "grad_norm": 0.8121306896209717, "learning_rate": 6.273356222938026e-06, "loss": 0.9084, "step": 262700 }, { "epoch": 1.6783793107854286, "grad_norm": 1.0645307302474976, "learning_rate": 6.270923035315818e-06, "loss": 1.0377, "step": 262710 }, { "epoch": 1.6784431979351673, "grad_norm": 1.1339856386184692, "learning_rate": 6.268490288083239e-06, "loss": 0.7891, "step": 262720 }, { "epoch": 1.678507085084906, "grad_norm": 0.8035052418708801, "learning_rate": 6.266057981264805e-06, "loss": 0.7972, "step": 262730 }, { "epoch": 1.6785709722346447, "grad_norm": 0.7048985362052917, "learning_rate": 6.263626114884996e-06, "loss": 0.7198, "step": 262740 }, { "epoch": 1.6786348593843834, "grad_norm": 1.036059021949768, "learning_rate": 6.261194688968313e-06, "loss": 0.9324, "step": 262750 }, { "epoch": 1.678698746534122, "grad_norm": 0.9852795004844666, "learning_rate": 6.258763703539233e-06, "loss": 0.7835, "step": 262760 }, { "epoch": 1.6787626336838608, "grad_norm": 0.8867654204368591, "learning_rate": 6.2563331586222574e-06, "loss": 0.8751, "step": 262770 }, { "epoch": 1.6788265208335995, "grad_norm": 1.5037943124771118, "learning_rate": 6.253903054241833e-06, "loss": 0.7739, "step": 262780 }, { "epoch": 1.6788904079833382, "grad_norm": 1.1166731119155884, "learning_rate": 6.251473390422468e-06, "loss": 0.9149, "step": 262790 }, { "epoch": 1.678954295133077, "grad_norm": 1.3753933906555176, "learning_rate": 6.2490441671886e-06, "loss": 0.727, "step": 262800 }, { "epoch": 1.6790181822828156, "grad_norm": 1.3363189697265625, "learning_rate": 6.246615384564702e-06, "loss": 0.7728, "step": 262810 }, { "epoch": 1.6790820694325543, "grad_norm": 0.8456787467002869, "learning_rate": 6.244187042575256e-06, "loss": 0.8522, "step": 262820 }, { "epoch": 1.679145956582293, "grad_norm": 2.5984883308410645, "learning_rate": 6.241759141244691e-06, "loss": 0.9173, "step": 262830 }, { "epoch": 1.6792098437320317, "grad_norm": 0.870244026184082, "learning_rate": 6.239331680597477e-06, "loss": 0.6767, "step": 262840 }, { "epoch": 1.6792737308817705, "grad_norm": 0.7208239436149597, "learning_rate": 6.236904660658039e-06, "loss": 0.9085, "step": 262850 }, { "epoch": 1.6793376180315092, "grad_norm": 3.9711785316467285, "learning_rate": 6.234478081450845e-06, "loss": 0.9046, "step": 262860 }, { "epoch": 1.6794015051812479, "grad_norm": 0.9699562191963196, "learning_rate": 6.232051943000306e-06, "loss": 0.8882, "step": 262870 }, { "epoch": 1.6794653923309866, "grad_norm": 1.0530452728271484, "learning_rate": 6.229626245330877e-06, "loss": 0.9335, "step": 262880 }, { "epoch": 1.6795292794807253, "grad_norm": 1.5956165790557861, "learning_rate": 6.227200988466974e-06, "loss": 1.0207, "step": 262890 }, { "epoch": 1.679593166630464, "grad_norm": 0.7701212763786316, "learning_rate": 6.224776172433033e-06, "loss": 0.9123, "step": 262900 }, { "epoch": 1.6796570537802027, "grad_norm": 1.047654390335083, "learning_rate": 6.222351797253456e-06, "loss": 0.9776, "step": 262910 }, { "epoch": 1.6797209409299414, "grad_norm": 0.7686278820037842, "learning_rate": 6.219927862952679e-06, "loss": 0.797, "step": 262920 }, { "epoch": 1.67978482807968, "grad_norm": 1.1466424465179443, "learning_rate": 6.217504369555094e-06, "loss": 0.7509, "step": 262930 }, { "epoch": 1.6798487152294188, "grad_norm": 0.7692174911499023, "learning_rate": 6.215081317085131e-06, "loss": 0.9404, "step": 262940 }, { "epoch": 1.6799126023791575, "grad_norm": 1.1606929302215576, "learning_rate": 6.212658705567165e-06, "loss": 0.7926, "step": 262950 }, { "epoch": 1.679976489528896, "grad_norm": 0.854450523853302, "learning_rate": 6.2102365350256155e-06, "loss": 0.6672, "step": 262960 }, { "epoch": 1.680040376678635, "grad_norm": 0.9151691198348999, "learning_rate": 6.207814805484863e-06, "loss": 0.7784, "step": 262970 }, { "epoch": 1.6801042638283734, "grad_norm": 1.528312087059021, "learning_rate": 6.205393516969304e-06, "loss": 1.0193, "step": 262980 }, { "epoch": 1.6801681509781123, "grad_norm": 1.977976679801941, "learning_rate": 6.202972669503326e-06, "loss": 1.1171, "step": 262990 }, { "epoch": 1.6802320381278508, "grad_norm": 1.2395837306976318, "learning_rate": 6.200552263111292e-06, "loss": 0.8609, "step": 263000 }, { "epoch": 1.6802959252775898, "grad_norm": 1.0361305475234985, "learning_rate": 6.198132297817599e-06, "loss": 0.8699, "step": 263010 }, { "epoch": 1.6803598124273282, "grad_norm": 0.8011088371276855, "learning_rate": 6.195712773646595e-06, "loss": 0.9936, "step": 263020 }, { "epoch": 1.6804236995770672, "grad_norm": 0.918425440788269, "learning_rate": 6.19329369062267e-06, "loss": 0.9927, "step": 263030 }, { "epoch": 1.6804875867268056, "grad_norm": 1.8617467880249023, "learning_rate": 6.190875048770167e-06, "loss": 0.8369, "step": 263040 }, { "epoch": 1.6805514738765446, "grad_norm": 1.9793410301208496, "learning_rate": 6.188456848113461e-06, "loss": 1.0095, "step": 263050 }, { "epoch": 1.680615361026283, "grad_norm": 1.0629090070724487, "learning_rate": 6.186039088676887e-06, "loss": 1.0583, "step": 263060 }, { "epoch": 1.680679248176022, "grad_norm": 1.4704509973526, "learning_rate": 6.183621770484816e-06, "loss": 0.8265, "step": 263070 }, { "epoch": 1.6807431353257605, "grad_norm": 1.697485327720642, "learning_rate": 6.181204893561571e-06, "loss": 0.7345, "step": 263080 }, { "epoch": 1.6808070224754994, "grad_norm": 0.795141339302063, "learning_rate": 6.178788457931512e-06, "loss": 0.8883, "step": 263090 }, { "epoch": 1.6808709096252379, "grad_norm": 0.849793553352356, "learning_rate": 6.176372463618951e-06, "loss": 1.1035, "step": 263100 }, { "epoch": 1.6809347967749768, "grad_norm": 0.7248162031173706, "learning_rate": 6.173956910648243e-06, "loss": 0.7855, "step": 263110 }, { "epoch": 1.6809986839247153, "grad_norm": 1.4386005401611328, "learning_rate": 6.17154179904369e-06, "loss": 0.6941, "step": 263120 }, { "epoch": 1.6810625710744542, "grad_norm": 0.7645585536956787, "learning_rate": 6.1691271288296324e-06, "loss": 0.9207, "step": 263130 }, { "epoch": 1.6811264582241927, "grad_norm": 0.9256292581558228, "learning_rate": 6.166712900030397e-06, "loss": 0.9551, "step": 263140 }, { "epoch": 1.6811903453739316, "grad_norm": 1.810821294784546, "learning_rate": 6.164299112670269e-06, "loss": 0.9704, "step": 263150 }, { "epoch": 1.6812542325236701, "grad_norm": 1.022916555404663, "learning_rate": 6.161885766773584e-06, "loss": 0.7118, "step": 263160 }, { "epoch": 1.681318119673409, "grad_norm": 4.4502668380737305, "learning_rate": 6.159472862364618e-06, "loss": 0.8046, "step": 263170 }, { "epoch": 1.6813820068231475, "grad_norm": 0.5222861170768738, "learning_rate": 6.157060399467707e-06, "loss": 0.9441, "step": 263180 }, { "epoch": 1.6814458939728862, "grad_norm": 0.9526025056838989, "learning_rate": 6.154648378107114e-06, "loss": 0.8466, "step": 263190 }, { "epoch": 1.681509781122625, "grad_norm": 0.8658684492111206, "learning_rate": 6.152236798307154e-06, "loss": 0.9679, "step": 263200 }, { "epoch": 1.6815736682723637, "grad_norm": 0.6978765726089478, "learning_rate": 6.14982566009209e-06, "loss": 0.7612, "step": 263210 }, { "epoch": 1.6816375554221024, "grad_norm": 0.8792979121208191, "learning_rate": 6.147414963486231e-06, "loss": 0.8342, "step": 263220 }, { "epoch": 1.681701442571841, "grad_norm": 0.8527263402938843, "learning_rate": 6.14500470851383e-06, "loss": 0.9531, "step": 263230 }, { "epoch": 1.6817653297215798, "grad_norm": 1.1380363702774048, "learning_rate": 6.142594895199183e-06, "loss": 0.7972, "step": 263240 }, { "epoch": 1.6818292168713185, "grad_norm": 0.73790043592453, "learning_rate": 6.140185523566533e-06, "loss": 0.8702, "step": 263250 }, { "epoch": 1.6818931040210572, "grad_norm": 0.873717725276947, "learning_rate": 6.1377765936401765e-06, "loss": 1.0954, "step": 263260 }, { "epoch": 1.681956991170796, "grad_norm": 1.1089555025100708, "learning_rate": 6.135368105444339e-06, "loss": 0.9219, "step": 263270 }, { "epoch": 1.6820208783205346, "grad_norm": 2.0165822505950928, "learning_rate": 6.1329600590033064e-06, "loss": 1.0447, "step": 263280 }, { "epoch": 1.6820847654702733, "grad_norm": 0.8352879881858826, "learning_rate": 6.130552454341304e-06, "loss": 1.0405, "step": 263290 }, { "epoch": 1.682148652620012, "grad_norm": 2.2425537109375, "learning_rate": 6.128145291482601e-06, "loss": 0.7285, "step": 263300 }, { "epoch": 1.6822125397697507, "grad_norm": 0.6852495670318604, "learning_rate": 6.125738570451422e-06, "loss": 0.8092, "step": 263310 }, { "epoch": 1.6822764269194894, "grad_norm": 0.9322900772094727, "learning_rate": 6.123332291272021e-06, "loss": 0.8753, "step": 263320 }, { "epoch": 1.6823403140692281, "grad_norm": 1.0719099044799805, "learning_rate": 6.120926453968612e-06, "loss": 0.9331, "step": 263330 }, { "epoch": 1.6824042012189668, "grad_norm": 1.2141951322555542, "learning_rate": 6.118521058565435e-06, "loss": 0.9405, "step": 263340 }, { "epoch": 1.6824680883687055, "grad_norm": 0.8354449272155762, "learning_rate": 6.116116105086728e-06, "loss": 0.9036, "step": 263350 }, { "epoch": 1.6825319755184442, "grad_norm": 0.6558452248573303, "learning_rate": 6.1137115935566815e-06, "loss": 0.8491, "step": 263360 }, { "epoch": 1.682595862668183, "grad_norm": 1.2620372772216797, "learning_rate": 6.1115479110657845e-06, "loss": 0.8819, "step": 263370 }, { "epoch": 1.6826597498179217, "grad_norm": 0.8358511328697205, "learning_rate": 6.109144239304932e-06, "loss": 0.8308, "step": 263380 }, { "epoch": 1.6827236369676604, "grad_norm": 0.8426769375801086, "learning_rate": 6.1067410095629825e-06, "loss": 0.8139, "step": 263390 }, { "epoch": 1.682787524117399, "grad_norm": 0.7442500591278076, "learning_rate": 6.104338221864109e-06, "loss": 0.684, "step": 263400 }, { "epoch": 1.6828514112671378, "grad_norm": 0.9698424935340881, "learning_rate": 6.101935876232534e-06, "loss": 0.9235, "step": 263410 }, { "epoch": 1.6829152984168765, "grad_norm": 0.6997389793395996, "learning_rate": 6.099533972692439e-06, "loss": 0.8268, "step": 263420 }, { "epoch": 1.682979185566615, "grad_norm": 0.8244001269340515, "learning_rate": 6.097132511268022e-06, "loss": 0.949, "step": 263430 }, { "epoch": 1.683043072716354, "grad_norm": 0.6640798449516296, "learning_rate": 6.094731491983446e-06, "loss": 1.0296, "step": 263440 }, { "epoch": 1.6831069598660924, "grad_norm": 2.9415547847747803, "learning_rate": 6.092330914862915e-06, "loss": 1.0044, "step": 263450 }, { "epoch": 1.6831708470158313, "grad_norm": 1.5144875049591064, "learning_rate": 6.089930779930608e-06, "loss": 0.8793, "step": 263460 }, { "epoch": 1.6832347341655698, "grad_norm": 1.1107733249664307, "learning_rate": 6.0875310872106736e-06, "loss": 1.1556, "step": 263470 }, { "epoch": 1.6832986213153087, "grad_norm": 0.9494339227676392, "learning_rate": 6.085131836727298e-06, "loss": 0.7324, "step": 263480 }, { "epoch": 1.6833625084650472, "grad_norm": 1.08641517162323, "learning_rate": 6.082733028504628e-06, "loss": 0.6353, "step": 263490 }, { "epoch": 1.6834263956147861, "grad_norm": 1.4187917709350586, "learning_rate": 6.080334662566839e-06, "loss": 0.8269, "step": 263500 }, { "epoch": 1.6834902827645246, "grad_norm": 0.7869080901145935, "learning_rate": 6.077936738938073e-06, "loss": 0.7547, "step": 263510 }, { "epoch": 1.6835541699142635, "grad_norm": 0.8862486481666565, "learning_rate": 6.075539257642482e-06, "loss": 0.8882, "step": 263520 }, { "epoch": 1.683618057064002, "grad_norm": 0.8505191802978516, "learning_rate": 6.073142218704209e-06, "loss": 0.725, "step": 263530 }, { "epoch": 1.683681944213741, "grad_norm": 1.587677240371704, "learning_rate": 6.0707456221474005e-06, "loss": 0.5713, "step": 263540 }, { "epoch": 1.6837458313634794, "grad_norm": 1.114354133605957, "learning_rate": 6.068349467996182e-06, "loss": 0.8215, "step": 263550 }, { "epoch": 1.6838097185132184, "grad_norm": 0.7740732431411743, "learning_rate": 6.065953756274695e-06, "loss": 0.8324, "step": 263560 }, { "epoch": 1.6838736056629569, "grad_norm": 2.6367008686065674, "learning_rate": 6.06355848700706e-06, "loss": 0.995, "step": 263570 }, { "epoch": 1.6839374928126958, "grad_norm": 0.7278146147727966, "learning_rate": 6.061163660217406e-06, "loss": 0.6742, "step": 263580 }, { "epoch": 1.6840013799624343, "grad_norm": 1.1852619647979736, "learning_rate": 6.058769275929837e-06, "loss": 0.8899, "step": 263590 }, { "epoch": 1.6840652671121732, "grad_norm": 1.1105660200119019, "learning_rate": 6.056375334168485e-06, "loss": 0.9496, "step": 263600 }, { "epoch": 1.6841291542619117, "grad_norm": 0.8752860426902771, "learning_rate": 6.05398183495745e-06, "loss": 0.6881, "step": 263610 }, { "epoch": 1.6841930414116506, "grad_norm": 0.7431759238243103, "learning_rate": 6.051588778320833e-06, "loss": 0.6131, "step": 263620 }, { "epoch": 1.684256928561389, "grad_norm": 1.1750792264938354, "learning_rate": 6.0491961642827384e-06, "loss": 0.7242, "step": 263630 }, { "epoch": 1.684320815711128, "grad_norm": 1.0727992057800293, "learning_rate": 6.046803992867256e-06, "loss": 0.8961, "step": 263640 }, { "epoch": 1.6843847028608665, "grad_norm": 0.9775357842445374, "learning_rate": 6.044412264098493e-06, "loss": 0.7858, "step": 263650 }, { "epoch": 1.6844485900106054, "grad_norm": 1.4500555992126465, "learning_rate": 6.042020978000518e-06, "loss": 0.8259, "step": 263660 }, { "epoch": 1.684512477160344, "grad_norm": 1.6051480770111084, "learning_rate": 6.039630134597424e-06, "loss": 0.9529, "step": 263670 }, { "epoch": 1.6845763643100826, "grad_norm": 1.0749129056930542, "learning_rate": 6.03723973391328e-06, "loss": 0.7638, "step": 263680 }, { "epoch": 1.6846402514598213, "grad_norm": 1.262317180633545, "learning_rate": 6.034849775972174e-06, "loss": 0.7457, "step": 263690 }, { "epoch": 1.68470413860956, "grad_norm": 1.0481128692626953, "learning_rate": 6.032460260798156e-06, "loss": 0.9739, "step": 263700 }, { "epoch": 1.6847680257592987, "grad_norm": 0.7693667411804199, "learning_rate": 6.03007118841531e-06, "loss": 0.92, "step": 263710 }, { "epoch": 1.6848319129090374, "grad_norm": 0.777385413646698, "learning_rate": 6.0276825588476805e-06, "loss": 1.0272, "step": 263720 }, { "epoch": 1.6848958000587761, "grad_norm": 1.0397416353225708, "learning_rate": 6.0252943721193334e-06, "loss": 0.8549, "step": 263730 }, { "epoch": 1.6849596872085149, "grad_norm": 0.6522778272628784, "learning_rate": 6.02290662825431e-06, "loss": 0.8055, "step": 263740 }, { "epoch": 1.6850235743582536, "grad_norm": 0.704421877861023, "learning_rate": 6.0205193272766695e-06, "loss": 0.7099, "step": 263750 }, { "epoch": 1.6850874615079923, "grad_norm": 0.9648008942604065, "learning_rate": 6.018132469210436e-06, "loss": 0.9972, "step": 263760 }, { "epoch": 1.685151348657731, "grad_norm": 0.9081729054450989, "learning_rate": 6.015746054079663e-06, "loss": 0.6949, "step": 263770 }, { "epoch": 1.6852152358074697, "grad_norm": 1.4176228046417236, "learning_rate": 6.013360081908387e-06, "loss": 0.8783, "step": 263780 }, { "epoch": 1.6852791229572084, "grad_norm": 1.1324151754379272, "learning_rate": 6.010974552720616e-06, "loss": 0.8746, "step": 263790 }, { "epoch": 1.685343010106947, "grad_norm": 1.1132959127426147, "learning_rate": 6.008589466540399e-06, "loss": 0.9005, "step": 263800 }, { "epoch": 1.6854068972566858, "grad_norm": 0.9364607930183411, "learning_rate": 6.006204823391731e-06, "loss": 0.7132, "step": 263810 }, { "epoch": 1.6854707844064245, "grad_norm": 1.3173259496688843, "learning_rate": 6.003820623298656e-06, "loss": 1.0, "step": 263820 }, { "epoch": 1.6855346715561632, "grad_norm": 1.278419017791748, "learning_rate": 6.001436866285159e-06, "loss": 0.8025, "step": 263830 }, { "epoch": 1.685598558705902, "grad_norm": 0.7469710111618042, "learning_rate": 5.999053552375267e-06, "loss": 0.7053, "step": 263840 }, { "epoch": 1.6856624458556406, "grad_norm": 1.4082916975021362, "learning_rate": 5.996670681592958e-06, "loss": 1.2888, "step": 263850 }, { "epoch": 1.6857263330053793, "grad_norm": 0.9647468328475952, "learning_rate": 5.994288253962255e-06, "loss": 0.8287, "step": 263860 }, { "epoch": 1.685790220155118, "grad_norm": 0.743918240070343, "learning_rate": 5.9919062695071304e-06, "loss": 0.7825, "step": 263870 }, { "epoch": 1.6858541073048567, "grad_norm": 0.9181013703346252, "learning_rate": 5.989524728251594e-06, "loss": 0.8352, "step": 263880 }, { "epoch": 1.6859179944545954, "grad_norm": 0.8533596992492676, "learning_rate": 5.987143630219605e-06, "loss": 1.0479, "step": 263890 }, { "epoch": 1.6859818816043342, "grad_norm": 1.0133002996444702, "learning_rate": 5.984762975435166e-06, "loss": 0.8272, "step": 263900 }, { "epoch": 1.6860457687540729, "grad_norm": 0.8661429286003113, "learning_rate": 5.982382763922234e-06, "loss": 1.0064, "step": 263910 }, { "epoch": 1.6861096559038113, "grad_norm": 0.8174116611480713, "learning_rate": 5.980002995704798e-06, "loss": 1.0821, "step": 263920 }, { "epoch": 1.6861735430535503, "grad_norm": 0.9533768892288208, "learning_rate": 5.977623670806804e-06, "loss": 0.7653, "step": 263930 }, { "epoch": 1.6862374302032888, "grad_norm": 1.140524983406067, "learning_rate": 5.975244789252238e-06, "loss": 0.7114, "step": 263940 }, { "epoch": 1.6863013173530277, "grad_norm": 1.5895030498504639, "learning_rate": 5.972866351065026e-06, "loss": 0.8785, "step": 263950 }, { "epoch": 1.6863652045027662, "grad_norm": 0.9356734752655029, "learning_rate": 5.970488356269155e-06, "loss": 1.0909, "step": 263960 }, { "epoch": 1.686429091652505, "grad_norm": 0.9342418909072876, "learning_rate": 5.968110804888544e-06, "loss": 0.7535, "step": 263970 }, { "epoch": 1.6864929788022436, "grad_norm": 0.8574849963188171, "learning_rate": 5.965733696947151e-06, "loss": 0.7482, "step": 263980 }, { "epoch": 1.6865568659519825, "grad_norm": 0.6715033650398254, "learning_rate": 5.9633570324689246e-06, "loss": 0.7679, "step": 263990 }, { "epoch": 1.686620753101721, "grad_norm": 0.6406641006469727, "learning_rate": 5.960980811477784e-06, "loss": 0.793, "step": 264000 }, { "epoch": 1.68668464025146, "grad_norm": 1.4869370460510254, "learning_rate": 5.958605033997672e-06, "loss": 0.912, "step": 264010 }, { "epoch": 1.6867485274011984, "grad_norm": 1.958756923675537, "learning_rate": 5.956229700052501e-06, "loss": 0.9161, "step": 264020 }, { "epoch": 1.6868124145509373, "grad_norm": 0.8519002795219421, "learning_rate": 5.95385480966621e-06, "loss": 0.8742, "step": 264030 }, { "epoch": 1.6868763017006758, "grad_norm": 1.1269562244415283, "learning_rate": 5.951480362862694e-06, "loss": 0.7672, "step": 264040 }, { "epoch": 1.6869401888504147, "grad_norm": 3.209676504135132, "learning_rate": 5.949106359665885e-06, "loss": 0.9413, "step": 264050 }, { "epoch": 1.6870040760001532, "grad_norm": 1.4073954820632935, "learning_rate": 5.94673280009968e-06, "loss": 1.1358, "step": 264060 }, { "epoch": 1.6870679631498922, "grad_norm": 1.4022773504257202, "learning_rate": 5.944359684187995e-06, "loss": 1.0455, "step": 264070 }, { "epoch": 1.6871318502996306, "grad_norm": 0.8840337991714478, "learning_rate": 5.941987011954714e-06, "loss": 0.7272, "step": 264080 }, { "epoch": 1.6871957374493696, "grad_norm": 0.6392529010772705, "learning_rate": 5.939614783423747e-06, "loss": 0.7665, "step": 264090 }, { "epoch": 1.687259624599108, "grad_norm": 2.5018985271453857, "learning_rate": 5.937242998618974e-06, "loss": 0.7378, "step": 264100 }, { "epoch": 1.687323511748847, "grad_norm": 3.1915085315704346, "learning_rate": 5.934871657564278e-06, "loss": 1.0742, "step": 264110 }, { "epoch": 1.6873873988985855, "grad_norm": 1.1561402082443237, "learning_rate": 5.93250076028356e-06, "loss": 0.9284, "step": 264120 }, { "epoch": 1.6874512860483244, "grad_norm": 0.9742777943611145, "learning_rate": 5.930130306800669e-06, "loss": 1.1178, "step": 264130 }, { "epoch": 1.6875151731980629, "grad_norm": 1.0389258861541748, "learning_rate": 5.927760297139501e-06, "loss": 0.8429, "step": 264140 }, { "epoch": 1.6875790603478018, "grad_norm": 1.2347203493118286, "learning_rate": 5.925390731323904e-06, "loss": 0.9766, "step": 264150 }, { "epoch": 1.6876429474975403, "grad_norm": 1.1833796501159668, "learning_rate": 5.92302160937776e-06, "loss": 0.8574, "step": 264160 }, { "epoch": 1.687706834647279, "grad_norm": 1.1623376607894897, "learning_rate": 5.920652931324916e-06, "loss": 0.8113, "step": 264170 }, { "epoch": 1.6877707217970177, "grad_norm": 0.9164713621139526, "learning_rate": 5.918284697189236e-06, "loss": 0.8357, "step": 264180 }, { "epoch": 1.6878346089467564, "grad_norm": 0.7683905959129333, "learning_rate": 5.915916906994556e-06, "loss": 0.8485, "step": 264190 }, { "epoch": 1.6878984960964951, "grad_norm": 0.6126852631568909, "learning_rate": 5.9135495607647475e-06, "loss": 0.9963, "step": 264200 }, { "epoch": 1.6879623832462338, "grad_norm": 3.0341567993164062, "learning_rate": 5.911182658523618e-06, "loss": 0.9031, "step": 264210 }, { "epoch": 1.6880262703959725, "grad_norm": 1.1758440732955933, "learning_rate": 5.908816200295036e-06, "loss": 0.8179, "step": 264220 }, { "epoch": 1.6880901575457112, "grad_norm": 2.492748975753784, "learning_rate": 5.906450186102802e-06, "loss": 0.8721, "step": 264230 }, { "epoch": 1.68815404469545, "grad_norm": 0.7346659302711487, "learning_rate": 5.904084615970778e-06, "loss": 0.7518, "step": 264240 }, { "epoch": 1.6882179318451886, "grad_norm": 0.5643122792243958, "learning_rate": 5.901719489922758e-06, "loss": 0.8097, "step": 264250 }, { "epoch": 1.6882818189949274, "grad_norm": 1.1059579849243164, "learning_rate": 5.899354807982582e-06, "loss": 1.0127, "step": 264260 }, { "epoch": 1.688345706144666, "grad_norm": 0.9043902158737183, "learning_rate": 5.896990570174049e-06, "loss": 0.9114, "step": 264270 }, { "epoch": 1.6884095932944048, "grad_norm": 1.016475796699524, "learning_rate": 5.894626776520984e-06, "loss": 0.8536, "step": 264280 }, { "epoch": 1.6884734804441435, "grad_norm": 1.6651692390441895, "learning_rate": 5.892263427047173e-06, "loss": 0.7038, "step": 264290 }, { "epoch": 1.6885373675938822, "grad_norm": 0.9237732887268066, "learning_rate": 5.889900521776426e-06, "loss": 0.9019, "step": 264300 }, { "epoch": 1.6886012547436209, "grad_norm": 0.8759250640869141, "learning_rate": 5.887538060732556e-06, "loss": 0.7557, "step": 264310 }, { "epoch": 1.6886651418933596, "grad_norm": 1.015563726425171, "learning_rate": 5.885176043939328e-06, "loss": 0.8072, "step": 264320 }, { "epoch": 1.6887290290430983, "grad_norm": 1.165315866470337, "learning_rate": 5.8828144714205505e-06, "loss": 1.0102, "step": 264330 }, { "epoch": 1.688792916192837, "grad_norm": 1.189011812210083, "learning_rate": 5.8804533431999935e-06, "loss": 0.9371, "step": 264340 }, { "epoch": 1.6888568033425757, "grad_norm": 1.0117441415786743, "learning_rate": 5.878092659301443e-06, "loss": 1.2065, "step": 264350 }, { "epoch": 1.6889206904923144, "grad_norm": 0.6729750633239746, "learning_rate": 5.875732419748664e-06, "loss": 0.7066, "step": 264360 }, { "epoch": 1.6889845776420531, "grad_norm": 2.093863010406494, "learning_rate": 5.873372624565443e-06, "loss": 0.7922, "step": 264370 }, { "epoch": 1.6890484647917918, "grad_norm": 0.9474301338195801, "learning_rate": 5.871013273775522e-06, "loss": 1.0108, "step": 264380 }, { "epoch": 1.6891123519415305, "grad_norm": 0.9864105582237244, "learning_rate": 5.8686543674026875e-06, "loss": 0.9154, "step": 264390 }, { "epoch": 1.6891762390912692, "grad_norm": 0.9760420918464661, "learning_rate": 5.86629590547067e-06, "loss": 1.0111, "step": 264400 }, { "epoch": 1.6892401262410077, "grad_norm": 1.5423452854156494, "learning_rate": 5.863937888003246e-06, "loss": 1.1758, "step": 264410 }, { "epoch": 1.6893040133907467, "grad_norm": 0.9814441800117493, "learning_rate": 5.86158031502414e-06, "loss": 0.7018, "step": 264420 }, { "epoch": 1.6893679005404851, "grad_norm": 0.909810483455658, "learning_rate": 5.859223186557111e-06, "loss": 0.9151, "step": 264430 }, { "epoch": 1.689431787690224, "grad_norm": 0.6530123949050903, "learning_rate": 5.856866502625891e-06, "loss": 0.8051, "step": 264440 }, { "epoch": 1.6894956748399625, "grad_norm": 0.843220591545105, "learning_rate": 5.854510263254215e-06, "loss": 0.9839, "step": 264450 }, { "epoch": 1.6895595619897015, "grad_norm": 1.1965805292129517, "learning_rate": 5.852154468465809e-06, "loss": 1.0437, "step": 264460 }, { "epoch": 1.68962344913944, "grad_norm": 1.018641471862793, "learning_rate": 5.849799118284405e-06, "loss": 0.9625, "step": 264470 }, { "epoch": 1.6896873362891789, "grad_norm": 1.1121151447296143, "learning_rate": 5.847444212733716e-06, "loss": 0.9465, "step": 264480 }, { "epoch": 1.6897512234389174, "grad_norm": 1.031753659248352, "learning_rate": 5.845089751837462e-06, "loss": 0.7985, "step": 264490 }, { "epoch": 1.6898151105886563, "grad_norm": 0.7224723100662231, "learning_rate": 5.8427357356193536e-06, "loss": 0.8471, "step": 264500 }, { "epoch": 1.6898789977383948, "grad_norm": 2.845839738845825, "learning_rate": 5.840382164103092e-06, "loss": 1.1273, "step": 264510 }, { "epoch": 1.6899428848881337, "grad_norm": 0.8431565761566162, "learning_rate": 5.838029037312398e-06, "loss": 0.7059, "step": 264520 }, { "epoch": 1.6900067720378722, "grad_norm": 1.1503245830535889, "learning_rate": 5.835676355270942e-06, "loss": 0.9751, "step": 264530 }, { "epoch": 1.6900706591876111, "grad_norm": 1.0872777700424194, "learning_rate": 5.833324118002448e-06, "loss": 1.5552, "step": 264540 }, { "epoch": 1.6901345463373496, "grad_norm": 1.0153592824935913, "learning_rate": 5.8309723255305815e-06, "loss": 1.005, "step": 264550 }, { "epoch": 1.6901984334870885, "grad_norm": 1.078887939453125, "learning_rate": 5.82862097787904e-06, "loss": 0.9291, "step": 264560 }, { "epoch": 1.690262320636827, "grad_norm": 1.0092707872390747, "learning_rate": 5.826270075071488e-06, "loss": 0.8592, "step": 264570 }, { "epoch": 1.690326207786566, "grad_norm": 3.004528045654297, "learning_rate": 5.8239196171316405e-06, "loss": 0.9122, "step": 264580 }, { "epoch": 1.6903900949363044, "grad_norm": 0.9294689297676086, "learning_rate": 5.821569604083111e-06, "loss": 0.7162, "step": 264590 }, { "epoch": 1.6904539820860434, "grad_norm": 0.7767539024353027, "learning_rate": 5.819220035949613e-06, "loss": 0.7823, "step": 264600 }, { "epoch": 1.6905178692357818, "grad_norm": 1.2645741701126099, "learning_rate": 5.816870912754774e-06, "loss": 0.9638, "step": 264610 }, { "epoch": 1.6905817563855208, "grad_norm": 1.0132741928100586, "learning_rate": 5.814522234522274e-06, "loss": 0.7176, "step": 264620 }, { "epoch": 1.6906456435352593, "grad_norm": 0.9678375720977783, "learning_rate": 5.812174001275766e-06, "loss": 1.2492, "step": 264630 }, { "epoch": 1.6907095306849982, "grad_norm": 1.0396130084991455, "learning_rate": 5.809826213038888e-06, "loss": 0.8539, "step": 264640 }, { "epoch": 1.6907734178347367, "grad_norm": 0.7152589559555054, "learning_rate": 5.8074788698352975e-06, "loss": 0.8661, "step": 264650 }, { "epoch": 1.6908373049844754, "grad_norm": 1.435221791267395, "learning_rate": 5.805131971688621e-06, "loss": 0.9256, "step": 264660 }, { "epoch": 1.690901192134214, "grad_norm": 0.7107662558555603, "learning_rate": 5.802785518622506e-06, "loss": 1.0261, "step": 264670 }, { "epoch": 1.6909650792839528, "grad_norm": 0.7626693844795227, "learning_rate": 5.800439510660566e-06, "loss": 0.9455, "step": 264680 }, { "epoch": 1.6910289664336915, "grad_norm": 0.8588293194770813, "learning_rate": 5.79809394782645e-06, "loss": 0.7354, "step": 264690 }, { "epoch": 1.6910928535834302, "grad_norm": 0.9404697418212891, "learning_rate": 5.795748830143755e-06, "loss": 1.0988, "step": 264700 }, { "epoch": 1.691156740733169, "grad_norm": 0.4883400797843933, "learning_rate": 5.7934041576361285e-06, "loss": 1.0692, "step": 264710 }, { "epoch": 1.6912206278829076, "grad_norm": 1.085710883140564, "learning_rate": 5.7910599303271475e-06, "loss": 1.007, "step": 264720 }, { "epoch": 1.6912845150326463, "grad_norm": 0.68525630235672, "learning_rate": 5.788716148240458e-06, "loss": 0.6872, "step": 264730 }, { "epoch": 1.691348402182385, "grad_norm": 1.4903061389923096, "learning_rate": 5.786372811399627e-06, "loss": 0.7983, "step": 264740 }, { "epoch": 1.6914122893321237, "grad_norm": 1.2236237525939941, "learning_rate": 5.784029919828288e-06, "loss": 1.0272, "step": 264750 }, { "epoch": 1.6914761764818624, "grad_norm": 1.0498038530349731, "learning_rate": 5.781687473550007e-06, "loss": 0.8841, "step": 264760 }, { "epoch": 1.6915400636316011, "grad_norm": 1.0530762672424316, "learning_rate": 5.779345472588399e-06, "loss": 0.8159, "step": 264770 }, { "epoch": 1.6916039507813398, "grad_norm": 1.0849366188049316, "learning_rate": 5.777003916967027e-06, "loss": 0.924, "step": 264780 }, { "epoch": 1.6916678379310786, "grad_norm": 1.0517401695251465, "learning_rate": 5.774662806709491e-06, "loss": 1.0696, "step": 264790 }, { "epoch": 1.6917317250808173, "grad_norm": 1.0167468786239624, "learning_rate": 5.772322141839353e-06, "loss": 0.8677, "step": 264800 }, { "epoch": 1.691795612230556, "grad_norm": 1.034781813621521, "learning_rate": 5.769981922380208e-06, "loss": 0.7875, "step": 264810 }, { "epoch": 1.6918594993802947, "grad_norm": 0.9005605578422546, "learning_rate": 5.767642148355595e-06, "loss": 0.7586, "step": 264820 }, { "epoch": 1.6919233865300334, "grad_norm": 0.707309901714325, "learning_rate": 5.765302819789092e-06, "loss": 0.8886, "step": 264830 }, { "epoch": 1.691987273679772, "grad_norm": 0.6730127334594727, "learning_rate": 5.762963936704269e-06, "loss": 0.9539, "step": 264840 }, { "epoch": 1.6920511608295108, "grad_norm": 0.8145616054534912, "learning_rate": 5.760625499124661e-06, "loss": 0.813, "step": 264850 }, { "epoch": 1.6921150479792495, "grad_norm": 0.7684074640274048, "learning_rate": 5.758287507073834e-06, "loss": 0.7269, "step": 264860 }, { "epoch": 1.6921789351289882, "grad_norm": 2.0174615383148193, "learning_rate": 5.7559499605753185e-06, "loss": 0.877, "step": 264870 }, { "epoch": 1.692242822278727, "grad_norm": 0.8217144012451172, "learning_rate": 5.753612859652674e-06, "loss": 0.706, "step": 264880 }, { "epoch": 1.6923067094284656, "grad_norm": 1.2375004291534424, "learning_rate": 5.7512762043294145e-06, "loss": 1.0963, "step": 264890 }, { "epoch": 1.692370596578204, "grad_norm": 2.328045129776001, "learning_rate": 5.748939994629093e-06, "loss": 0.7973, "step": 264900 }, { "epoch": 1.692434483727943, "grad_norm": 1.1463247537612915, "learning_rate": 5.74660423057522e-06, "loss": 0.7571, "step": 264910 }, { "epoch": 1.6924983708776815, "grad_norm": 1.045371413230896, "learning_rate": 5.7442689121913415e-06, "loss": 0.871, "step": 264920 }, { "epoch": 1.6925622580274204, "grad_norm": 0.9297313690185547, "learning_rate": 5.741934039500946e-06, "loss": 0.79, "step": 264930 }, { "epoch": 1.692626145177159, "grad_norm": 0.9317049980163574, "learning_rate": 5.739599612527574e-06, "loss": 0.8123, "step": 264940 }, { "epoch": 1.6926900323268979, "grad_norm": 0.9831722378730774, "learning_rate": 5.737265631294714e-06, "loss": 1.0598, "step": 264950 }, { "epoch": 1.6927539194766363, "grad_norm": 2.586610794067383, "learning_rate": 5.734932095825895e-06, "loss": 0.7519, "step": 264960 }, { "epoch": 1.6928178066263753, "grad_norm": 1.0624761581420898, "learning_rate": 5.732599006144595e-06, "loss": 1.0614, "step": 264970 }, { "epoch": 1.6928816937761137, "grad_norm": 0.8045127391815186, "learning_rate": 5.730266362274328e-06, "loss": 0.8182, "step": 264980 }, { "epoch": 1.6929455809258527, "grad_norm": 0.8507513403892517, "learning_rate": 5.727934164238563e-06, "loss": 1.1099, "step": 264990 }, { "epoch": 1.6930094680755912, "grad_norm": 1.320726990699768, "learning_rate": 5.725602412060821e-06, "loss": 0.9069, "step": 265000 }, { "epoch": 1.69307335522533, "grad_norm": 0.8424960374832153, "learning_rate": 5.723271105764549e-06, "loss": 0.6733, "step": 265010 }, { "epoch": 1.6931372423750686, "grad_norm": 0.7862983345985413, "learning_rate": 5.720940245373252e-06, "loss": 0.8787, "step": 265020 }, { "epoch": 1.6932011295248075, "grad_norm": 1.225223183631897, "learning_rate": 5.718609830910388e-06, "loss": 0.9061, "step": 265030 }, { "epoch": 1.693265016674546, "grad_norm": 1.9154473543167114, "learning_rate": 5.716279862399427e-06, "loss": 1.0028, "step": 265040 }, { "epoch": 1.693328903824285, "grad_norm": 1.0388178825378418, "learning_rate": 5.713950339863849e-06, "loss": 0.9553, "step": 265050 }, { "epoch": 1.6933927909740234, "grad_norm": 1.0727187395095825, "learning_rate": 5.711621263327094e-06, "loss": 0.7793, "step": 265060 }, { "epoch": 1.6934566781237623, "grad_norm": 0.8072197437286377, "learning_rate": 5.709292632812652e-06, "loss": 0.711, "step": 265070 }, { "epoch": 1.6935205652735008, "grad_norm": 0.7687093019485474, "learning_rate": 5.706964448343926e-06, "loss": 0.8484, "step": 265080 }, { "epoch": 1.6935844524232397, "grad_norm": 0.8324954509735107, "learning_rate": 5.7046367099444e-06, "loss": 0.9244, "step": 265090 }, { "epoch": 1.6936483395729782, "grad_norm": 0.781612753868103, "learning_rate": 5.702309417637492e-06, "loss": 1.0521, "step": 265100 }, { "epoch": 1.6937122267227172, "grad_norm": 1.3374513387680054, "learning_rate": 5.699982571446655e-06, "loss": 1.1758, "step": 265110 }, { "epoch": 1.6937761138724556, "grad_norm": 1.0523484945297241, "learning_rate": 5.697656171395316e-06, "loss": 0.8768, "step": 265120 }, { "epoch": 1.6938400010221943, "grad_norm": 0.8894367218017578, "learning_rate": 5.695330217506916e-06, "loss": 0.6643, "step": 265130 }, { "epoch": 1.693903888171933, "grad_norm": 2.262582778930664, "learning_rate": 5.693004709804855e-06, "loss": 0.9333, "step": 265140 }, { "epoch": 1.6939677753216718, "grad_norm": 0.9840685725212097, "learning_rate": 5.690679648312575e-06, "loss": 0.839, "step": 265150 }, { "epoch": 1.6940316624714105, "grad_norm": 0.9524529576301575, "learning_rate": 5.688355033053489e-06, "loss": 0.8996, "step": 265160 }, { "epoch": 1.6940955496211492, "grad_norm": 1.0004558563232422, "learning_rate": 5.686030864050989e-06, "loss": 0.7685, "step": 265170 }, { "epoch": 1.6941594367708879, "grad_norm": 1.045327067375183, "learning_rate": 5.683707141328515e-06, "loss": 0.7907, "step": 265180 }, { "epoch": 1.6942233239206266, "grad_norm": 1.2737689018249512, "learning_rate": 5.681383864909429e-06, "loss": 0.6774, "step": 265190 }, { "epoch": 1.6942872110703653, "grad_norm": 0.997908890247345, "learning_rate": 5.679061034817168e-06, "loss": 1.0813, "step": 265200 }, { "epoch": 1.694351098220104, "grad_norm": 1.0616806745529175, "learning_rate": 5.676738651075086e-06, "loss": 0.9101, "step": 265210 }, { "epoch": 1.6944149853698427, "grad_norm": 1.175321102142334, "learning_rate": 5.6744167137066095e-06, "loss": 0.6915, "step": 265220 }, { "epoch": 1.6944788725195814, "grad_norm": 0.9594330191612244, "learning_rate": 5.672095222735086e-06, "loss": 0.8794, "step": 265230 }, { "epoch": 1.69454275966932, "grad_norm": 1.3879942893981934, "learning_rate": 5.669774178183929e-06, "loss": 0.7531, "step": 265240 }, { "epoch": 1.6946066468190588, "grad_norm": 0.6630569696426392, "learning_rate": 5.667453580076487e-06, "loss": 0.8141, "step": 265250 }, { "epoch": 1.6946705339687975, "grad_norm": 0.8384342789649963, "learning_rate": 5.665133428436148e-06, "loss": 1.0865, "step": 265260 }, { "epoch": 1.6947344211185362, "grad_norm": 0.8924210071563721, "learning_rate": 5.662813723286259e-06, "loss": 0.7966, "step": 265270 }, { "epoch": 1.694798308268275, "grad_norm": 1.0226231813430786, "learning_rate": 5.660494464650207e-06, "loss": 0.9807, "step": 265280 }, { "epoch": 1.6948621954180136, "grad_norm": 1.0482666492462158, "learning_rate": 5.658175652551317e-06, "loss": 0.7901, "step": 265290 }, { "epoch": 1.6949260825677523, "grad_norm": 0.8355343341827393, "learning_rate": 5.6558572870129775e-06, "loss": 0.9319, "step": 265300 }, { "epoch": 1.694989969717491, "grad_norm": 1.1238371133804321, "learning_rate": 5.653539368058508e-06, "loss": 0.8083, "step": 265310 }, { "epoch": 1.6950538568672298, "grad_norm": 1.0383145809173584, "learning_rate": 5.651221895711268e-06, "loss": 0.6148, "step": 265320 }, { "epoch": 1.6951177440169685, "grad_norm": 1.1226203441619873, "learning_rate": 5.648904869994581e-06, "loss": 1.0053, "step": 265330 }, { "epoch": 1.6951816311667072, "grad_norm": 0.8662714958190918, "learning_rate": 5.646588290931804e-06, "loss": 0.9971, "step": 265340 }, { "epoch": 1.6952455183164459, "grad_norm": 0.6560549139976501, "learning_rate": 5.644272158546243e-06, "loss": 0.9397, "step": 265350 }, { "epoch": 1.6953094054661846, "grad_norm": 0.9533705115318298, "learning_rate": 5.641956472861232e-06, "loss": 0.7692, "step": 265360 }, { "epoch": 1.6953732926159233, "grad_norm": 3.250364065170288, "learning_rate": 5.6396412339001116e-06, "loss": 0.9262, "step": 265370 }, { "epoch": 1.695437179765662, "grad_norm": 1.1853001117706299, "learning_rate": 5.6373264416861635e-06, "loss": 0.746, "step": 265380 }, { "epoch": 1.6955010669154005, "grad_norm": 1.0173426866531372, "learning_rate": 5.635012096242731e-06, "loss": 0.7435, "step": 265390 }, { "epoch": 1.6955649540651394, "grad_norm": 1.1551456451416016, "learning_rate": 5.632698197593095e-06, "loss": 0.8924, "step": 265400 }, { "epoch": 1.695628841214878, "grad_norm": 0.8069493770599365, "learning_rate": 5.630384745760586e-06, "loss": 0.8283, "step": 265410 }, { "epoch": 1.6956927283646168, "grad_norm": 0.8276717662811279, "learning_rate": 5.628303021159209e-06, "loss": 0.8786, "step": 265420 }, { "epoch": 1.6957566155143553, "grad_norm": 0.9626677632331848, "learning_rate": 5.625990418343391e-06, "loss": 0.9655, "step": 265430 }, { "epoch": 1.6958205026640942, "grad_norm": 1.4208831787109375, "learning_rate": 5.62367826241223e-06, "loss": 0.8715, "step": 265440 }, { "epoch": 1.6958843898138327, "grad_norm": 0.8281898498535156, "learning_rate": 5.621366553389035e-06, "loss": 1.3512, "step": 265450 }, { "epoch": 1.6959482769635716, "grad_norm": 1.0248709917068481, "learning_rate": 5.619055291297059e-06, "loss": 0.8385, "step": 265460 }, { "epoch": 1.6960121641133101, "grad_norm": 1.3580818176269531, "learning_rate": 5.616744476159591e-06, "loss": 0.8225, "step": 265470 }, { "epoch": 1.696076051263049, "grad_norm": 0.7210913300514221, "learning_rate": 5.614434107999911e-06, "loss": 0.7541, "step": 265480 }, { "epoch": 1.6961399384127875, "grad_norm": 1.2958444356918335, "learning_rate": 5.6121241868412726e-06, "loss": 0.7449, "step": 265490 }, { "epoch": 1.6962038255625265, "grad_norm": 1.102543830871582, "learning_rate": 5.6098147127069515e-06, "loss": 1.1964, "step": 265500 }, { "epoch": 1.696267712712265, "grad_norm": 1.0483332872390747, "learning_rate": 5.60750568562019e-06, "loss": 0.8113, "step": 265510 }, { "epoch": 1.6963315998620039, "grad_norm": 0.9912049770355225, "learning_rate": 5.60519710560426e-06, "loss": 0.6473, "step": 265520 }, { "epoch": 1.6963954870117424, "grad_norm": 0.705596387386322, "learning_rate": 5.6028889726823905e-06, "loss": 0.6631, "step": 265530 }, { "epoch": 1.6964593741614813, "grad_norm": 0.6908966302871704, "learning_rate": 5.600581286877854e-06, "loss": 0.7591, "step": 265540 }, { "epoch": 1.6965232613112198, "grad_norm": 0.9993156790733337, "learning_rate": 5.598274048213858e-06, "loss": 1.0082, "step": 265550 }, { "epoch": 1.6965871484609587, "grad_norm": 0.7562658786773682, "learning_rate": 5.5959672567136745e-06, "loss": 0.9683, "step": 265560 }, { "epoch": 1.6966510356106972, "grad_norm": 1.1996532678604126, "learning_rate": 5.5936609124005e-06, "loss": 1.0089, "step": 265570 }, { "epoch": 1.6967149227604361, "grad_norm": 0.7490407228469849, "learning_rate": 5.591355015297583e-06, "loss": 0.7151, "step": 265580 }, { "epoch": 1.6967788099101746, "grad_norm": 0.9578597545623779, "learning_rate": 5.589049565428134e-06, "loss": 0.7907, "step": 265590 }, { "epoch": 1.6968426970599135, "grad_norm": 1.636884331703186, "learning_rate": 5.586744562815388e-06, "loss": 1.1151, "step": 265600 }, { "epoch": 1.696906584209652, "grad_norm": 0.9623717069625854, "learning_rate": 5.584440007482539e-06, "loss": 1.0491, "step": 265610 }, { "epoch": 1.6969704713593907, "grad_norm": 0.6296240091323853, "learning_rate": 5.582135899452811e-06, "loss": 0.979, "step": 265620 }, { "epoch": 1.6970343585091294, "grad_norm": 0.869094729423523, "learning_rate": 5.5798322387493884e-06, "loss": 1.0489, "step": 265630 }, { "epoch": 1.6970982456588681, "grad_norm": 0.6812463402748108, "learning_rate": 5.5775290253955e-06, "loss": 0.7392, "step": 265640 }, { "epoch": 1.6971621328086068, "grad_norm": 1.0079282522201538, "learning_rate": 5.575226259414313e-06, "loss": 0.8945, "step": 265650 }, { "epoch": 1.6972260199583455, "grad_norm": 4.871733665466309, "learning_rate": 5.572923940829039e-06, "loss": 1.029, "step": 265660 }, { "epoch": 1.6972899071080843, "grad_norm": 0.808641791343689, "learning_rate": 5.570622069662846e-06, "loss": 0.9738, "step": 265670 }, { "epoch": 1.697353794257823, "grad_norm": 0.7088136076927185, "learning_rate": 5.568320645938929e-06, "loss": 0.8631, "step": 265680 }, { "epoch": 1.6974176814075617, "grad_norm": 0.9034357070922852, "learning_rate": 5.566019669680467e-06, "loss": 0.9508, "step": 265690 }, { "epoch": 1.6974815685573004, "grad_norm": 1.4830907583236694, "learning_rate": 5.563719140910628e-06, "loss": 1.0575, "step": 265700 }, { "epoch": 1.697545455707039, "grad_norm": 0.7605845332145691, "learning_rate": 5.561419059652584e-06, "loss": 0.7593, "step": 265710 }, { "epoch": 1.6976093428567778, "grad_norm": 0.7586846351623535, "learning_rate": 5.559119425929482e-06, "loss": 0.7799, "step": 265720 }, { "epoch": 1.6976732300065165, "grad_norm": 0.8755923509597778, "learning_rate": 5.55682023976451e-06, "loss": 0.8196, "step": 265730 }, { "epoch": 1.6977371171562552, "grad_norm": 0.7772778868675232, "learning_rate": 5.554521501180793e-06, "loss": 0.7469, "step": 265740 }, { "epoch": 1.697801004305994, "grad_norm": 1.2870303392410278, "learning_rate": 5.552223210201502e-06, "loss": 1.0656, "step": 265750 }, { "epoch": 1.6978648914557326, "grad_norm": 0.8444591164588928, "learning_rate": 5.549925366849767e-06, "loss": 1.065, "step": 265760 }, { "epoch": 1.6979287786054713, "grad_norm": 1.0897783041000366, "learning_rate": 5.54762797114875e-06, "loss": 0.8331, "step": 265770 }, { "epoch": 1.69799266575521, "grad_norm": 0.7298462986946106, "learning_rate": 5.545331023121569e-06, "loss": 0.6569, "step": 265780 }, { "epoch": 1.6980565529049487, "grad_norm": 0.9896948933601379, "learning_rate": 5.543034522791362e-06, "loss": 1.0471, "step": 265790 }, { "epoch": 1.6981204400546874, "grad_norm": 2.4735770225524902, "learning_rate": 5.540738470181267e-06, "loss": 0.9096, "step": 265800 }, { "epoch": 1.6981843272044261, "grad_norm": 0.7311177253723145, "learning_rate": 5.538442865314386e-06, "loss": 1.044, "step": 265810 }, { "epoch": 1.6982482143541648, "grad_norm": 0.8349324464797974, "learning_rate": 5.536147708213862e-06, "loss": 0.8147, "step": 265820 }, { "epoch": 1.6983121015039035, "grad_norm": 1.0947216749191284, "learning_rate": 5.5338529989027885e-06, "loss": 0.7788, "step": 265830 }, { "epoch": 1.6983759886536423, "grad_norm": 1.4398168325424194, "learning_rate": 5.531558737404291e-06, "loss": 0.9125, "step": 265840 }, { "epoch": 1.698439875803381, "grad_norm": 0.9863196611404419, "learning_rate": 5.529264923741462e-06, "loss": 1.1985, "step": 265850 }, { "epoch": 1.6985037629531194, "grad_norm": 1.338010549545288, "learning_rate": 5.527200874363519e-06, "loss": 0.8456, "step": 265860 }, { "epoch": 1.6985676501028584, "grad_norm": 1.2365350723266602, "learning_rate": 5.524907911652111e-06, "loss": 0.7249, "step": 265870 }, { "epoch": 1.6986315372525969, "grad_norm": 0.6513351202011108, "learning_rate": 5.522615396843362e-06, "loss": 0.9132, "step": 265880 }, { "epoch": 1.6986954244023358, "grad_norm": 1.3514388799667358, "learning_rate": 5.520323329960347e-06, "loss": 0.9574, "step": 265890 }, { "epoch": 1.6987593115520743, "grad_norm": 1.5558171272277832, "learning_rate": 5.518031711026161e-06, "loss": 0.7067, "step": 265900 }, { "epoch": 1.6988231987018132, "grad_norm": 1.4183495044708252, "learning_rate": 5.5157405400638736e-06, "loss": 0.834, "step": 265910 }, { "epoch": 1.6988870858515517, "grad_norm": 1.1658267974853516, "learning_rate": 5.513449817096561e-06, "loss": 0.609, "step": 265920 }, { "epoch": 1.6989509730012906, "grad_norm": 0.8308910131454468, "learning_rate": 5.511159542147304e-06, "loss": 0.6832, "step": 265930 }, { "epoch": 1.699014860151029, "grad_norm": 1.268141508102417, "learning_rate": 5.508869715239151e-06, "loss": 0.8501, "step": 265940 }, { "epoch": 1.699078747300768, "grad_norm": 0.9764382243156433, "learning_rate": 5.506580336395179e-06, "loss": 0.9117, "step": 265950 }, { "epoch": 1.6991426344505065, "grad_norm": 0.9711419939994812, "learning_rate": 5.504291405638429e-06, "loss": 0.8632, "step": 265960 }, { "epoch": 1.6992065216002454, "grad_norm": 0.8464389443397522, "learning_rate": 5.5020029229919664e-06, "loss": 0.9455, "step": 265970 }, { "epoch": 1.699270408749984, "grad_norm": 1.472284197807312, "learning_rate": 5.499714888478818e-06, "loss": 0.8437, "step": 265980 }, { "epoch": 1.6993342958997228, "grad_norm": 0.63400799036026, "learning_rate": 5.497427302122054e-06, "loss": 0.6856, "step": 265990 }, { "epoch": 1.6993981830494613, "grad_norm": 0.9483168125152588, "learning_rate": 5.495140163944684e-06, "loss": 0.9194, "step": 266000 }, { "epoch": 1.6994620701992003, "grad_norm": 1.7425835132598877, "learning_rate": 5.492853473969761e-06, "loss": 0.9406, "step": 266010 }, { "epoch": 1.6995259573489387, "grad_norm": 1.738411545753479, "learning_rate": 5.490567232220306e-06, "loss": 0.941, "step": 266020 }, { "epoch": 1.6995898444986777, "grad_norm": 1.1537902355194092, "learning_rate": 5.488281438719351e-06, "loss": 0.7692, "step": 266030 }, { "epoch": 1.6996537316484162, "grad_norm": 0.5871503353118896, "learning_rate": 5.485996093489898e-06, "loss": 0.8314, "step": 266040 }, { "epoch": 1.699717618798155, "grad_norm": 1.1925203800201416, "learning_rate": 5.483711196554986e-06, "loss": 0.8419, "step": 266050 }, { "epoch": 1.6997815059478936, "grad_norm": 1.2385332584381104, "learning_rate": 5.481426747937601e-06, "loss": 1.017, "step": 266060 }, { "epoch": 1.6998453930976325, "grad_norm": 1.0228863954544067, "learning_rate": 5.479142747660781e-06, "loss": 0.963, "step": 266070 }, { "epoch": 1.699909280247371, "grad_norm": 0.7386691570281982, "learning_rate": 5.476859195747492e-06, "loss": 0.8636, "step": 266080 }, { "epoch": 1.69997316739711, "grad_norm": 0.9049159288406372, "learning_rate": 5.474576092220762e-06, "loss": 1.1555, "step": 266090 }, { "epoch": 1.7000370545468484, "grad_norm": 0.757064700126648, "learning_rate": 5.47229343710356e-06, "loss": 0.8304, "step": 266100 }, { "epoch": 1.700100941696587, "grad_norm": 0.7645605206489563, "learning_rate": 5.470011230418887e-06, "loss": 1.001, "step": 266110 }, { "epoch": 1.7001648288463258, "grad_norm": 1.0574803352355957, "learning_rate": 5.467729472189731e-06, "loss": 0.9994, "step": 266120 }, { "epoch": 1.7002287159960645, "grad_norm": 1.198251485824585, "learning_rate": 5.465448162439057e-06, "loss": 0.9769, "step": 266130 }, { "epoch": 1.7002926031458032, "grad_norm": 1.3360443115234375, "learning_rate": 5.4631673011898585e-06, "loss": 1.074, "step": 266140 }, { "epoch": 1.700356490295542, "grad_norm": 0.8937990069389343, "learning_rate": 5.460886888465088e-06, "loss": 1.0213, "step": 266150 }, { "epoch": 1.7004203774452806, "grad_norm": 0.8572983145713806, "learning_rate": 5.458606924287723e-06, "loss": 0.8882, "step": 266160 }, { "epoch": 1.7004842645950193, "grad_norm": 0.6053618788719177, "learning_rate": 5.456327408680711e-06, "loss": 0.6702, "step": 266170 }, { "epoch": 1.700548151744758, "grad_norm": 1.9757890701293945, "learning_rate": 5.454048341667034e-06, "loss": 0.6938, "step": 266180 }, { "epoch": 1.7006120388944967, "grad_norm": 0.7852265238761902, "learning_rate": 5.451769723269612e-06, "loss": 1.011, "step": 266190 }, { "epoch": 1.7006759260442355, "grad_norm": 0.7186400890350342, "learning_rate": 5.449491553511416e-06, "loss": 0.6203, "step": 266200 }, { "epoch": 1.7007398131939742, "grad_norm": 0.8351746201515198, "learning_rate": 5.447213832415377e-06, "loss": 0.8689, "step": 266210 }, { "epoch": 1.7008037003437129, "grad_norm": 0.8336975574493408, "learning_rate": 5.444936560004449e-06, "loss": 0.8293, "step": 266220 }, { "epoch": 1.7008675874934516, "grad_norm": 0.5873013734817505, "learning_rate": 5.442659736301542e-06, "loss": 0.7018, "step": 266230 }, { "epoch": 1.7009314746431903, "grad_norm": 6.3691582679748535, "learning_rate": 5.44038336132961e-06, "loss": 1.0403, "step": 266240 }, { "epoch": 1.700995361792929, "grad_norm": 0.9431154131889343, "learning_rate": 5.4381074351115556e-06, "loss": 0.8367, "step": 266250 }, { "epoch": 1.7010592489426677, "grad_norm": 3.6684696674346924, "learning_rate": 5.4358319576703236e-06, "loss": 0.8574, "step": 266260 }, { "epoch": 1.7011231360924064, "grad_norm": 0.8904080986976624, "learning_rate": 5.433556929028805e-06, "loss": 1.121, "step": 266270 }, { "epoch": 1.701187023242145, "grad_norm": 0.8094809651374817, "learning_rate": 5.431282349209937e-06, "loss": 0.8136, "step": 266280 }, { "epoch": 1.7012509103918838, "grad_norm": 1.1437190771102905, "learning_rate": 5.4290082182365975e-06, "loss": 0.8366, "step": 266290 }, { "epoch": 1.7013147975416225, "grad_norm": 1.4045140743255615, "learning_rate": 5.426734536131722e-06, "loss": 0.695, "step": 266300 }, { "epoch": 1.7013786846913612, "grad_norm": 0.8198695182800293, "learning_rate": 5.424461302918177e-06, "loss": 0.76, "step": 266310 }, { "epoch": 1.7014425718411, "grad_norm": 0.7523419260978699, "learning_rate": 5.422188518618871e-06, "loss": 0.5761, "step": 266320 }, { "epoch": 1.7015064589908386, "grad_norm": 0.9797813296318054, "learning_rate": 5.419916183256707e-06, "loss": 0.995, "step": 266330 }, { "epoch": 1.7015703461405773, "grad_norm": 1.0118427276611328, "learning_rate": 5.41764429685454e-06, "loss": 1.1202, "step": 266340 }, { "epoch": 1.7016342332903158, "grad_norm": 0.7771697640419006, "learning_rate": 5.415372859435275e-06, "loss": 0.8985, "step": 266350 }, { "epoch": 1.7016981204400548, "grad_norm": 1.2752560377120972, "learning_rate": 5.413101871021764e-06, "loss": 1.101, "step": 266360 }, { "epoch": 1.7017620075897932, "grad_norm": 0.8424556255340576, "learning_rate": 5.410831331636895e-06, "loss": 0.8552, "step": 266370 }, { "epoch": 1.7018258947395322, "grad_norm": 0.8436341881752014, "learning_rate": 5.408561241303528e-06, "loss": 0.9248, "step": 266380 }, { "epoch": 1.7018897818892706, "grad_norm": 0.9995906949043274, "learning_rate": 5.406291600044533e-06, "loss": 0.8248, "step": 266390 }, { "epoch": 1.7019536690390096, "grad_norm": 0.8756603598594666, "learning_rate": 5.404022407882753e-06, "loss": 1.2169, "step": 266400 }, { "epoch": 1.702017556188748, "grad_norm": 0.9120706915855408, "learning_rate": 5.401753664841053e-06, "loss": 0.8411, "step": 266410 }, { "epoch": 1.702081443338487, "grad_norm": 0.8383538126945496, "learning_rate": 5.39948537094227e-06, "loss": 0.8805, "step": 266420 }, { "epoch": 1.7021453304882255, "grad_norm": 1.04457688331604, "learning_rate": 5.397217526209253e-06, "loss": 0.9677, "step": 266430 }, { "epoch": 1.7022092176379644, "grad_norm": 1.101974368095398, "learning_rate": 5.394950130664855e-06, "loss": 0.6521, "step": 266440 }, { "epoch": 1.7022731047877029, "grad_norm": 0.7874002456665039, "learning_rate": 5.392683184331887e-06, "loss": 1.0725, "step": 266450 }, { "epoch": 1.7023369919374418, "grad_norm": 0.8857718110084534, "learning_rate": 5.390416687233202e-06, "loss": 0.9898, "step": 266460 }, { "epoch": 1.7024008790871803, "grad_norm": 1.0780762434005737, "learning_rate": 5.388150639391598e-06, "loss": 0.7795, "step": 266470 }, { "epoch": 1.7024647662369192, "grad_norm": 0.5900570154190063, "learning_rate": 5.38588504082993e-06, "loss": 0.8662, "step": 266480 }, { "epoch": 1.7025286533866577, "grad_norm": 0.7792630195617676, "learning_rate": 5.383619891570979e-06, "loss": 1.0849, "step": 266490 }, { "epoch": 1.7025925405363966, "grad_norm": 0.9480690360069275, "learning_rate": 5.381355191637588e-06, "loss": 0.8941, "step": 266500 }, { "epoch": 1.7026564276861351, "grad_norm": 1.1644970178604126, "learning_rate": 5.379090941052539e-06, "loss": 0.9368, "step": 266510 }, { "epoch": 1.702720314835874, "grad_norm": 1.4844380617141724, "learning_rate": 5.3768271398386585e-06, "loss": 0.8222, "step": 266520 }, { "epoch": 1.7027842019856125, "grad_norm": 1.1840111017227173, "learning_rate": 5.374563788018722e-06, "loss": 0.7043, "step": 266530 }, { "epoch": 1.7028480891353515, "grad_norm": 0.8633972406387329, "learning_rate": 5.372300885615545e-06, "loss": 0.8175, "step": 266540 }, { "epoch": 1.70291197628509, "grad_norm": 1.0917376279830933, "learning_rate": 5.370038432651897e-06, "loss": 1.0007, "step": 266550 }, { "epoch": 1.7029758634348289, "grad_norm": 0.8144697546958923, "learning_rate": 5.367776429150584e-06, "loss": 0.8722, "step": 266560 }, { "epoch": 1.7030397505845674, "grad_norm": 0.6899762153625488, "learning_rate": 5.3655148751343585e-06, "loss": 0.7992, "step": 266570 }, { "epoch": 1.7031036377343063, "grad_norm": 1.0007309913635254, "learning_rate": 5.363253770626026e-06, "loss": 0.8943, "step": 266580 }, { "epoch": 1.7031675248840448, "grad_norm": 0.7084468007087708, "learning_rate": 5.360993115648338e-06, "loss": 1.023, "step": 266590 }, { "epoch": 1.7032314120337835, "grad_norm": 1.3472901582717896, "learning_rate": 5.3587329102240735e-06, "loss": 1.3554, "step": 266600 }, { "epoch": 1.7032952991835222, "grad_norm": 1.2952237129211426, "learning_rate": 5.356473154375979e-06, "loss": 1.0382, "step": 266610 }, { "epoch": 1.7033591863332609, "grad_norm": 0.8648388385772705, "learning_rate": 5.354213848126832e-06, "loss": 0.7916, "step": 266620 }, { "epoch": 1.7034230734829996, "grad_norm": 0.8645228147506714, "learning_rate": 5.35195499149937e-06, "loss": 0.8126, "step": 266630 }, { "epoch": 1.7034869606327383, "grad_norm": 0.6788387894630432, "learning_rate": 5.349696584516345e-06, "loss": 0.8482, "step": 266640 }, { "epoch": 1.703550847782477, "grad_norm": 0.8210328817367554, "learning_rate": 5.3474386272005125e-06, "loss": 0.8949, "step": 266650 }, { "epoch": 1.7036147349322157, "grad_norm": 0.9088141918182373, "learning_rate": 5.345181119574588e-06, "loss": 0.8878, "step": 266660 }, { "epoch": 1.7036786220819544, "grad_norm": 1.4037779569625854, "learning_rate": 5.342924061661336e-06, "loss": 0.9856, "step": 266670 }, { "epoch": 1.7037425092316931, "grad_norm": 1.1558412313461304, "learning_rate": 5.340667453483467e-06, "loss": 0.7853, "step": 266680 }, { "epoch": 1.7038063963814318, "grad_norm": 0.6464347243309021, "learning_rate": 5.338411295063717e-06, "loss": 0.7154, "step": 266690 }, { "epoch": 1.7038702835311705, "grad_norm": 0.820220947265625, "learning_rate": 5.336155586424796e-06, "loss": 1.1589, "step": 266700 }, { "epoch": 1.7039341706809092, "grad_norm": 0.7956279516220093, "learning_rate": 5.333900327589436e-06, "loss": 1.0678, "step": 266710 }, { "epoch": 1.703998057830648, "grad_norm": 1.414975881576538, "learning_rate": 5.33164551858033e-06, "loss": 0.8709, "step": 266720 }, { "epoch": 1.7040619449803867, "grad_norm": 1.05629301071167, "learning_rate": 5.3293911594202105e-06, "loss": 0.6103, "step": 266730 }, { "epoch": 1.7041258321301254, "grad_norm": 1.6264910697937012, "learning_rate": 5.327137250131753e-06, "loss": 0.8797, "step": 266740 }, { "epoch": 1.704189719279864, "grad_norm": 1.1576440334320068, "learning_rate": 5.324883790737684e-06, "loss": 0.9157, "step": 266750 }, { "epoch": 1.7042536064296028, "grad_norm": 0.9160033464431763, "learning_rate": 5.322630781260679e-06, "loss": 0.7215, "step": 266760 }, { "epoch": 1.7043174935793415, "grad_norm": 1.0382755994796753, "learning_rate": 5.320378221723438e-06, "loss": 1.0271, "step": 266770 }, { "epoch": 1.7043813807290802, "grad_norm": 0.8948391675949097, "learning_rate": 5.31812611214863e-06, "loss": 0.7832, "step": 266780 }, { "epoch": 1.704445267878819, "grad_norm": 0.992671549320221, "learning_rate": 5.315874452558961e-06, "loss": 0.7864, "step": 266790 }, { "epoch": 1.7045091550285576, "grad_norm": 0.9801939725875854, "learning_rate": 5.3136232429770835e-06, "loss": 1.0619, "step": 266800 }, { "epoch": 1.7045730421782963, "grad_norm": 0.9736009240150452, "learning_rate": 5.3113724834256916e-06, "loss": 0.747, "step": 266810 }, { "epoch": 1.704636929328035, "grad_norm": 1.2591882944107056, "learning_rate": 5.309122173927433e-06, "loss": 0.9616, "step": 266820 }, { "epoch": 1.7047008164777737, "grad_norm": 0.9540112614631653, "learning_rate": 5.306872314504974e-06, "loss": 0.8396, "step": 266830 }, { "epoch": 1.7047647036275122, "grad_norm": 1.0584015846252441, "learning_rate": 5.304622905180983e-06, "loss": 1.0238, "step": 266840 }, { "epoch": 1.7048285907772511, "grad_norm": 1.0345137119293213, "learning_rate": 5.302373945978095e-06, "loss": 0.884, "step": 266850 }, { "epoch": 1.7048924779269896, "grad_norm": 0.7487316727638245, "learning_rate": 5.300125436918979e-06, "loss": 1.0031, "step": 266860 }, { "epoch": 1.7049563650767285, "grad_norm": 1.27576744556427, "learning_rate": 5.297877378026267e-06, "loss": 0.6429, "step": 266870 }, { "epoch": 1.705020252226467, "grad_norm": 1.08945631980896, "learning_rate": 5.295629769322607e-06, "loss": 0.8816, "step": 266880 }, { "epoch": 1.705084139376206, "grad_norm": 1.1191450357437134, "learning_rate": 5.293382610830622e-06, "loss": 0.8933, "step": 266890 }, { "epoch": 1.7051480265259444, "grad_norm": 0.895653247833252, "learning_rate": 5.291135902572964e-06, "loss": 0.8744, "step": 266900 }, { "epoch": 1.7052119136756834, "grad_norm": 0.926418662071228, "learning_rate": 5.288889644572231e-06, "loss": 0.8063, "step": 266910 }, { "epoch": 1.7052758008254219, "grad_norm": 0.8096438050270081, "learning_rate": 5.286643836851069e-06, "loss": 0.7903, "step": 266920 }, { "epoch": 1.7053396879751608, "grad_norm": 1.0355230569839478, "learning_rate": 5.284398479432079e-06, "loss": 0.9991, "step": 266930 }, { "epoch": 1.7054035751248993, "grad_norm": 0.9099029302597046, "learning_rate": 5.282153572337895e-06, "loss": 0.9656, "step": 266940 }, { "epoch": 1.7054674622746382, "grad_norm": 0.8696787357330322, "learning_rate": 5.279909115591092e-06, "loss": 0.9073, "step": 266950 }, { "epoch": 1.7055313494243767, "grad_norm": 0.9176293611526489, "learning_rate": 5.277665109214297e-06, "loss": 0.8014, "step": 266960 }, { "epoch": 1.7055952365741156, "grad_norm": 1.0077580213546753, "learning_rate": 5.27542155323012e-06, "loss": 1.0027, "step": 266970 }, { "epoch": 1.705659123723854, "grad_norm": 2.419217824935913, "learning_rate": 5.273178447661125e-06, "loss": 0.8749, "step": 266980 }, { "epoch": 1.705723010873593, "grad_norm": 1.0237098932266235, "learning_rate": 5.270935792529924e-06, "loss": 0.8011, "step": 266990 }, { "epoch": 1.7057868980233315, "grad_norm": 0.925464391708374, "learning_rate": 5.268693587859092e-06, "loss": 0.8459, "step": 267000 }, { "epoch": 1.7058507851730704, "grad_norm": 0.8839012980461121, "learning_rate": 5.266451833671221e-06, "loss": 0.8634, "step": 267010 }, { "epoch": 1.705914672322809, "grad_norm": 0.9144042134284973, "learning_rate": 5.264210529988867e-06, "loss": 1.0476, "step": 267020 }, { "epoch": 1.7059785594725478, "grad_norm": 0.9054514765739441, "learning_rate": 5.261969676834627e-06, "loss": 0.79, "step": 267030 }, { "epoch": 1.7060424466222863, "grad_norm": 1.2751846313476562, "learning_rate": 5.259729274231051e-06, "loss": 0.943, "step": 267040 }, { "epoch": 1.7061063337720253, "grad_norm": 1.316709041595459, "learning_rate": 5.25748932220071e-06, "loss": 0.8292, "step": 267050 }, { "epoch": 1.7061702209217637, "grad_norm": 1.0444445610046387, "learning_rate": 5.255249820766156e-06, "loss": 0.9404, "step": 267060 }, { "epoch": 1.7062341080715027, "grad_norm": 0.821792483329773, "learning_rate": 5.253010769949951e-06, "loss": 0.8681, "step": 267070 }, { "epoch": 1.7062979952212411, "grad_norm": 1.1909120082855225, "learning_rate": 5.250772169774632e-06, "loss": 1.0925, "step": 267080 }, { "epoch": 1.7063618823709799, "grad_norm": 1.0737441778182983, "learning_rate": 5.248534020262757e-06, "loss": 0.868, "step": 267090 }, { "epoch": 1.7064257695207186, "grad_norm": 0.8983291387557983, "learning_rate": 5.246296321436855e-06, "loss": 0.8025, "step": 267100 }, { "epoch": 1.7064896566704573, "grad_norm": 2.5274810791015625, "learning_rate": 5.244059073319474e-06, "loss": 1.0225, "step": 267110 }, { "epoch": 1.706553543820196, "grad_norm": 0.5547528266906738, "learning_rate": 5.241822275933123e-06, "loss": 1.078, "step": 267120 }, { "epoch": 1.7066174309699347, "grad_norm": 1.2405059337615967, "learning_rate": 5.239585929300361e-06, "loss": 0.9678, "step": 267130 }, { "epoch": 1.7066813181196734, "grad_norm": 0.9723836183547974, "learning_rate": 5.237350033443678e-06, "loss": 0.8917, "step": 267140 }, { "epoch": 1.706745205269412, "grad_norm": 0.9931864738464355, "learning_rate": 5.235114588385614e-06, "loss": 0.4923, "step": 267150 }, { "epoch": 1.7068090924191508, "grad_norm": 1.0213977098464966, "learning_rate": 5.232879594148665e-06, "loss": 0.9429, "step": 267160 }, { "epoch": 1.7068729795688895, "grad_norm": 1.1262112855911255, "learning_rate": 5.23064505075535e-06, "loss": 0.9091, "step": 267170 }, { "epoch": 1.7069368667186282, "grad_norm": 1.1662230491638184, "learning_rate": 5.2284109582281745e-06, "loss": 0.9259, "step": 267180 }, { "epoch": 1.707000753868367, "grad_norm": 1.5848547220230103, "learning_rate": 5.226177316589631e-06, "loss": 0.7578, "step": 267190 }, { "epoch": 1.7070646410181056, "grad_norm": 1.2645552158355713, "learning_rate": 5.223944125862224e-06, "loss": 0.927, "step": 267200 }, { "epoch": 1.7071285281678443, "grad_norm": 0.8651253581047058, "learning_rate": 5.221711386068423e-06, "loss": 1.0176, "step": 267210 }, { "epoch": 1.707192415317583, "grad_norm": 1.1839468479156494, "learning_rate": 5.21947909723074e-06, "loss": 0.6918, "step": 267220 }, { "epoch": 1.7072563024673217, "grad_norm": 1.1756478548049927, "learning_rate": 5.2172472593716325e-06, "loss": 0.7856, "step": 267230 }, { "epoch": 1.7073201896170604, "grad_norm": 1.2259622812271118, "learning_rate": 5.215015872513596e-06, "loss": 1.0075, "step": 267240 }, { "epoch": 1.7073840767667992, "grad_norm": 0.8537092208862305, "learning_rate": 5.212784936679088e-06, "loss": 0.8574, "step": 267250 }, { "epoch": 1.7074479639165379, "grad_norm": 0.8326482772827148, "learning_rate": 5.210554451890587e-06, "loss": 0.9249, "step": 267260 }, { "epoch": 1.7075118510662766, "grad_norm": 1.3121106624603271, "learning_rate": 5.208324418170546e-06, "loss": 0.7788, "step": 267270 }, { "epoch": 1.7075757382160153, "grad_norm": 0.8690970540046692, "learning_rate": 5.206094835541436e-06, "loss": 0.9285, "step": 267280 }, { "epoch": 1.707639625365754, "grad_norm": 1.135063886642456, "learning_rate": 5.203865704025695e-06, "loss": 0.9247, "step": 267290 }, { "epoch": 1.7077035125154927, "grad_norm": 0.7573182582855225, "learning_rate": 5.201637023645789e-06, "loss": 0.7598, "step": 267300 }, { "epoch": 1.7077673996652314, "grad_norm": 1.081274390220642, "learning_rate": 5.199408794424154e-06, "loss": 0.8923, "step": 267310 }, { "epoch": 1.70783128681497, "grad_norm": 0.9698079228401184, "learning_rate": 5.197181016383224e-06, "loss": 0.898, "step": 267320 }, { "epoch": 1.7078951739647086, "grad_norm": 1.0417249202728271, "learning_rate": 5.1949536895454454e-06, "loss": 0.731, "step": 267330 }, { "epoch": 1.7079590611144475, "grad_norm": 0.9164946675300598, "learning_rate": 5.1927268139332355e-06, "loss": 1.1054, "step": 267340 }, { "epoch": 1.708022948264186, "grad_norm": 0.632905125617981, "learning_rate": 5.190500389569047e-06, "loss": 0.7001, "step": 267350 }, { "epoch": 1.708086835413925, "grad_norm": 1.301917314529419, "learning_rate": 5.18827441647527e-06, "loss": 0.8591, "step": 267360 }, { "epoch": 1.7081507225636634, "grad_norm": 0.6161066293716431, "learning_rate": 5.186048894674345e-06, "loss": 0.9312, "step": 267370 }, { "epoch": 1.7082146097134023, "grad_norm": 1.6107707023620605, "learning_rate": 5.183823824188672e-06, "loss": 0.7691, "step": 267380 }, { "epoch": 1.7082784968631408, "grad_norm": 0.7146519422531128, "learning_rate": 5.181599205040671e-06, "loss": 0.6657, "step": 267390 }, { "epoch": 1.7083423840128797, "grad_norm": 0.8204352855682373, "learning_rate": 5.1793750372527376e-06, "loss": 1.0253, "step": 267400 }, { "epoch": 1.7084062711626182, "grad_norm": 0.8192850351333618, "learning_rate": 5.177151320847273e-06, "loss": 0.7452, "step": 267410 }, { "epoch": 1.7084701583123572, "grad_norm": 1.2230883836746216, "learning_rate": 5.174928055846667e-06, "loss": 0.8296, "step": 267420 }, { "epoch": 1.7085340454620956, "grad_norm": 0.8661963939666748, "learning_rate": 5.172705242273324e-06, "loss": 0.8515, "step": 267430 }, { "epoch": 1.7085979326118346, "grad_norm": 0.7626175880432129, "learning_rate": 5.170482880149608e-06, "loss": 0.7143, "step": 267440 }, { "epoch": 1.708661819761573, "grad_norm": 0.7892603874206543, "learning_rate": 5.1682609694979236e-06, "loss": 0.8103, "step": 267450 }, { "epoch": 1.708725706911312, "grad_norm": 0.9617498517036438, "learning_rate": 5.1660395103406255e-06, "loss": 0.8888, "step": 267460 }, { "epoch": 1.7087895940610505, "grad_norm": 1.0265048742294312, "learning_rate": 5.1638185027001125e-06, "loss": 0.9709, "step": 267470 }, { "epoch": 1.7088534812107894, "grad_norm": 1.4228737354278564, "learning_rate": 5.161597946598717e-06, "loss": 0.8084, "step": 267480 }, { "epoch": 1.7089173683605279, "grad_norm": 1.0275636911392212, "learning_rate": 5.159377842058826e-06, "loss": 0.7827, "step": 267490 }, { "epoch": 1.7089812555102668, "grad_norm": 0.8710563778877258, "learning_rate": 5.157158189102801e-06, "loss": 0.9638, "step": 267500 }, { "epoch": 1.7090451426600053, "grad_norm": 1.2731144428253174, "learning_rate": 5.154938987752983e-06, "loss": 0.8887, "step": 267510 }, { "epoch": 1.7091090298097442, "grad_norm": 0.8427534103393555, "learning_rate": 5.152720238031727e-06, "loss": 0.8941, "step": 267520 }, { "epoch": 1.7091729169594827, "grad_norm": 1.2673020362854004, "learning_rate": 5.150501939961372e-06, "loss": 0.7282, "step": 267530 }, { "epoch": 1.7092368041092216, "grad_norm": 0.9547025561332703, "learning_rate": 5.1482840935642765e-06, "loss": 0.813, "step": 267540 }, { "epoch": 1.7093006912589601, "grad_norm": 0.8429440855979919, "learning_rate": 5.146066698862745e-06, "loss": 0.7106, "step": 267550 }, { "epoch": 1.7093645784086988, "grad_norm": 0.9609322547912598, "learning_rate": 5.143849755879138e-06, "loss": 1.0052, "step": 267560 }, { "epoch": 1.7094284655584375, "grad_norm": 0.7594335079193115, "learning_rate": 5.141633264635765e-06, "loss": 1.0813, "step": 267570 }, { "epoch": 1.7094923527081762, "grad_norm": 0.7355329990386963, "learning_rate": 5.1394172251549575e-06, "loss": 0.7209, "step": 267580 }, { "epoch": 1.709556239857915, "grad_norm": 1.0001919269561768, "learning_rate": 5.137201637459016e-06, "loss": 0.7211, "step": 267590 }, { "epoch": 1.7096201270076536, "grad_norm": 1.5241398811340332, "learning_rate": 5.134986501570283e-06, "loss": 0.8565, "step": 267600 }, { "epoch": 1.7096840141573924, "grad_norm": 1.0557806491851807, "learning_rate": 5.1327718175110336e-06, "loss": 0.8543, "step": 267610 }, { "epoch": 1.709747901307131, "grad_norm": 0.9856024980545044, "learning_rate": 5.130557585303602e-06, "loss": 0.8925, "step": 267620 }, { "epoch": 1.7098117884568698, "grad_norm": 1.1883968114852905, "learning_rate": 5.128343804970265e-06, "loss": 0.8502, "step": 267630 }, { "epoch": 1.7098756756066085, "grad_norm": 0.9160344004631042, "learning_rate": 5.126130476533331e-06, "loss": 0.761, "step": 267640 }, { "epoch": 1.7099395627563472, "grad_norm": 0.901768147945404, "learning_rate": 5.123917600015071e-06, "loss": 1.0517, "step": 267650 }, { "epoch": 1.7100034499060859, "grad_norm": 0.9727572202682495, "learning_rate": 5.121705175437802e-06, "loss": 0.8152, "step": 267660 }, { "epoch": 1.7100673370558246, "grad_norm": 0.9629524350166321, "learning_rate": 5.119493202823772e-06, "loss": 0.7479, "step": 267670 }, { "epoch": 1.7101312242055633, "grad_norm": 0.6062182188034058, "learning_rate": 5.1172816821952855e-06, "loss": 0.7897, "step": 267680 }, { "epoch": 1.710195111355302, "grad_norm": 1.1127554178237915, "learning_rate": 5.115070613574591e-06, "loss": 0.9108, "step": 267690 }, { "epoch": 1.7102589985050407, "grad_norm": 1.275471806526184, "learning_rate": 5.112859996983965e-06, "loss": 0.7668, "step": 267700 }, { "epoch": 1.7103228856547794, "grad_norm": 1.0158495903015137, "learning_rate": 5.110649832445685e-06, "loss": 1.028, "step": 267710 }, { "epoch": 1.7103867728045181, "grad_norm": 0.7695732116699219, "learning_rate": 5.1084401199819835e-06, "loss": 0.8964, "step": 267720 }, { "epoch": 1.7104506599542568, "grad_norm": 1.0707507133483887, "learning_rate": 5.106230859615135e-06, "loss": 0.7492, "step": 267730 }, { "epoch": 1.7105145471039955, "grad_norm": 0.9138712286949158, "learning_rate": 5.1040220513673745e-06, "loss": 0.8334, "step": 267740 }, { "epoch": 1.7105784342537342, "grad_norm": 0.6058676242828369, "learning_rate": 5.101813695260959e-06, "loss": 0.6874, "step": 267750 }, { "epoch": 1.710642321403473, "grad_norm": 0.9319490790367126, "learning_rate": 5.099605791318113e-06, "loss": 0.6997, "step": 267760 }, { "epoch": 1.7107062085532116, "grad_norm": 1.1302191019058228, "learning_rate": 5.097398339561088e-06, "loss": 1.0276, "step": 267770 }, { "epoch": 1.7107700957029504, "grad_norm": 0.9411876201629639, "learning_rate": 5.095191340012101e-06, "loss": 0.9568, "step": 267780 }, { "epoch": 1.710833982852689, "grad_norm": 1.1420869827270508, "learning_rate": 5.092984792693406e-06, "loss": 1.1395, "step": 267790 }, { "epoch": 1.7108978700024275, "grad_norm": 0.8047879338264465, "learning_rate": 5.090778697627185e-06, "loss": 0.912, "step": 267800 }, { "epoch": 1.7109617571521665, "grad_norm": 4.064144611358643, "learning_rate": 5.0885730548356765e-06, "loss": 0.7981, "step": 267810 }, { "epoch": 1.711025644301905, "grad_norm": 0.9391950964927673, "learning_rate": 5.086367864341096e-06, "loss": 0.9571, "step": 267820 }, { "epoch": 1.7110895314516439, "grad_norm": 0.7875942587852478, "learning_rate": 5.084163126165642e-06, "loss": 0.96, "step": 267830 }, { "epoch": 1.7111534186013824, "grad_norm": 0.9419967532157898, "learning_rate": 5.08195884033153e-06, "loss": 0.6922, "step": 267840 }, { "epoch": 1.7112173057511213, "grad_norm": 0.8862299919128418, "learning_rate": 5.079755006860943e-06, "loss": 0.6511, "step": 267850 }, { "epoch": 1.7112811929008598, "grad_norm": 0.6669632792472839, "learning_rate": 5.077551625776089e-06, "loss": 1.0208, "step": 267860 }, { "epoch": 1.7113450800505987, "grad_norm": 0.6567518711090088, "learning_rate": 5.075348697099152e-06, "loss": 0.8282, "step": 267870 }, { "epoch": 1.7114089672003372, "grad_norm": 0.8719609379768372, "learning_rate": 5.073146220852326e-06, "loss": 0.7571, "step": 267880 }, { "epoch": 1.7114728543500761, "grad_norm": 1.4976168870925903, "learning_rate": 5.0709441970577695e-06, "loss": 0.897, "step": 267890 }, { "epoch": 1.7115367414998146, "grad_norm": 4.700047016143799, "learning_rate": 5.068742625737694e-06, "loss": 1.1389, "step": 267900 }, { "epoch": 1.7116006286495535, "grad_norm": 0.707741916179657, "learning_rate": 5.066541506914235e-06, "loss": 0.7893, "step": 267910 }, { "epoch": 1.711664515799292, "grad_norm": 0.6900407075881958, "learning_rate": 5.064340840609588e-06, "loss": 0.7887, "step": 267920 }, { "epoch": 1.711728402949031, "grad_norm": 1.4416577816009521, "learning_rate": 5.06214062684589e-06, "loss": 0.7995, "step": 267930 }, { "epoch": 1.7117922900987694, "grad_norm": 0.8847475647926331, "learning_rate": 5.059940865645324e-06, "loss": 0.6996, "step": 267940 }, { "epoch": 1.7118561772485084, "grad_norm": 1.253787636756897, "learning_rate": 5.057741557030027e-06, "loss": 0.9299, "step": 267950 }, { "epoch": 1.7119200643982468, "grad_norm": 0.7740673422813416, "learning_rate": 5.055542701022159e-06, "loss": 0.7667, "step": 267960 }, { "epoch": 1.7119839515479858, "grad_norm": 1.0420721769332886, "learning_rate": 5.053344297643847e-06, "loss": 0.9228, "step": 267970 }, { "epoch": 1.7120478386977243, "grad_norm": 0.8889881372451782, "learning_rate": 5.051146346917257e-06, "loss": 1.0123, "step": 267980 }, { "epoch": 1.7121117258474632, "grad_norm": 1.4052612781524658, "learning_rate": 5.048948848864499e-06, "loss": 0.9129, "step": 267990 }, { "epoch": 1.7121756129972017, "grad_norm": 1.0282213687896729, "learning_rate": 5.046751803507721e-06, "loss": 0.8117, "step": 268000 }, { "epoch": 1.7122395001469406, "grad_norm": 1.0799697637557983, "learning_rate": 5.0445552108690345e-06, "loss": 0.8317, "step": 268010 }, { "epoch": 1.712303387296679, "grad_norm": 0.5775130987167358, "learning_rate": 5.042359070970564e-06, "loss": 0.7675, "step": 268020 }, { "epoch": 1.712367274446418, "grad_norm": 0.9590504765510559, "learning_rate": 5.0401633838344506e-06, "loss": 0.9477, "step": 268030 }, { "epoch": 1.7124311615961565, "grad_norm": 0.8965784907341003, "learning_rate": 5.037968149482769e-06, "loss": 0.8345, "step": 268040 }, { "epoch": 1.7124950487458952, "grad_norm": 0.86485755443573, "learning_rate": 5.035773367937663e-06, "loss": 0.9179, "step": 268050 }, { "epoch": 1.712558935895634, "grad_norm": 1.9492807388305664, "learning_rate": 5.033579039221204e-06, "loss": 0.7574, "step": 268060 }, { "epoch": 1.7126228230453726, "grad_norm": 0.7462204694747925, "learning_rate": 5.031385163355518e-06, "loss": 0.9084, "step": 268070 }, { "epoch": 1.7126867101951113, "grad_norm": 0.8131275177001953, "learning_rate": 5.029191740362677e-06, "loss": 0.8372, "step": 268080 }, { "epoch": 1.71275059734485, "grad_norm": 1.006062388420105, "learning_rate": 5.02699877026479e-06, "loss": 0.9251, "step": 268090 }, { "epoch": 1.7128144844945887, "grad_norm": 0.8451302647590637, "learning_rate": 5.024806253083919e-06, "loss": 0.9558, "step": 268100 }, { "epoch": 1.7128783716443274, "grad_norm": 1.225652813911438, "learning_rate": 5.022614188842173e-06, "loss": 0.7124, "step": 268110 }, { "epoch": 1.7129422587940661, "grad_norm": 0.7671294212341309, "learning_rate": 5.020422577561601e-06, "loss": 0.8769, "step": 268120 }, { "epoch": 1.7130061459438048, "grad_norm": 2.2277419567108154, "learning_rate": 5.0182314192642965e-06, "loss": 0.8576, "step": 268130 }, { "epoch": 1.7130700330935436, "grad_norm": 1.1126054525375366, "learning_rate": 5.016040713972309e-06, "loss": 0.9303, "step": 268140 }, { "epoch": 1.7131339202432823, "grad_norm": 1.0343739986419678, "learning_rate": 5.013850461707714e-06, "loss": 0.9315, "step": 268150 }, { "epoch": 1.713197807393021, "grad_norm": 1.4181034564971924, "learning_rate": 5.011660662492557e-06, "loss": 0.8053, "step": 268160 }, { "epoch": 1.7132616945427597, "grad_norm": 0.5171089768409729, "learning_rate": 5.009471316348902e-06, "loss": 0.9135, "step": 268170 }, { "epoch": 1.7133255816924984, "grad_norm": 0.8013474941253662, "learning_rate": 5.007282423298787e-06, "loss": 0.8237, "step": 268180 }, { "epoch": 1.713389468842237, "grad_norm": 2.410977602005005, "learning_rate": 5.005093983364273e-06, "loss": 0.8935, "step": 268190 }, { "epoch": 1.7134533559919758, "grad_norm": 1.1010009050369263, "learning_rate": 5.002905996567381e-06, "loss": 0.7858, "step": 268200 }, { "epoch": 1.7135172431417145, "grad_norm": 1.1124086380004883, "learning_rate": 5.00071846293016e-06, "loss": 0.685, "step": 268210 }, { "epoch": 1.7135811302914532, "grad_norm": 0.676537036895752, "learning_rate": 4.998531382474625e-06, "loss": 0.789, "step": 268220 }, { "epoch": 1.713645017441192, "grad_norm": 0.8748432397842407, "learning_rate": 4.996344755222809e-06, "loss": 0.715, "step": 268230 }, { "epoch": 1.7137089045909306, "grad_norm": 1.8939337730407715, "learning_rate": 4.994158581196745e-06, "loss": 0.7261, "step": 268240 }, { "epoch": 1.7137727917406693, "grad_norm": 0.611475944519043, "learning_rate": 4.991972860418431e-06, "loss": 1.2012, "step": 268250 }, { "epoch": 1.713836678890408, "grad_norm": 0.9227023720741272, "learning_rate": 4.9897875929099005e-06, "loss": 1.025, "step": 268260 }, { "epoch": 1.7139005660401467, "grad_norm": 1.0684404373168945, "learning_rate": 4.987602778693146e-06, "loss": 0.8479, "step": 268270 }, { "epoch": 1.7139644531898854, "grad_norm": 0.8674401044845581, "learning_rate": 4.9854184177901716e-06, "loss": 0.8763, "step": 268280 }, { "epoch": 1.714028340339624, "grad_norm": 0.8571915626525879, "learning_rate": 4.983234510222967e-06, "loss": 0.7797, "step": 268290 }, { "epoch": 1.7140922274893629, "grad_norm": 0.41056373715400696, "learning_rate": 4.981051056013547e-06, "loss": 0.7954, "step": 268300 }, { "epoch": 1.7141561146391013, "grad_norm": 1.1498006582260132, "learning_rate": 4.978868055183877e-06, "loss": 0.9702, "step": 268310 }, { "epoch": 1.7142200017888403, "grad_norm": 2.003131628036499, "learning_rate": 4.976685507755969e-06, "loss": 0.7933, "step": 268320 }, { "epoch": 1.7142838889385787, "grad_norm": 0.7004699110984802, "learning_rate": 4.974503413751774e-06, "loss": 0.7621, "step": 268330 }, { "epoch": 1.7143477760883177, "grad_norm": 2.447214365005493, "learning_rate": 4.9723217731932894e-06, "loss": 1.0202, "step": 268340 }, { "epoch": 1.7144116632380562, "grad_norm": 1.0208044052124023, "learning_rate": 4.970140586102484e-06, "loss": 0.9783, "step": 268350 }, { "epoch": 1.714475550387795, "grad_norm": 0.8741488456726074, "learning_rate": 4.967959852501308e-06, "loss": 0.9445, "step": 268360 }, { "epoch": 1.7145394375375336, "grad_norm": 1.2652167081832886, "learning_rate": 4.965779572411744e-06, "loss": 0.8427, "step": 268370 }, { "epoch": 1.7146033246872725, "grad_norm": 0.9176733493804932, "learning_rate": 4.963817708101692e-06, "loss": 1.1657, "step": 268380 }, { "epoch": 1.714667211837011, "grad_norm": 1.2837026119232178, "learning_rate": 4.9616382897446634e-06, "loss": 0.8361, "step": 268390 }, { "epoch": 1.71473109898675, "grad_norm": 1.0069950819015503, "learning_rate": 4.959459324962895e-06, "loss": 0.8575, "step": 268400 }, { "epoch": 1.7147949861364884, "grad_norm": 1.258731722831726, "learning_rate": 4.9572808137783425e-06, "loss": 0.8353, "step": 268410 }, { "epoch": 1.7148588732862273, "grad_norm": 0.8530985116958618, "learning_rate": 4.955102756212937e-06, "loss": 0.9056, "step": 268420 }, { "epoch": 1.7149227604359658, "grad_norm": 1.0610921382904053, "learning_rate": 4.952925152288623e-06, "loss": 0.9901, "step": 268430 }, { "epoch": 1.7149866475857047, "grad_norm": 1.0828012228012085, "learning_rate": 4.950748002027311e-06, "loss": 0.7618, "step": 268440 }, { "epoch": 1.7150505347354432, "grad_norm": 0.7953183650970459, "learning_rate": 4.948571305450938e-06, "loss": 0.9028, "step": 268450 }, { "epoch": 1.7151144218851821, "grad_norm": 1.4064747095108032, "learning_rate": 4.946395062581438e-06, "loss": 0.9099, "step": 268460 }, { "epoch": 1.7151783090349206, "grad_norm": 1.049475073814392, "learning_rate": 4.944219273440709e-06, "loss": 0.7286, "step": 268470 }, { "epoch": 1.7152421961846596, "grad_norm": 0.9824262261390686, "learning_rate": 4.942043938050678e-06, "loss": 1.0499, "step": 268480 }, { "epoch": 1.715306083334398, "grad_norm": 0.9291768074035645, "learning_rate": 4.939869056433233e-06, "loss": 0.8185, "step": 268490 }, { "epoch": 1.715369970484137, "grad_norm": 2.494187831878662, "learning_rate": 4.937694628610301e-06, "loss": 0.9579, "step": 268500 }, { "epoch": 1.7154338576338755, "grad_norm": 0.866105318069458, "learning_rate": 4.935520654603759e-06, "loss": 0.7383, "step": 268510 }, { "epoch": 1.7154977447836144, "grad_norm": 0.681208610534668, "learning_rate": 4.933347134435523e-06, "loss": 0.7087, "step": 268520 }, { "epoch": 1.7155616319333529, "grad_norm": 0.7366973757743835, "learning_rate": 4.931174068127459e-06, "loss": 0.7085, "step": 268530 }, { "epoch": 1.7156255190830916, "grad_norm": 0.9923181533813477, "learning_rate": 4.929001455701471e-06, "loss": 0.6017, "step": 268540 }, { "epoch": 1.7156894062328303, "grad_norm": 0.7506087422370911, "learning_rate": 4.926829297179419e-06, "loss": 0.7351, "step": 268550 }, { "epoch": 1.715753293382569, "grad_norm": 0.7987860441207886, "learning_rate": 4.924657592583198e-06, "loss": 0.9795, "step": 268560 }, { "epoch": 1.7158171805323077, "grad_norm": 0.9374541640281677, "learning_rate": 4.922486341934662e-06, "loss": 0.9364, "step": 268570 }, { "epoch": 1.7158810676820464, "grad_norm": 1.0733833312988281, "learning_rate": 4.920315545255699e-06, "loss": 0.768, "step": 268580 }, { "epoch": 1.715944954831785, "grad_norm": 0.8189911842346191, "learning_rate": 4.918145202568147e-06, "loss": 0.9444, "step": 268590 }, { "epoch": 1.7160088419815238, "grad_norm": 1.0195735692977905, "learning_rate": 4.915975313893884e-06, "loss": 0.8306, "step": 268600 }, { "epoch": 1.7160727291312625, "grad_norm": 0.9979621171951294, "learning_rate": 4.913805879254746e-06, "loss": 1.1678, "step": 268610 }, { "epoch": 1.7161366162810012, "grad_norm": 0.8876195549964905, "learning_rate": 4.911636898672589e-06, "loss": 0.8957, "step": 268620 }, { "epoch": 1.71620050343074, "grad_norm": 5.677492618560791, "learning_rate": 4.909468372169251e-06, "loss": 0.8721, "step": 268630 }, { "epoch": 1.7162643905804786, "grad_norm": 0.6202548742294312, "learning_rate": 4.907300299766588e-06, "loss": 0.7232, "step": 268640 }, { "epoch": 1.7163282777302173, "grad_norm": 0.7246150374412537, "learning_rate": 4.905132681486407e-06, "loss": 0.7844, "step": 268650 }, { "epoch": 1.716392164879956, "grad_norm": 1.0750226974487305, "learning_rate": 4.902965517350555e-06, "loss": 1.0428, "step": 268660 }, { "epoch": 1.7164560520296948, "grad_norm": 0.8959199786186218, "learning_rate": 4.9007988073808635e-06, "loss": 0.9407, "step": 268670 }, { "epoch": 1.7165199391794335, "grad_norm": 0.5611230134963989, "learning_rate": 4.898632551599136e-06, "loss": 0.8953, "step": 268680 }, { "epoch": 1.7165838263291722, "grad_norm": 1.1511446237564087, "learning_rate": 4.896466750027206e-06, "loss": 0.8638, "step": 268690 }, { "epoch": 1.7166477134789109, "grad_norm": 1.0932697057724, "learning_rate": 4.894301402686868e-06, "loss": 0.8832, "step": 268700 }, { "epoch": 1.7167116006286496, "grad_norm": 1.048201322555542, "learning_rate": 4.892136509599943e-06, "loss": 0.824, "step": 268710 }, { "epoch": 1.7167754877783883, "grad_norm": 1.5334866046905518, "learning_rate": 4.889972070788218e-06, "loss": 0.8154, "step": 268720 }, { "epoch": 1.716839374928127, "grad_norm": 0.6228106617927551, "learning_rate": 4.88780808627351e-06, "loss": 0.7755, "step": 268730 }, { "epoch": 1.7169032620778657, "grad_norm": 1.0742732286453247, "learning_rate": 4.8856445560775955e-06, "loss": 0.7916, "step": 268740 }, { "epoch": 1.7169671492276044, "grad_norm": 0.5827720165252686, "learning_rate": 4.88348148022228e-06, "loss": 0.7855, "step": 268750 }, { "epoch": 1.7170310363773431, "grad_norm": 0.817838191986084, "learning_rate": 4.881318858729322e-06, "loss": 0.8654, "step": 268760 }, { "epoch": 1.7170949235270818, "grad_norm": 1.219152569770813, "learning_rate": 4.879156691620529e-06, "loss": 1.1188, "step": 268770 }, { "epoch": 1.7171588106768203, "grad_norm": 0.955420732498169, "learning_rate": 4.876994978917654e-06, "loss": 0.8037, "step": 268780 }, { "epoch": 1.7172226978265592, "grad_norm": 1.0800939798355103, "learning_rate": 4.874833720642485e-06, "loss": 0.9102, "step": 268790 }, { "epoch": 1.7172865849762977, "grad_norm": 1.5440417528152466, "learning_rate": 4.872672916816767e-06, "loss": 0.8647, "step": 268800 }, { "epoch": 1.7173504721260366, "grad_norm": 1.195446252822876, "learning_rate": 4.8705125674622875e-06, "loss": 0.8166, "step": 268810 }, { "epoch": 1.7174143592757751, "grad_norm": 1.2400975227355957, "learning_rate": 4.8683526726007786e-06, "loss": 0.9157, "step": 268820 }, { "epoch": 1.717478246425514, "grad_norm": 1.1519098281860352, "learning_rate": 4.866193232254013e-06, "loss": 0.8856, "step": 268830 }, { "epoch": 1.7175421335752525, "grad_norm": 1.1491273641586304, "learning_rate": 4.864034246443716e-06, "loss": 0.8573, "step": 268840 }, { "epoch": 1.7176060207249915, "grad_norm": 1.2674658298492432, "learning_rate": 4.861875715191655e-06, "loss": 1.0786, "step": 268850 }, { "epoch": 1.71766990787473, "grad_norm": 0.8837629556655884, "learning_rate": 4.8597176385195455e-06, "loss": 0.8156, "step": 268860 }, { "epoch": 1.7177337950244689, "grad_norm": 1.1899993419647217, "learning_rate": 4.857560016449125e-06, "loss": 0.7566, "step": 268870 }, { "epoch": 1.7177976821742074, "grad_norm": 0.659336268901825, "learning_rate": 4.8554028490021445e-06, "loss": 0.8262, "step": 268880 }, { "epoch": 1.7178615693239463, "grad_norm": 0.8648936152458191, "learning_rate": 4.853246136200301e-06, "loss": 0.8686, "step": 268890 }, { "epoch": 1.7179254564736848, "grad_norm": 1.1850214004516602, "learning_rate": 4.85108987806534e-06, "loss": 0.7712, "step": 268900 }, { "epoch": 1.7179893436234237, "grad_norm": 1.073351502418518, "learning_rate": 4.84893407461896e-06, "loss": 1.0364, "step": 268910 }, { "epoch": 1.7180532307731622, "grad_norm": 0.8370411992073059, "learning_rate": 4.846778725882878e-06, "loss": 0.7372, "step": 268920 }, { "epoch": 1.7181171179229011, "grad_norm": 0.6169006824493408, "learning_rate": 4.8446238318787805e-06, "loss": 0.7435, "step": 268930 }, { "epoch": 1.7181810050726396, "grad_norm": 1.106313705444336, "learning_rate": 4.842469392628402e-06, "loss": 0.9299, "step": 268940 }, { "epoch": 1.7182448922223785, "grad_norm": 1.41398024559021, "learning_rate": 4.840315408153412e-06, "loss": 0.9412, "step": 268950 }, { "epoch": 1.718308779372117, "grad_norm": 1.2122138738632202, "learning_rate": 4.838161878475528e-06, "loss": 0.9142, "step": 268960 }, { "epoch": 1.718372666521856, "grad_norm": 1.6231672763824463, "learning_rate": 4.836008803616409e-06, "loss": 0.7854, "step": 268970 }, { "epoch": 1.7184365536715944, "grad_norm": 0.95048987865448, "learning_rate": 4.833856183597757e-06, "loss": 0.7186, "step": 268980 }, { "epoch": 1.7185004408213334, "grad_norm": 1.2387018203735352, "learning_rate": 4.831704018441252e-06, "loss": 0.8779, "step": 268990 }, { "epoch": 1.7185643279710718, "grad_norm": 1.0356347560882568, "learning_rate": 4.829552308168561e-06, "loss": 0.706, "step": 269000 }, { "epoch": 1.7186282151208108, "grad_norm": 1.0229053497314453, "learning_rate": 4.8274010528013615e-06, "loss": 0.782, "step": 269010 }, { "epoch": 1.7186921022705492, "grad_norm": 0.9773396849632263, "learning_rate": 4.8252502523613076e-06, "loss": 0.785, "step": 269020 }, { "epoch": 1.718755989420288, "grad_norm": 0.7615492939949036, "learning_rate": 4.823099906870071e-06, "loss": 1.1049, "step": 269030 }, { "epoch": 1.7188198765700267, "grad_norm": 0.9052096605300903, "learning_rate": 4.820950016349296e-06, "loss": 0.7574, "step": 269040 }, { "epoch": 1.7188837637197654, "grad_norm": 0.8551297187805176, "learning_rate": 4.818800580820642e-06, "loss": 1.1223, "step": 269050 }, { "epoch": 1.718947650869504, "grad_norm": 0.8088269829750061, "learning_rate": 4.816651600305755e-06, "loss": 0.7293, "step": 269060 }, { "epoch": 1.7190115380192428, "grad_norm": 0.7828858494758606, "learning_rate": 4.8145030748262766e-06, "loss": 0.7911, "step": 269070 }, { "epoch": 1.7190754251689815, "grad_norm": 0.9467841386795044, "learning_rate": 4.8123550044038345e-06, "loss": 0.859, "step": 269080 }, { "epoch": 1.7191393123187202, "grad_norm": 1.5727406740188599, "learning_rate": 4.810207389060084e-06, "loss": 1.0195, "step": 269090 }, { "epoch": 1.719203199468459, "grad_norm": 1.2755558490753174, "learning_rate": 4.80806022881663e-06, "loss": 1.0695, "step": 269100 }, { "epoch": 1.7192670866181976, "grad_norm": 1.217269778251648, "learning_rate": 4.80591352369511e-06, "loss": 0.7818, "step": 269110 }, { "epoch": 1.7193309737679363, "grad_norm": 0.8727430701255798, "learning_rate": 4.803767273717136e-06, "loss": 0.5781, "step": 269120 }, { "epoch": 1.719394860917675, "grad_norm": 1.093943476676941, "learning_rate": 4.801621478904333e-06, "loss": 0.9036, "step": 269130 }, { "epoch": 1.7194587480674137, "grad_norm": 0.843802809715271, "learning_rate": 4.799476139278291e-06, "loss": 0.859, "step": 269140 }, { "epoch": 1.7195226352171524, "grad_norm": 1.0293015241622925, "learning_rate": 4.797331254860643e-06, "loss": 0.9742, "step": 269150 }, { "epoch": 1.7195865223668911, "grad_norm": 1.540500283241272, "learning_rate": 4.7951868256729645e-06, "loss": 1.136, "step": 269160 }, { "epoch": 1.7196504095166298, "grad_norm": 1.239723801612854, "learning_rate": 4.793042851736867e-06, "loss": 0.6302, "step": 269170 }, { "epoch": 1.7197142966663685, "grad_norm": 0.7998931407928467, "learning_rate": 4.7908993330739334e-06, "loss": 0.9544, "step": 269180 }, { "epoch": 1.7197781838161073, "grad_norm": 0.8161479234695435, "learning_rate": 4.7887562697057575e-06, "loss": 0.7468, "step": 269190 }, { "epoch": 1.719842070965846, "grad_norm": 1.1992876529693604, "learning_rate": 4.786613661653922e-06, "loss": 0.8672, "step": 269200 }, { "epoch": 1.7199059581155847, "grad_norm": 1.1844300031661987, "learning_rate": 4.7844715089399974e-06, "loss": 0.9564, "step": 269210 }, { "epoch": 1.7199698452653234, "grad_norm": 0.971801221370697, "learning_rate": 4.782329811585567e-06, "loss": 0.7125, "step": 269220 }, { "epoch": 1.720033732415062, "grad_norm": 0.8483443260192871, "learning_rate": 4.780188569612187e-06, "loss": 0.9674, "step": 269230 }, { "epoch": 1.7200976195648008, "grad_norm": 1.1602776050567627, "learning_rate": 4.7780477830414394e-06, "loss": 1.4011, "step": 269240 }, { "epoch": 1.7201615067145395, "grad_norm": 1.1706547737121582, "learning_rate": 4.775907451894862e-06, "loss": 0.9959, "step": 269250 }, { "epoch": 1.7202253938642782, "grad_norm": 1.2372910976409912, "learning_rate": 4.7737675761940324e-06, "loss": 0.8608, "step": 269260 }, { "epoch": 1.7202892810140167, "grad_norm": 1.0723450183868408, "learning_rate": 4.771628155960478e-06, "loss": 0.9406, "step": 269270 }, { "epoch": 1.7203531681637556, "grad_norm": 1.5556002855300903, "learning_rate": 4.769489191215765e-06, "loss": 0.7714, "step": 269280 }, { "epoch": 1.720417055313494, "grad_norm": 2.422309160232544, "learning_rate": 4.767350681981419e-06, "loss": 0.8202, "step": 269290 }, { "epoch": 1.720480942463233, "grad_norm": 0.886701762676239, "learning_rate": 4.7652126282789925e-06, "loss": 0.9533, "step": 269300 }, { "epoch": 1.7205448296129715, "grad_norm": 0.765505850315094, "learning_rate": 4.763075030129999e-06, "loss": 0.8945, "step": 269310 }, { "epoch": 1.7206087167627104, "grad_norm": 1.1220277547836304, "learning_rate": 4.760937887555983e-06, "loss": 0.8573, "step": 269320 }, { "epoch": 1.720672603912449, "grad_norm": 0.831519365310669, "learning_rate": 4.75880120057845e-06, "loss": 0.9408, "step": 269330 }, { "epoch": 1.7207364910621878, "grad_norm": 1.3372504711151123, "learning_rate": 4.756664969218938e-06, "loss": 0.7414, "step": 269340 }, { "epoch": 1.7208003782119263, "grad_norm": 1.2090286016464233, "learning_rate": 4.754529193498947e-06, "loss": 0.8006, "step": 269350 }, { "epoch": 1.7208642653616653, "grad_norm": 0.7840335965156555, "learning_rate": 4.752393873439992e-06, "loss": 1.1769, "step": 269360 }, { "epoch": 1.7209281525114037, "grad_norm": 0.844284176826477, "learning_rate": 4.750259009063568e-06, "loss": 0.7698, "step": 269370 }, { "epoch": 1.7209920396611427, "grad_norm": 1.0220428705215454, "learning_rate": 4.748124600391191e-06, "loss": 0.9851, "step": 269380 }, { "epoch": 1.7210559268108812, "grad_norm": 0.8599318265914917, "learning_rate": 4.745990647444354e-06, "loss": 0.7958, "step": 269390 }, { "epoch": 1.72111981396062, "grad_norm": 0.9162972569465637, "learning_rate": 4.743857150244524e-06, "loss": 0.8317, "step": 269400 }, { "epoch": 1.7211837011103586, "grad_norm": 0.9127917289733887, "learning_rate": 4.741724108813217e-06, "loss": 1.0748, "step": 269410 }, { "epoch": 1.7212475882600975, "grad_norm": 0.7067804932594299, "learning_rate": 4.739591523171894e-06, "loss": 0.7738, "step": 269420 }, { "epoch": 1.721311475409836, "grad_norm": 1.1091209650039673, "learning_rate": 4.7374593933420475e-06, "loss": 0.6752, "step": 269430 }, { "epoch": 1.721375362559575, "grad_norm": 1.3698811531066895, "learning_rate": 4.735327719345134e-06, "loss": 0.6906, "step": 269440 }, { "epoch": 1.7214392497093134, "grad_norm": 0.8478043079376221, "learning_rate": 4.733196501202641e-06, "loss": 0.8665, "step": 269450 }, { "epoch": 1.7215031368590523, "grad_norm": 1.7327885627746582, "learning_rate": 4.731065738936008e-06, "loss": 1.2341, "step": 269460 }, { "epoch": 1.7215670240087908, "grad_norm": 0.9931337833404541, "learning_rate": 4.728935432566722e-06, "loss": 1.0581, "step": 269470 }, { "epoch": 1.7216309111585297, "grad_norm": 1.094643473625183, "learning_rate": 4.726805582116206e-06, "loss": 1.039, "step": 269480 }, { "epoch": 1.7216947983082682, "grad_norm": 0.7432576417922974, "learning_rate": 4.724676187605937e-06, "loss": 0.75, "step": 269490 }, { "epoch": 1.721758685458007, "grad_norm": 1.0011460781097412, "learning_rate": 4.7225472490573355e-06, "loss": 0.8445, "step": 269500 }, { "epoch": 1.7218225726077456, "grad_norm": 1.158536434173584, "learning_rate": 4.720418766491852e-06, "loss": 1.1112, "step": 269510 }, { "epoch": 1.7218864597574843, "grad_norm": 0.8979527354240417, "learning_rate": 4.718290739930936e-06, "loss": 1.0536, "step": 269520 }, { "epoch": 1.721950346907223, "grad_norm": 1.2593528032302856, "learning_rate": 4.7161631693959985e-06, "loss": 0.9534, "step": 269530 }, { "epoch": 1.7220142340569617, "grad_norm": 0.8251490592956543, "learning_rate": 4.7140360549084825e-06, "loss": 0.637, "step": 269540 }, { "epoch": 1.7220781212067005, "grad_norm": 1.0493764877319336, "learning_rate": 4.711909396489795e-06, "loss": 0.7499, "step": 269550 }, { "epoch": 1.7221420083564392, "grad_norm": 1.0737330913543701, "learning_rate": 4.70978319416136e-06, "loss": 0.8281, "step": 269560 }, { "epoch": 1.7222058955061779, "grad_norm": 1.0529547929763794, "learning_rate": 4.707657447944591e-06, "loss": 0.6255, "step": 269570 }, { "epoch": 1.7222697826559166, "grad_norm": 1.3771990537643433, "learning_rate": 4.705532157860898e-06, "loss": 1.1111, "step": 269580 }, { "epoch": 1.7223336698056553, "grad_norm": 0.7908219695091248, "learning_rate": 4.7034073239316745e-06, "loss": 0.8958, "step": 269590 }, { "epoch": 1.722397556955394, "grad_norm": 0.8063262701034546, "learning_rate": 4.701282946178332e-06, "loss": 0.8439, "step": 269600 }, { "epoch": 1.7224614441051327, "grad_norm": 0.8116989135742188, "learning_rate": 4.699159024622252e-06, "loss": 0.8012, "step": 269610 }, { "epoch": 1.7225253312548714, "grad_norm": 1.1216683387756348, "learning_rate": 4.69703555928484e-06, "loss": 0.8178, "step": 269620 }, { "epoch": 1.72258921840461, "grad_norm": 1.1037665605545044, "learning_rate": 4.694912550187469e-06, "loss": 1.1767, "step": 269630 }, { "epoch": 1.7226531055543488, "grad_norm": 0.8387174606323242, "learning_rate": 4.6927899973515265e-06, "loss": 0.9968, "step": 269640 }, { "epoch": 1.7227169927040875, "grad_norm": 0.756056010723114, "learning_rate": 4.690667900798379e-06, "loss": 0.7008, "step": 269650 }, { "epoch": 1.7227808798538262, "grad_norm": 1.05203378200531, "learning_rate": 4.688546260549409e-06, "loss": 1.0279, "step": 269660 }, { "epoch": 1.722844767003565, "grad_norm": 0.9116624593734741, "learning_rate": 4.686425076625972e-06, "loss": 0.7539, "step": 269670 }, { "epoch": 1.7229086541533036, "grad_norm": 2.241201639175415, "learning_rate": 4.684304349049445e-06, "loss": 0.7772, "step": 269680 }, { "epoch": 1.7229725413030423, "grad_norm": 0.8135285377502441, "learning_rate": 4.682184077841168e-06, "loss": 0.8999, "step": 269690 }, { "epoch": 1.723036428452781, "grad_norm": 1.494868278503418, "learning_rate": 4.68006426302251e-06, "loss": 1.0202, "step": 269700 }, { "epoch": 1.7231003156025197, "grad_norm": 2.18159556388855, "learning_rate": 4.677944904614806e-06, "loss": 0.9514, "step": 269710 }, { "epoch": 1.7231642027522585, "grad_norm": 1.3357326984405518, "learning_rate": 4.675826002639405e-06, "loss": 0.8269, "step": 269720 }, { "epoch": 1.7232280899019972, "grad_norm": 1.1173053979873657, "learning_rate": 4.673707557117657e-06, "loss": 0.9594, "step": 269730 }, { "epoch": 1.7232919770517359, "grad_norm": 1.1313519477844238, "learning_rate": 4.671589568070872e-06, "loss": 0.8786, "step": 269740 }, { "epoch": 1.7233558642014746, "grad_norm": 0.8571352958679199, "learning_rate": 4.669472035520411e-06, "loss": 0.9914, "step": 269750 }, { "epoch": 1.723419751351213, "grad_norm": 0.6213200688362122, "learning_rate": 4.6673549594875684e-06, "loss": 1.0952, "step": 269760 }, { "epoch": 1.723483638500952, "grad_norm": 1.1289409399032593, "learning_rate": 4.665238339993694e-06, "loss": 0.7568, "step": 269770 }, { "epoch": 1.7235475256506905, "grad_norm": 1.6800681352615356, "learning_rate": 4.663122177060081e-06, "loss": 0.8188, "step": 269780 }, { "epoch": 1.7236114128004294, "grad_norm": 1.429936170578003, "learning_rate": 4.661006470708052e-06, "loss": 0.9426, "step": 269790 }, { "epoch": 1.7236752999501679, "grad_norm": 1.010558843612671, "learning_rate": 4.658891220958905e-06, "loss": 1.11, "step": 269800 }, { "epoch": 1.7237391870999068, "grad_norm": 0.7443026304244995, "learning_rate": 4.656776427833964e-06, "loss": 1.108, "step": 269810 }, { "epoch": 1.7238030742496453, "grad_norm": 1.2368794679641724, "learning_rate": 4.654662091354495e-06, "loss": 0.8759, "step": 269820 }, { "epoch": 1.7238669613993842, "grad_norm": 1.9988175630569458, "learning_rate": 4.652548211541824e-06, "loss": 0.8806, "step": 269830 }, { "epoch": 1.7239308485491227, "grad_norm": 0.7888319492340088, "learning_rate": 4.650434788417207e-06, "loss": 0.8124, "step": 269840 }, { "epoch": 1.7239947356988616, "grad_norm": 0.6178026795387268, "learning_rate": 4.648321822001961e-06, "loss": 0.8807, "step": 269850 }, { "epoch": 1.7240586228486001, "grad_norm": 1.3354192972183228, "learning_rate": 4.6462093123173345e-06, "loss": 0.9558, "step": 269860 }, { "epoch": 1.724122509998339, "grad_norm": 1.0010740756988525, "learning_rate": 4.644097259384628e-06, "loss": 0.6324, "step": 269870 }, { "epoch": 1.7241863971480775, "grad_norm": 1.5502755641937256, "learning_rate": 4.6419856632251015e-06, "loss": 0.8098, "step": 269880 }, { "epoch": 1.7242502842978165, "grad_norm": 1.1586580276489258, "learning_rate": 4.639874523860011e-06, "loss": 0.7353, "step": 269890 }, { "epoch": 1.724314171447555, "grad_norm": 0.6563423275947571, "learning_rate": 4.637763841310633e-06, "loss": 0.7076, "step": 269900 }, { "epoch": 1.7243780585972939, "grad_norm": 2.1584129333496094, "learning_rate": 4.635653615598206e-06, "loss": 0.8794, "step": 269910 }, { "epoch": 1.7244419457470324, "grad_norm": 1.012452244758606, "learning_rate": 4.63354384674401e-06, "loss": 0.9036, "step": 269920 }, { "epoch": 1.7245058328967713, "grad_norm": 1.451828122138977, "learning_rate": 4.631434534769258e-06, "loss": 1.0659, "step": 269930 }, { "epoch": 1.7245697200465098, "grad_norm": 0.7395955920219421, "learning_rate": 4.629325679695223e-06, "loss": 0.7284, "step": 269940 }, { "epoch": 1.7246336071962487, "grad_norm": 1.9066523313522339, "learning_rate": 4.627217281543117e-06, "loss": 0.7908, "step": 269950 }, { "epoch": 1.7246974943459872, "grad_norm": 0.6626289486885071, "learning_rate": 4.6251093403342e-06, "loss": 0.8238, "step": 269960 }, { "epoch": 1.724761381495726, "grad_norm": 1.87825345993042, "learning_rate": 4.623001856089676e-06, "loss": 1.1427, "step": 269970 }, { "epoch": 1.7248252686454646, "grad_norm": 0.829743504524231, "learning_rate": 4.6208948288307855e-06, "loss": 0.9348, "step": 269980 }, { "epoch": 1.7248891557952033, "grad_norm": 0.6813519597053528, "learning_rate": 4.618788258578738e-06, "loss": 0.934, "step": 269990 }, { "epoch": 1.724953042944942, "grad_norm": 0.6842412948608398, "learning_rate": 4.616682145354756e-06, "loss": 0.9531, "step": 270000 }, { "epoch": 1.7250169300946807, "grad_norm": 1.5858936309814453, "learning_rate": 4.614576489180045e-06, "loss": 0.882, "step": 270010 }, { "epoch": 1.7250808172444194, "grad_norm": 1.5098166465759277, "learning_rate": 4.612471290075821e-06, "loss": 0.8706, "step": 270020 }, { "epoch": 1.7251447043941581, "grad_norm": 0.6777145266532898, "learning_rate": 4.610366548063261e-06, "loss": 0.7599, "step": 270030 }, { "epoch": 1.7252085915438968, "grad_norm": 1.5332057476043701, "learning_rate": 4.608262263163582e-06, "loss": 1.1588, "step": 270040 }, { "epoch": 1.7252724786936355, "grad_norm": 2.3782873153686523, "learning_rate": 4.606158435397984e-06, "loss": 0.7293, "step": 270050 }, { "epoch": 1.7253363658433742, "grad_norm": 0.5773844718933105, "learning_rate": 4.6040550647876264e-06, "loss": 0.8931, "step": 270060 }, { "epoch": 1.725400252993113, "grad_norm": 1.5350877046585083, "learning_rate": 4.6019521513537226e-06, "loss": 1.0427, "step": 270070 }, { "epoch": 1.7254641401428517, "grad_norm": 1.2986094951629639, "learning_rate": 4.59984969511742e-06, "loss": 1.1083, "step": 270080 }, { "epoch": 1.7255280272925904, "grad_norm": 1.510398268699646, "learning_rate": 4.597747696099919e-06, "loss": 1.1534, "step": 270090 }, { "epoch": 1.725591914442329, "grad_norm": 1.6239967346191406, "learning_rate": 4.59564615432237e-06, "loss": 0.9943, "step": 270100 }, { "epoch": 1.7256558015920678, "grad_norm": 1.4070252180099487, "learning_rate": 4.5935450698059544e-06, "loss": 0.927, "step": 270110 }, { "epoch": 1.7257196887418065, "grad_norm": 1.0951735973358154, "learning_rate": 4.591444442571813e-06, "loss": 0.8948, "step": 270120 }, { "epoch": 1.7257835758915452, "grad_norm": 1.776873230934143, "learning_rate": 4.589344272641117e-06, "loss": 0.8, "step": 270130 }, { "epoch": 1.725847463041284, "grad_norm": 0.7132924795150757, "learning_rate": 4.587244560035003e-06, "loss": 0.8365, "step": 270140 }, { "epoch": 1.7259113501910226, "grad_norm": 1.0497385263442993, "learning_rate": 4.58514530477463e-06, "loss": 1.0442, "step": 270150 }, { "epoch": 1.7259752373407613, "grad_norm": 1.2083861827850342, "learning_rate": 4.583046506881128e-06, "loss": 1.1931, "step": 270160 }, { "epoch": 1.7260391244905, "grad_norm": 1.1868774890899658, "learning_rate": 4.580948166375642e-06, "loss": 0.8016, "step": 270170 }, { "epoch": 1.7261030116402387, "grad_norm": 0.8388951420783997, "learning_rate": 4.578850283279296e-06, "loss": 0.6478, "step": 270180 }, { "epoch": 1.7261668987899774, "grad_norm": 1.142733097076416, "learning_rate": 4.576752857613231e-06, "loss": 1.1856, "step": 270190 }, { "epoch": 1.7262307859397161, "grad_norm": 5.006933689117432, "learning_rate": 4.574655889398554e-06, "loss": 0.7749, "step": 270200 }, { "epoch": 1.7262946730894548, "grad_norm": 1.5445818901062012, "learning_rate": 4.572559378656399e-06, "loss": 0.8089, "step": 270210 }, { "epoch": 1.7263585602391935, "grad_norm": 0.9058724641799927, "learning_rate": 4.570463325407859e-06, "loss": 0.8403, "step": 270220 }, { "epoch": 1.726422447388932, "grad_norm": 0.807124674320221, "learning_rate": 4.568367729674067e-06, "loss": 1.0011, "step": 270230 }, { "epoch": 1.726486334538671, "grad_norm": 0.8807920217514038, "learning_rate": 4.566272591476112e-06, "loss": 0.9613, "step": 270240 }, { "epoch": 1.7265502216884094, "grad_norm": 1.1456222534179688, "learning_rate": 4.564177910835088e-06, "loss": 1.14, "step": 270250 }, { "epoch": 1.7266141088381484, "grad_norm": 0.9585066437721252, "learning_rate": 4.562083687772118e-06, "loss": 0.9113, "step": 270260 }, { "epoch": 1.7266779959878868, "grad_norm": 0.6454988718032837, "learning_rate": 4.559989922308261e-06, "loss": 0.8286, "step": 270270 }, { "epoch": 1.7267418831376258, "grad_norm": 1.003688931465149, "learning_rate": 4.557896614464624e-06, "loss": 0.9706, "step": 270280 }, { "epoch": 1.7268057702873643, "grad_norm": 0.7934787273406982, "learning_rate": 4.555803764262273e-06, "loss": 0.9545, "step": 270290 }, { "epoch": 1.7268696574371032, "grad_norm": 1.3392540216445923, "learning_rate": 4.553711371722308e-06, "loss": 0.7657, "step": 270300 }, { "epoch": 1.7269335445868417, "grad_norm": 0.7471747994422913, "learning_rate": 4.551619436865767e-06, "loss": 0.7288, "step": 270310 }, { "epoch": 1.7269974317365806, "grad_norm": 2.051301956176758, "learning_rate": 4.549527959713756e-06, "loss": 0.6624, "step": 270320 }, { "epoch": 1.727061318886319, "grad_norm": 1.2567614316940308, "learning_rate": 4.547436940287303e-06, "loss": 0.7613, "step": 270330 }, { "epoch": 1.727125206036058, "grad_norm": 0.9878590106964111, "learning_rate": 4.54534637860749e-06, "loss": 1.0794, "step": 270340 }, { "epoch": 1.7271890931857965, "grad_norm": 0.7812796831130981, "learning_rate": 4.543256274695362e-06, "loss": 0.6185, "step": 270350 }, { "epoch": 1.7272529803355354, "grad_norm": 0.7838647365570068, "learning_rate": 4.541166628571969e-06, "loss": 1.0459, "step": 270360 }, { "epoch": 1.727316867485274, "grad_norm": 0.5343107581138611, "learning_rate": 4.53907744025836e-06, "loss": 0.5513, "step": 270370 }, { "epoch": 1.7273807546350128, "grad_norm": 0.9320610761642456, "learning_rate": 4.536988709775564e-06, "loss": 0.9895, "step": 270380 }, { "epoch": 1.7274446417847513, "grad_norm": 1.0904432535171509, "learning_rate": 4.5349004371446234e-06, "loss": 0.7556, "step": 270390 }, { "epoch": 1.7275085289344903, "grad_norm": 1.0576446056365967, "learning_rate": 4.5328126223865675e-06, "loss": 0.7109, "step": 270400 }, { "epoch": 1.7275724160842287, "grad_norm": 1.0743281841278076, "learning_rate": 4.530725265522429e-06, "loss": 0.7066, "step": 270410 }, { "epoch": 1.7276363032339677, "grad_norm": 1.1610684394836426, "learning_rate": 4.528638366573212e-06, "loss": 0.6315, "step": 270420 }, { "epoch": 1.7277001903837061, "grad_norm": 0.5903398990631104, "learning_rate": 4.526551925559957e-06, "loss": 0.9977, "step": 270430 }, { "epoch": 1.727764077533445, "grad_norm": 1.0846974849700928, "learning_rate": 4.5244659425036585e-06, "loss": 0.7972, "step": 270440 }, { "epoch": 1.7278279646831836, "grad_norm": 0.6714950203895569, "learning_rate": 4.522380417425331e-06, "loss": 0.9027, "step": 270450 }, { "epoch": 1.7278918518329225, "grad_norm": 1.0459481477737427, "learning_rate": 4.5202953503459766e-06, "loss": 0.9928, "step": 270460 }, { "epoch": 1.727955738982661, "grad_norm": 0.9128748178482056, "learning_rate": 4.518210741286594e-06, "loss": 0.9621, "step": 270470 }, { "epoch": 1.7280196261323997, "grad_norm": 0.890533983707428, "learning_rate": 4.5161265902681714e-06, "loss": 0.8388, "step": 270480 }, { "epoch": 1.7280835132821384, "grad_norm": 0.6935803890228271, "learning_rate": 4.514042897311715e-06, "loss": 1.0362, "step": 270490 }, { "epoch": 1.728147400431877, "grad_norm": 0.8639132380485535, "learning_rate": 4.511959662438187e-06, "loss": 0.6503, "step": 270500 }, { "epoch": 1.7282112875816158, "grad_norm": 1.1737576723098755, "learning_rate": 4.509876885668585e-06, "loss": 0.8988, "step": 270510 }, { "epoch": 1.7282751747313545, "grad_norm": 1.220122218132019, "learning_rate": 4.507794567023865e-06, "loss": 1.0354, "step": 270520 }, { "epoch": 1.7283390618810932, "grad_norm": 0.9614644646644592, "learning_rate": 4.505712706525028e-06, "loss": 0.8705, "step": 270530 }, { "epoch": 1.728402949030832, "grad_norm": 0.4436861276626587, "learning_rate": 4.503631304193007e-06, "loss": 0.7628, "step": 270540 }, { "epoch": 1.7284668361805706, "grad_norm": 0.8033667802810669, "learning_rate": 4.50155036004879e-06, "loss": 0.941, "step": 270550 }, { "epoch": 1.7285307233303093, "grad_norm": 0.8721498250961304, "learning_rate": 4.49946987411331e-06, "loss": 0.8562, "step": 270560 }, { "epoch": 1.728594610480048, "grad_norm": 0.6049971580505371, "learning_rate": 4.497389846407535e-06, "loss": 0.7328, "step": 270570 }, { "epoch": 1.7286584976297867, "grad_norm": 0.9980120658874512, "learning_rate": 4.4953102769524195e-06, "loss": 0.9632, "step": 270580 }, { "epoch": 1.7287223847795254, "grad_norm": 1.7371565103530884, "learning_rate": 4.493231165768886e-06, "loss": 1.2044, "step": 270590 }, { "epoch": 1.7287862719292642, "grad_norm": 1.1560472249984741, "learning_rate": 4.491152512877895e-06, "loss": 0.7284, "step": 270600 }, { "epoch": 1.7288501590790029, "grad_norm": 0.8397563099861145, "learning_rate": 4.489074318300357e-06, "loss": 0.7156, "step": 270610 }, { "epoch": 1.7289140462287416, "grad_norm": 1.4910930395126343, "learning_rate": 4.486996582057224e-06, "loss": 0.9487, "step": 270620 }, { "epoch": 1.7289779333784803, "grad_norm": 0.9516547322273254, "learning_rate": 4.4849193041693996e-06, "loss": 0.8044, "step": 270630 }, { "epoch": 1.729041820528219, "grad_norm": 1.6026740074157715, "learning_rate": 4.482842484657824e-06, "loss": 0.7698, "step": 270640 }, { "epoch": 1.7291057076779577, "grad_norm": 0.6650766134262085, "learning_rate": 4.48076612354339e-06, "loss": 0.8774, "step": 270650 }, { "epoch": 1.7291695948276964, "grad_norm": 0.9393697381019592, "learning_rate": 4.478690220847032e-06, "loss": 0.9469, "step": 270660 }, { "epoch": 1.729233481977435, "grad_norm": 1.0290288925170898, "learning_rate": 4.476614776589638e-06, "loss": 0.9257, "step": 270670 }, { "epoch": 1.7292973691271738, "grad_norm": 1.147005319595337, "learning_rate": 4.4745397907921205e-06, "loss": 0.7659, "step": 270680 }, { "epoch": 1.7293612562769125, "grad_norm": 1.7700022459030151, "learning_rate": 4.4724652634753674e-06, "loss": 0.7812, "step": 270690 }, { "epoch": 1.7294251434266512, "grad_norm": 1.0767414569854736, "learning_rate": 4.47039119466029e-06, "loss": 0.8533, "step": 270700 }, { "epoch": 1.72948903057639, "grad_norm": 1.2778061628341675, "learning_rate": 4.468317584367743e-06, "loss": 0.7759, "step": 270710 }, { "epoch": 1.7295529177261284, "grad_norm": 1.1343021392822266, "learning_rate": 4.466244432618644e-06, "loss": 0.8308, "step": 270720 }, { "epoch": 1.7296168048758673, "grad_norm": 0.6189510226249695, "learning_rate": 4.464171739433842e-06, "loss": 0.7996, "step": 270730 }, { "epoch": 1.7296806920256058, "grad_norm": 0.9053893089294434, "learning_rate": 4.4620995048342384e-06, "loss": 1.0211, "step": 270740 }, { "epoch": 1.7297445791753447, "grad_norm": 0.9649641513824463, "learning_rate": 4.460027728840676e-06, "loss": 0.8243, "step": 270750 }, { "epoch": 1.7298084663250832, "grad_norm": 0.9888417720794678, "learning_rate": 4.457956411474046e-06, "loss": 0.818, "step": 270760 }, { "epoch": 1.7298723534748222, "grad_norm": 2.9465692043304443, "learning_rate": 4.455885552755185e-06, "loss": 0.988, "step": 270770 }, { "epoch": 1.7299362406245606, "grad_norm": 0.9890803694725037, "learning_rate": 4.4538151527049545e-06, "loss": 0.9858, "step": 270780 }, { "epoch": 1.7300001277742996, "grad_norm": 1.079901933670044, "learning_rate": 4.451745211344216e-06, "loss": 0.8648, "step": 270790 }, { "epoch": 1.730064014924038, "grad_norm": 0.6345334053039551, "learning_rate": 4.449675728693803e-06, "loss": 0.7422, "step": 270800 }, { "epoch": 1.730127902073777, "grad_norm": 0.6401709318161011, "learning_rate": 4.447606704774565e-06, "loss": 0.67, "step": 270810 }, { "epoch": 1.7301917892235155, "grad_norm": 1.0097521543502808, "learning_rate": 4.445538139607336e-06, "loss": 0.943, "step": 270820 }, { "epoch": 1.7302556763732544, "grad_norm": 0.9358034133911133, "learning_rate": 4.443470033212948e-06, "loss": 0.8079, "step": 270830 }, { "epoch": 1.7303195635229929, "grad_norm": 0.8332350254058838, "learning_rate": 4.441402385612225e-06, "loss": 0.9739, "step": 270840 }, { "epoch": 1.7303834506727318, "grad_norm": 0.3689040243625641, "learning_rate": 4.4393351968260115e-06, "loss": 0.8645, "step": 270850 }, { "epoch": 1.7304473378224703, "grad_norm": 0.7520756125450134, "learning_rate": 4.437268466875089e-06, "loss": 0.6531, "step": 270860 }, { "epoch": 1.7305112249722092, "grad_norm": 0.8130874633789062, "learning_rate": 4.435202195780303e-06, "loss": 0.8444, "step": 270870 }, { "epoch": 1.7305751121219477, "grad_norm": 0.807141900062561, "learning_rate": 4.433136383562436e-06, "loss": 1.1926, "step": 270880 }, { "epoch": 1.7306389992716866, "grad_norm": 0.6432427167892456, "learning_rate": 4.4310710302423064e-06, "loss": 0.7599, "step": 270890 }, { "epoch": 1.7307028864214251, "grad_norm": 1.7837029695510864, "learning_rate": 4.4290061358407295e-06, "loss": 0.8501, "step": 270900 }, { "epoch": 1.730766773571164, "grad_norm": 1.2138731479644775, "learning_rate": 4.426941700378473e-06, "loss": 0.9177, "step": 270910 }, { "epoch": 1.7308306607209025, "grad_norm": 1.1648060083389282, "learning_rate": 4.4248777238763474e-06, "loss": 0.9268, "step": 270920 }, { "epoch": 1.7308945478706415, "grad_norm": 1.0720033645629883, "learning_rate": 4.422814206355119e-06, "loss": 0.8604, "step": 270930 }, { "epoch": 1.73095843502038, "grad_norm": 1.0889008045196533, "learning_rate": 4.420751147835595e-06, "loss": 0.9616, "step": 270940 }, { "epoch": 1.7310223221701189, "grad_norm": 1.2953075170516968, "learning_rate": 4.418688548338529e-06, "loss": 0.8462, "step": 270950 }, { "epoch": 1.7310862093198573, "grad_norm": 0.790887713432312, "learning_rate": 4.416626407884711e-06, "loss": 0.8467, "step": 270960 }, { "epoch": 1.731150096469596, "grad_norm": 0.937939465045929, "learning_rate": 4.414564726494896e-06, "loss": 0.9561, "step": 270970 }, { "epoch": 1.7312139836193348, "grad_norm": 0.899032711982727, "learning_rate": 4.412503504189852e-06, "loss": 0.9822, "step": 270980 }, { "epoch": 1.7312778707690735, "grad_norm": 1.0886012315750122, "learning_rate": 4.410442740990334e-06, "loss": 0.8803, "step": 270990 }, { "epoch": 1.7313417579188122, "grad_norm": 1.7397100925445557, "learning_rate": 4.408382436917108e-06, "loss": 0.6794, "step": 271000 }, { "epoch": 1.7314056450685509, "grad_norm": 0.8017283082008362, "learning_rate": 4.406322591990897e-06, "loss": 1.1767, "step": 271010 }, { "epoch": 1.7314695322182896, "grad_norm": 1.061044454574585, "learning_rate": 4.404263206232478e-06, "loss": 0.8546, "step": 271020 }, { "epoch": 1.7315334193680283, "grad_norm": 0.7479444742202759, "learning_rate": 4.402204279662558e-06, "loss": 1.0758, "step": 271030 }, { "epoch": 1.731597306517767, "grad_norm": 1.4993268251419067, "learning_rate": 4.400145812301904e-06, "loss": 0.6991, "step": 271040 }, { "epoch": 1.7316611936675057, "grad_norm": 2.6139063835144043, "learning_rate": 4.398293584318353e-06, "loss": 1.0884, "step": 271050 }, { "epoch": 1.7317250808172444, "grad_norm": 0.9762659668922424, "learning_rate": 4.396235989512371e-06, "loss": 1.2984, "step": 271060 }, { "epoch": 1.7317889679669831, "grad_norm": 0.7708228230476379, "learning_rate": 4.394178853975755e-06, "loss": 1.0642, "step": 271070 }, { "epoch": 1.7318528551167218, "grad_norm": 2.804189920425415, "learning_rate": 4.392122177729207e-06, "loss": 1.2598, "step": 271080 }, { "epoch": 1.7319167422664605, "grad_norm": 1.322363018989563, "learning_rate": 4.390065960793455e-06, "loss": 1.2285, "step": 271090 }, { "epoch": 1.7319806294161992, "grad_norm": 1.2776496410369873, "learning_rate": 4.38801020318918e-06, "loss": 0.8559, "step": 271100 }, { "epoch": 1.732044516565938, "grad_norm": 0.9014029502868652, "learning_rate": 4.385954904937117e-06, "loss": 0.8942, "step": 271110 }, { "epoch": 1.7321084037156766, "grad_norm": 0.7205672860145569, "learning_rate": 4.383900066057944e-06, "loss": 0.7632, "step": 271120 }, { "epoch": 1.7321722908654154, "grad_norm": 2.3227195739746094, "learning_rate": 4.3818456865723665e-06, "loss": 0.9524, "step": 271130 }, { "epoch": 1.732236178015154, "grad_norm": 0.7535308003425598, "learning_rate": 4.379791766501057e-06, "loss": 0.7515, "step": 271140 }, { "epoch": 1.7323000651648928, "grad_norm": 1.2400829792022705, "learning_rate": 4.377738305864721e-06, "loss": 0.9976, "step": 271150 }, { "epoch": 1.7323639523146315, "grad_norm": 0.676798939704895, "learning_rate": 4.375685304684024e-06, "loss": 0.834, "step": 271160 }, { "epoch": 1.7324278394643702, "grad_norm": 1.0050621032714844, "learning_rate": 4.373632762979651e-06, "loss": 0.9509, "step": 271170 }, { "epoch": 1.7324917266141089, "grad_norm": 0.9960595369338989, "learning_rate": 4.371580680772264e-06, "loss": 0.8235, "step": 271180 }, { "epoch": 1.7325556137638476, "grad_norm": 1.1460243463516235, "learning_rate": 4.369529058082539e-06, "loss": 0.8149, "step": 271190 }, { "epoch": 1.7326195009135863, "grad_norm": 0.9614496827125549, "learning_rate": 4.367477894931121e-06, "loss": 0.9482, "step": 271200 }, { "epoch": 1.7326833880633248, "grad_norm": 0.5980064272880554, "learning_rate": 4.365427191338683e-06, "loss": 0.8493, "step": 271210 }, { "epoch": 1.7327472752130637, "grad_norm": 0.5648938417434692, "learning_rate": 4.3633769473258815e-06, "loss": 0.8194, "step": 271220 }, { "epoch": 1.7328111623628022, "grad_norm": 0.7305797338485718, "learning_rate": 4.361327162913348e-06, "loss": 0.8932, "step": 271230 }, { "epoch": 1.7328750495125411, "grad_norm": 1.1702698469161987, "learning_rate": 4.359277838121739e-06, "loss": 0.9584, "step": 271240 }, { "epoch": 1.7329389366622796, "grad_norm": 1.350797414779663, "learning_rate": 4.357228972971677e-06, "loss": 0.9944, "step": 271250 }, { "epoch": 1.7330028238120185, "grad_norm": 0.7506915926933289, "learning_rate": 4.355180567483819e-06, "loss": 0.7929, "step": 271260 }, { "epoch": 1.733066710961757, "grad_norm": 0.7021801471710205, "learning_rate": 4.3531326216787685e-06, "loss": 0.9577, "step": 271270 }, { "epoch": 1.733130598111496, "grad_norm": 1.2569390535354614, "learning_rate": 4.3510851355771705e-06, "loss": 0.9093, "step": 271280 }, { "epoch": 1.7331944852612344, "grad_norm": 0.826291024684906, "learning_rate": 4.349038109199632e-06, "loss": 0.8654, "step": 271290 }, { "epoch": 1.7332583724109734, "grad_norm": 0.7624689340591431, "learning_rate": 4.34699154256678e-06, "loss": 0.9783, "step": 271300 }, { "epoch": 1.7333222595607118, "grad_norm": 1.0965466499328613, "learning_rate": 4.344945435699211e-06, "loss": 0.8737, "step": 271310 }, { "epoch": 1.7333861467104508, "grad_norm": 0.9986264705657959, "learning_rate": 4.34289978861755e-06, "loss": 0.916, "step": 271320 }, { "epoch": 1.7334500338601893, "grad_norm": 0.7352056503295898, "learning_rate": 4.34085460134237e-06, "loss": 0.9011, "step": 271330 }, { "epoch": 1.7335139210099282, "grad_norm": 2.520620584487915, "learning_rate": 4.3388098738943e-06, "loss": 0.7457, "step": 271340 }, { "epoch": 1.7335778081596667, "grad_norm": 0.8847737908363342, "learning_rate": 4.336765606293907e-06, "loss": 1.1116, "step": 271350 }, { "epoch": 1.7336416953094056, "grad_norm": 1.127189040184021, "learning_rate": 4.334721798561803e-06, "loss": 0.9713, "step": 271360 }, { "epoch": 1.733705582459144, "grad_norm": 0.7761907577514648, "learning_rate": 4.332678450718541e-06, "loss": 0.9633, "step": 271370 }, { "epoch": 1.733769469608883, "grad_norm": 1.4676905870437622, "learning_rate": 4.330635562784724e-06, "loss": 0.9541, "step": 271380 }, { "epoch": 1.7338333567586215, "grad_norm": 0.9573052525520325, "learning_rate": 4.328593134780911e-06, "loss": 0.7551, "step": 271390 }, { "epoch": 1.7338972439083604, "grad_norm": 1.7084014415740967, "learning_rate": 4.326551166727683e-06, "loss": 1.0257, "step": 271400 }, { "epoch": 1.733961131058099, "grad_norm": 1.5131990909576416, "learning_rate": 4.324509658645587e-06, "loss": 0.8531, "step": 271410 }, { "epoch": 1.7340250182078378, "grad_norm": 0.8419858813285828, "learning_rate": 4.322468610555197e-06, "loss": 1.057, "step": 271420 }, { "epoch": 1.7340889053575763, "grad_norm": 0.9144232273101807, "learning_rate": 4.320428022477075e-06, "loss": 1.0028, "step": 271430 }, { "epoch": 1.7341527925073152, "grad_norm": 1.0411046743392944, "learning_rate": 4.318387894431747e-06, "loss": 1.1744, "step": 271440 }, { "epoch": 1.7342166796570537, "grad_norm": 0.8902087807655334, "learning_rate": 4.316348226439787e-06, "loss": 0.8054, "step": 271450 }, { "epoch": 1.7342805668067924, "grad_norm": 0.8370457291603088, "learning_rate": 4.314309018521712e-06, "loss": 0.9544, "step": 271460 }, { "epoch": 1.7343444539565311, "grad_norm": 1.351797103881836, "learning_rate": 4.312270270698076e-06, "loss": 0.7003, "step": 271470 }, { "epoch": 1.7344083411062698, "grad_norm": 0.8992185592651367, "learning_rate": 4.310231982989405e-06, "loss": 0.6515, "step": 271480 }, { "epoch": 1.7344722282560086, "grad_norm": 1.824882984161377, "learning_rate": 4.308194155416228e-06, "loss": 0.845, "step": 271490 }, { "epoch": 1.7345361154057473, "grad_norm": 0.7211819291114807, "learning_rate": 4.3061567879990495e-06, "loss": 0.716, "step": 271500 }, { "epoch": 1.734600002555486, "grad_norm": 1.2011065483093262, "learning_rate": 4.304119880758417e-06, "loss": 0.9571, "step": 271510 }, { "epoch": 1.7346638897052247, "grad_norm": 1.1488710641860962, "learning_rate": 4.302083433714821e-06, "loss": 1.1657, "step": 271520 }, { "epoch": 1.7347277768549634, "grad_norm": 0.9128865599632263, "learning_rate": 4.300047446888777e-06, "loss": 0.9392, "step": 271530 }, { "epoch": 1.734791664004702, "grad_norm": 0.9169224500656128, "learning_rate": 4.298011920300804e-06, "loss": 0.8583, "step": 271540 }, { "epoch": 1.7348555511544408, "grad_norm": 0.6351904273033142, "learning_rate": 4.295976853971373e-06, "loss": 0.6869, "step": 271550 }, { "epoch": 1.7349194383041795, "grad_norm": 0.9080760478973389, "learning_rate": 4.293942247921012e-06, "loss": 0.8738, "step": 271560 }, { "epoch": 1.7349833254539182, "grad_norm": 4.88934850692749, "learning_rate": 4.291908102170178e-06, "loss": 1.0908, "step": 271570 }, { "epoch": 1.735047212603657, "grad_norm": 1.1222317218780518, "learning_rate": 4.289874416739387e-06, "loss": 0.8656, "step": 271580 }, { "epoch": 1.7351110997533956, "grad_norm": 0.6947523951530457, "learning_rate": 4.287841191649095e-06, "loss": 0.9174, "step": 271590 }, { "epoch": 1.7351749869031343, "grad_norm": 1.1124070882797241, "learning_rate": 4.285808426919791e-06, "loss": 0.855, "step": 271600 }, { "epoch": 1.735238874052873, "grad_norm": 1.0762344598770142, "learning_rate": 4.283776122571942e-06, "loss": 0.8326, "step": 271610 }, { "epoch": 1.7353027612026117, "grad_norm": 0.9816240072250366, "learning_rate": 4.281744278626027e-06, "loss": 0.7708, "step": 271620 }, { "epoch": 1.7353666483523504, "grad_norm": 0.8895612955093384, "learning_rate": 4.279712895102489e-06, "loss": 0.7371, "step": 271630 }, { "epoch": 1.7354305355020891, "grad_norm": 0.6587034463882446, "learning_rate": 4.277681972021808e-06, "loss": 1.0135, "step": 271640 }, { "epoch": 1.7354944226518279, "grad_norm": 0.7945663332939148, "learning_rate": 4.2756515094044105e-06, "loss": 1.0117, "step": 271650 }, { "epoch": 1.7355583098015666, "grad_norm": 1.7672518491744995, "learning_rate": 4.2736215072707696e-06, "loss": 1.2048, "step": 271660 }, { "epoch": 1.7356221969513053, "grad_norm": 1.1510045528411865, "learning_rate": 4.271591965641308e-06, "loss": 0.7788, "step": 271670 }, { "epoch": 1.735686084101044, "grad_norm": 0.8241963386535645, "learning_rate": 4.269562884536493e-06, "loss": 1.0226, "step": 271680 }, { "epoch": 1.7357499712507827, "grad_norm": 0.9838940501213074, "learning_rate": 4.267534263976724e-06, "loss": 1.2249, "step": 271690 }, { "epoch": 1.7358138584005212, "grad_norm": 3.085310459136963, "learning_rate": 4.265506103982464e-06, "loss": 1.2433, "step": 271700 }, { "epoch": 1.73587774555026, "grad_norm": 0.7970023155212402, "learning_rate": 4.263478404574111e-06, "loss": 0.8654, "step": 271710 }, { "epoch": 1.7359416326999986, "grad_norm": 1.1733765602111816, "learning_rate": 4.2614511657721124e-06, "loss": 0.8471, "step": 271720 }, { "epoch": 1.7360055198497375, "grad_norm": 0.8512540459632874, "learning_rate": 4.259424387596856e-06, "loss": 1.1134, "step": 271730 }, { "epoch": 1.736069406999476, "grad_norm": 0.8959993124008179, "learning_rate": 4.257398070068769e-06, "loss": 0.7911, "step": 271740 }, { "epoch": 1.736133294149215, "grad_norm": 1.955090045928955, "learning_rate": 4.255372213208264e-06, "loss": 1.0413, "step": 271750 }, { "epoch": 1.7361971812989534, "grad_norm": 0.7032087445259094, "learning_rate": 4.253346817035731e-06, "loss": 0.8343, "step": 271760 }, { "epoch": 1.7362610684486923, "grad_norm": 1.0131257772445679, "learning_rate": 4.251321881571579e-06, "loss": 0.8446, "step": 271770 }, { "epoch": 1.7363249555984308, "grad_norm": 0.958465039730072, "learning_rate": 4.2492974068361835e-06, "loss": 1.0861, "step": 271780 }, { "epoch": 1.7363888427481697, "grad_norm": 1.5234622955322266, "learning_rate": 4.2472733928499485e-06, "loss": 0.7778, "step": 271790 }, { "epoch": 1.7364527298979082, "grad_norm": 1.2108267545700073, "learning_rate": 4.245249839633247e-06, "loss": 0.9079, "step": 271800 }, { "epoch": 1.7365166170476471, "grad_norm": 1.0668816566467285, "learning_rate": 4.2432267472064745e-06, "loss": 1.101, "step": 271810 }, { "epoch": 1.7365805041973856, "grad_norm": 0.7074256539344788, "learning_rate": 4.241204115589986e-06, "loss": 1.0892, "step": 271820 }, { "epoch": 1.7366443913471246, "grad_norm": 1.3085336685180664, "learning_rate": 4.23918194480416e-06, "loss": 0.977, "step": 271830 }, { "epoch": 1.736708278496863, "grad_norm": 1.2348912954330444, "learning_rate": 4.2371602348693584e-06, "loss": 0.6361, "step": 271840 }, { "epoch": 1.736772165646602, "grad_norm": 0.6151230931282043, "learning_rate": 4.235138985805953e-06, "loss": 0.8245, "step": 271850 }, { "epoch": 1.7368360527963405, "grad_norm": 1.233837366104126, "learning_rate": 4.233118197634279e-06, "loss": 0.7507, "step": 271860 }, { "epoch": 1.7368999399460794, "grad_norm": 1.0764507055282593, "learning_rate": 4.231097870374706e-06, "loss": 1.1995, "step": 271870 }, { "epoch": 1.7369638270958179, "grad_norm": 1.1469213962554932, "learning_rate": 4.229078004047565e-06, "loss": 0.7324, "step": 271880 }, { "epoch": 1.7370277142455568, "grad_norm": 0.8744142651557922, "learning_rate": 4.227058598673217e-06, "loss": 0.8402, "step": 271890 }, { "epoch": 1.7370916013952953, "grad_norm": 0.9857259392738342, "learning_rate": 4.225039654271978e-06, "loss": 1.0731, "step": 271900 }, { "epoch": 1.7371554885450342, "grad_norm": 1.1654436588287354, "learning_rate": 4.2230211708642e-06, "loss": 0.6894, "step": 271910 }, { "epoch": 1.7372193756947727, "grad_norm": 0.9708123803138733, "learning_rate": 4.221003148470193e-06, "loss": 0.8785, "step": 271920 }, { "epoch": 1.7372832628445114, "grad_norm": 0.9018045663833618, "learning_rate": 4.2189855871102965e-06, "loss": 0.75, "step": 271930 }, { "epoch": 1.73734714999425, "grad_norm": 0.7483968138694763, "learning_rate": 4.2169684868048124e-06, "loss": 0.8853, "step": 271940 }, { "epoch": 1.7374110371439888, "grad_norm": 1.216666579246521, "learning_rate": 4.214951847574061e-06, "loss": 0.7667, "step": 271950 }, { "epoch": 1.7374749242937275, "grad_norm": 0.7332319021224976, "learning_rate": 4.212935669438367e-06, "loss": 0.9833, "step": 271960 }, { "epoch": 1.7375388114434662, "grad_norm": 1.1881091594696045, "learning_rate": 4.2109199524180245e-06, "loss": 0.7004, "step": 271970 }, { "epoch": 1.737602698593205, "grad_norm": 0.7997522950172424, "learning_rate": 4.208904696533322e-06, "loss": 0.6841, "step": 271980 }, { "epoch": 1.7376665857429436, "grad_norm": 0.9761389493942261, "learning_rate": 4.206889901804567e-06, "loss": 0.7738, "step": 271990 }, { "epoch": 1.7377304728926823, "grad_norm": 0.8929351568222046, "learning_rate": 4.204875568252048e-06, "loss": 0.9213, "step": 272000 }, { "epoch": 1.737794360042421, "grad_norm": 1.209773302078247, "learning_rate": 4.202861695896043e-06, "loss": 0.9985, "step": 272010 }, { "epoch": 1.7378582471921598, "grad_norm": 0.8349350690841675, "learning_rate": 4.2008482847568535e-06, "loss": 0.9204, "step": 272020 }, { "epoch": 1.7379221343418985, "grad_norm": 0.8182839751243591, "learning_rate": 4.198835334854734e-06, "loss": 0.727, "step": 272030 }, { "epoch": 1.7379860214916372, "grad_norm": 1.3342599868774414, "learning_rate": 4.196822846209969e-06, "loss": 0.76, "step": 272040 }, { "epoch": 1.7380499086413759, "grad_norm": 0.8246582746505737, "learning_rate": 4.19481081884282e-06, "loss": 0.932, "step": 272050 }, { "epoch": 1.7381137957911146, "grad_norm": 1.2628209590911865, "learning_rate": 4.19279925277355e-06, "loss": 1.127, "step": 272060 }, { "epoch": 1.7381776829408533, "grad_norm": 1.0423669815063477, "learning_rate": 4.190788148022434e-06, "loss": 1.0765, "step": 272070 }, { "epoch": 1.738241570090592, "grad_norm": 2.2734713554382324, "learning_rate": 4.188777504609698e-06, "loss": 0.7287, "step": 272080 }, { "epoch": 1.7383054572403307, "grad_norm": 2.1262595653533936, "learning_rate": 4.186767322555618e-06, "loss": 0.8729, "step": 272090 }, { "epoch": 1.7383693443900694, "grad_norm": 0.8176138401031494, "learning_rate": 4.184757601880413e-06, "loss": 1.0373, "step": 272100 }, { "epoch": 1.738433231539808, "grad_norm": 0.9398905634880066, "learning_rate": 4.182748342604348e-06, "loss": 0.7781, "step": 272110 }, { "epoch": 1.7384971186895468, "grad_norm": 0.8366762399673462, "learning_rate": 4.18073954474763e-06, "loss": 1.0162, "step": 272120 }, { "epoch": 1.7385610058392855, "grad_norm": 1.1980094909667969, "learning_rate": 4.1787312083305165e-06, "loss": 0.9061, "step": 272130 }, { "epoch": 1.7386248929890242, "grad_norm": 1.1438990831375122, "learning_rate": 4.176723333373211e-06, "loss": 0.8736, "step": 272140 }, { "epoch": 1.738688780138763, "grad_norm": 1.3131656646728516, "learning_rate": 4.174715919895955e-06, "loss": 0.7236, "step": 272150 }, { "epoch": 1.7387526672885016, "grad_norm": 1.1731828451156616, "learning_rate": 4.1727089679189426e-06, "loss": 1.1201, "step": 272160 }, { "epoch": 1.7388165544382403, "grad_norm": 0.84318608045578, "learning_rate": 4.170702477462407e-06, "loss": 0.8461, "step": 272170 }, { "epoch": 1.738880441587979, "grad_norm": 0.7444975972175598, "learning_rate": 4.168696448546539e-06, "loss": 0.8019, "step": 272180 }, { "epoch": 1.7389443287377175, "grad_norm": 1.2526865005493164, "learning_rate": 4.166690881191554e-06, "loss": 0.9074, "step": 272190 }, { "epoch": 1.7390082158874565, "grad_norm": 1.6187922954559326, "learning_rate": 4.164685775417632e-06, "loss": 1.0094, "step": 272200 }, { "epoch": 1.739072103037195, "grad_norm": 1.1000633239746094, "learning_rate": 4.1626811312449884e-06, "loss": 0.9165, "step": 272210 }, { "epoch": 1.7391359901869339, "grad_norm": 0.9860941767692566, "learning_rate": 4.1606769486937925e-06, "loss": 1.0631, "step": 272220 }, { "epoch": 1.7391998773366724, "grad_norm": 1.1407016515731812, "learning_rate": 4.158673227784249e-06, "loss": 0.5973, "step": 272230 }, { "epoch": 1.7392637644864113, "grad_norm": 1.0110845565795898, "learning_rate": 4.156669968536509e-06, "loss": 0.879, "step": 272240 }, { "epoch": 1.7393276516361498, "grad_norm": 1.1520662307739258, "learning_rate": 4.154667170970777e-06, "loss": 1.0506, "step": 272250 }, { "epoch": 1.7393915387858887, "grad_norm": 0.8411242961883545, "learning_rate": 4.152664835107195e-06, "loss": 0.9874, "step": 272260 }, { "epoch": 1.7394554259356272, "grad_norm": 1.4038716554641724, "learning_rate": 4.150662960965946e-06, "loss": 0.9303, "step": 272270 }, { "epoch": 1.7395193130853661, "grad_norm": 0.9342250823974609, "learning_rate": 4.14866154856719e-06, "loss": 0.9133, "step": 272280 }, { "epoch": 1.7395832002351046, "grad_norm": 1.3906513452529907, "learning_rate": 4.146660597931074e-06, "loss": 0.956, "step": 272290 }, { "epoch": 1.7396470873848435, "grad_norm": 0.841761589050293, "learning_rate": 4.144660109077764e-06, "loss": 0.9594, "step": 272300 }, { "epoch": 1.739710974534582, "grad_norm": 0.8125125169754028, "learning_rate": 4.142660082027383e-06, "loss": 0.9382, "step": 272310 }, { "epoch": 1.739774861684321, "grad_norm": 0.9159964919090271, "learning_rate": 4.140660516800099e-06, "loss": 0.6513, "step": 272320 }, { "epoch": 1.7398387488340594, "grad_norm": 0.679341197013855, "learning_rate": 4.138661413416034e-06, "loss": 0.8616, "step": 272330 }, { "epoch": 1.7399026359837984, "grad_norm": 1.6589670181274414, "learning_rate": 4.136662771895328e-06, "loss": 1.0038, "step": 272340 }, { "epoch": 1.7399665231335368, "grad_norm": 0.7846057415008545, "learning_rate": 4.134664592258098e-06, "loss": 0.796, "step": 272350 }, { "epoch": 1.7400304102832758, "grad_norm": 1.1777966022491455, "learning_rate": 4.132666874524482e-06, "loss": 0.8401, "step": 272360 }, { "epoch": 1.7400942974330142, "grad_norm": 0.8257417678833008, "learning_rate": 4.130669618714583e-06, "loss": 0.902, "step": 272370 }, { "epoch": 1.7401581845827532, "grad_norm": 0.881574273109436, "learning_rate": 4.1286728248485284e-06, "loss": 0.7615, "step": 272380 }, { "epoch": 1.7402220717324917, "grad_norm": 1.0708754062652588, "learning_rate": 4.126676492946418e-06, "loss": 0.7624, "step": 272390 }, { "epoch": 1.7402859588822306, "grad_norm": 0.7476529479026794, "learning_rate": 4.124680623028371e-06, "loss": 0.7909, "step": 272400 }, { "epoch": 1.740349846031969, "grad_norm": 1.2250369787216187, "learning_rate": 4.122685215114469e-06, "loss": 0.8418, "step": 272410 }, { "epoch": 1.7404137331817078, "grad_norm": 0.7783737778663635, "learning_rate": 4.120690269224825e-06, "loss": 0.7274, "step": 272420 }, { "epoch": 1.7404776203314465, "grad_norm": 1.6127846240997314, "learning_rate": 4.118695785379512e-06, "loss": 0.7691, "step": 272430 }, { "epoch": 1.7405415074811852, "grad_norm": 0.8426513075828552, "learning_rate": 4.116701763598635e-06, "loss": 0.7788, "step": 272440 }, { "epoch": 1.740605394630924, "grad_norm": 1.2168272733688354, "learning_rate": 4.114708203902268e-06, "loss": 0.7771, "step": 272450 }, { "epoch": 1.7406692817806626, "grad_norm": 1.7537342309951782, "learning_rate": 4.1127151063104714e-06, "loss": 0.9725, "step": 272460 }, { "epoch": 1.7407331689304013, "grad_norm": 3.2746481895446777, "learning_rate": 4.110722470843348e-06, "loss": 1.1022, "step": 272470 }, { "epoch": 1.74079705608014, "grad_norm": 1.014939546585083, "learning_rate": 4.108730297520935e-06, "loss": 0.725, "step": 272480 }, { "epoch": 1.7408609432298787, "grad_norm": 0.6446115374565125, "learning_rate": 4.106738586363318e-06, "loss": 0.8543, "step": 272490 }, { "epoch": 1.7409248303796174, "grad_norm": 1.045475721359253, "learning_rate": 4.1047473373905406e-06, "loss": 0.7015, "step": 272500 }, { "epoch": 1.7409887175293561, "grad_norm": 0.8331463932991028, "learning_rate": 4.102756550622672e-06, "loss": 0.6955, "step": 272510 }, { "epoch": 1.7410526046790948, "grad_norm": 0.7141439914703369, "learning_rate": 4.100766226079739e-06, "loss": 0.9134, "step": 272520 }, { "epoch": 1.7411164918288335, "grad_norm": 1.0723060369491577, "learning_rate": 4.098776363781809e-06, "loss": 0.6845, "step": 272530 }, { "epoch": 1.7411803789785723, "grad_norm": 0.7683533430099487, "learning_rate": 4.0967869637489e-06, "loss": 0.7682, "step": 272540 }, { "epoch": 1.741244266128311, "grad_norm": 0.8382824063301086, "learning_rate": 4.094798026001074e-06, "loss": 0.9829, "step": 272550 }, { "epoch": 1.7413081532780497, "grad_norm": 0.8560100197792053, "learning_rate": 4.0928095505583295e-06, "loss": 0.7741, "step": 272560 }, { "epoch": 1.7413720404277884, "grad_norm": 0.55511873960495, "learning_rate": 4.09082153744072e-06, "loss": 0.7897, "step": 272570 }, { "epoch": 1.741435927577527, "grad_norm": 0.6832825541496277, "learning_rate": 4.088833986668245e-06, "loss": 1.0163, "step": 272580 }, { "epoch": 1.7414998147272658, "grad_norm": 1.0762792825698853, "learning_rate": 4.086846898260932e-06, "loss": 0.8245, "step": 272590 }, { "epoch": 1.7415637018770045, "grad_norm": 1.5624908208847046, "learning_rate": 4.084860272238799e-06, "loss": 0.7916, "step": 272600 }, { "epoch": 1.7416275890267432, "grad_norm": 2.772987127304077, "learning_rate": 4.0828741086218365e-06, "loss": 1.0442, "step": 272610 }, { "epoch": 1.741691476176482, "grad_norm": 0.8396859169006348, "learning_rate": 4.080888407430067e-06, "loss": 0.6276, "step": 272620 }, { "epoch": 1.7417553633262206, "grad_norm": 1.0782091617584229, "learning_rate": 4.078903168683468e-06, "loss": 0.9455, "step": 272630 }, { "epoch": 1.7418192504759593, "grad_norm": 1.256385087966919, "learning_rate": 4.0769183924020456e-06, "loss": 0.5904, "step": 272640 }, { "epoch": 1.741883137625698, "grad_norm": 0.8513548970222473, "learning_rate": 4.07493407860578e-06, "loss": 0.9067, "step": 272650 }, { "epoch": 1.7419470247754365, "grad_norm": 1.0283018350601196, "learning_rate": 4.072950227314665e-06, "loss": 1.066, "step": 272660 }, { "epoch": 1.7420109119251754, "grad_norm": 0.6767253875732422, "learning_rate": 4.070966838548668e-06, "loss": 0.8141, "step": 272670 }, { "epoch": 1.742074799074914, "grad_norm": 1.193358302116394, "learning_rate": 4.0689839123277725e-06, "loss": 0.8988, "step": 272680 }, { "epoch": 1.7421386862246528, "grad_norm": 0.9188578128814697, "learning_rate": 4.067001448671942e-06, "loss": 0.6723, "step": 272690 }, { "epoch": 1.7422025733743913, "grad_norm": 1.4902043342590332, "learning_rate": 4.065019447601154e-06, "loss": 0.7255, "step": 272700 }, { "epoch": 1.7422664605241303, "grad_norm": 0.8948713541030884, "learning_rate": 4.0630379091353475e-06, "loss": 0.84, "step": 272710 }, { "epoch": 1.7423303476738687, "grad_norm": 0.7600857019424438, "learning_rate": 4.061056833294497e-06, "loss": 0.8922, "step": 272720 }, { "epoch": 1.7423942348236077, "grad_norm": 0.9479913711547852, "learning_rate": 4.05907622009854e-06, "loss": 0.7225, "step": 272730 }, { "epoch": 1.7424581219733462, "grad_norm": 0.9155439734458923, "learning_rate": 4.05709606956744e-06, "loss": 0.9039, "step": 272740 }, { "epoch": 1.742522009123085, "grad_norm": 0.7955976128578186, "learning_rate": 4.055116381721119e-06, "loss": 0.7655, "step": 272750 }, { "epoch": 1.7425858962728236, "grad_norm": 1.2719805240631104, "learning_rate": 4.053137156579534e-06, "loss": 0.8107, "step": 272760 }, { "epoch": 1.7426497834225625, "grad_norm": 1.0513930320739746, "learning_rate": 4.051158394162602e-06, "loss": 0.8821, "step": 272770 }, { "epoch": 1.742713670572301, "grad_norm": 0.9332901835441589, "learning_rate": 4.049180094490257e-06, "loss": 0.655, "step": 272780 }, { "epoch": 1.74277755772204, "grad_norm": 1.1140823364257812, "learning_rate": 4.047202257582422e-06, "loss": 1.0025, "step": 272790 }, { "epoch": 1.7428414448717784, "grad_norm": 3.2180252075195312, "learning_rate": 4.045224883459009e-06, "loss": 1.0532, "step": 272800 }, { "epoch": 1.7429053320215173, "grad_norm": 0.7645334005355835, "learning_rate": 4.043247972139946e-06, "loss": 0.8575, "step": 272810 }, { "epoch": 1.7429692191712558, "grad_norm": 1.2418649196624756, "learning_rate": 4.041271523645129e-06, "loss": 0.7803, "step": 272820 }, { "epoch": 1.7430331063209947, "grad_norm": 1.6020556688308716, "learning_rate": 4.039295537994475e-06, "loss": 0.8428, "step": 272830 }, { "epoch": 1.7430969934707332, "grad_norm": 0.8513848185539246, "learning_rate": 4.037320015207868e-06, "loss": 0.758, "step": 272840 }, { "epoch": 1.7431608806204721, "grad_norm": 1.0215321779251099, "learning_rate": 4.0353449553052205e-06, "loss": 0.7453, "step": 272850 }, { "epoch": 1.7432247677702106, "grad_norm": 0.789835512638092, "learning_rate": 4.0333703583064045e-06, "loss": 0.7061, "step": 272860 }, { "epoch": 1.7432886549199496, "grad_norm": 1.0114730596542358, "learning_rate": 4.031396224231326e-06, "loss": 0.8441, "step": 272870 }, { "epoch": 1.743352542069688, "grad_norm": 0.5644866228103638, "learning_rate": 4.029422553099843e-06, "loss": 0.8071, "step": 272880 }, { "epoch": 1.743416429219427, "grad_norm": 0.9736144542694092, "learning_rate": 4.027449344931861e-06, "loss": 0.9542, "step": 272890 }, { "epoch": 1.7434803163691655, "grad_norm": 1.1532905101776123, "learning_rate": 4.025476599747225e-06, "loss": 0.5613, "step": 272900 }, { "epoch": 1.7435442035189042, "grad_norm": 1.4434239864349365, "learning_rate": 4.023504317565818e-06, "loss": 0.6643, "step": 272910 }, { "epoch": 1.7436080906686429, "grad_norm": 1.9808419942855835, "learning_rate": 4.0215324984074935e-06, "loss": 0.7913, "step": 272920 }, { "epoch": 1.7436719778183816, "grad_norm": 0.6124402284622192, "learning_rate": 4.019561142292122e-06, "loss": 0.8837, "step": 272930 }, { "epoch": 1.7437358649681203, "grad_norm": 1.0098079442977905, "learning_rate": 4.017590249239544e-06, "loss": 0.8274, "step": 272940 }, { "epoch": 1.743799752117859, "grad_norm": 1.0427857637405396, "learning_rate": 4.015619819269612e-06, "loss": 0.9455, "step": 272950 }, { "epoch": 1.7438636392675977, "grad_norm": 1.9890084266662598, "learning_rate": 4.013649852402174e-06, "loss": 0.631, "step": 272960 }, { "epoch": 1.7439275264173364, "grad_norm": 0.7183147668838501, "learning_rate": 4.011680348657054e-06, "loss": 0.9754, "step": 272970 }, { "epoch": 1.743991413567075, "grad_norm": 1.2257187366485596, "learning_rate": 4.009711308054115e-06, "loss": 1.1979, "step": 272980 }, { "epoch": 1.7440553007168138, "grad_norm": 0.5666488409042358, "learning_rate": 4.0077427306131565e-06, "loss": 0.6182, "step": 272990 }, { "epoch": 1.7441191878665525, "grad_norm": 1.052765130996704, "learning_rate": 4.00577461635403e-06, "loss": 0.9402, "step": 273000 }, { "epoch": 1.7441830750162912, "grad_norm": 0.763070821762085, "learning_rate": 4.00380696529653e-06, "loss": 0.6844, "step": 273010 }, { "epoch": 1.74424696216603, "grad_norm": 1.1995108127593994, "learning_rate": 4.001839777460498e-06, "loss": 1.0478, "step": 273020 }, { "epoch": 1.7443108493157686, "grad_norm": 0.8796985745429993, "learning_rate": 3.999873052865727e-06, "loss": 0.818, "step": 273030 }, { "epoch": 1.7443747364655073, "grad_norm": 0.7032323479652405, "learning_rate": 3.997906791532036e-06, "loss": 0.8984, "step": 273040 }, { "epoch": 1.744438623615246, "grad_norm": 1.2603150606155396, "learning_rate": 3.995940993479208e-06, "loss": 0.7287, "step": 273050 }, { "epoch": 1.7445025107649847, "grad_norm": 0.902034342288971, "learning_rate": 3.9939756587270675e-06, "loss": 1.0164, "step": 273060 }, { "epoch": 1.7445663979147235, "grad_norm": 1.0966947078704834, "learning_rate": 3.992010787295386e-06, "loss": 0.916, "step": 273070 }, { "epoch": 1.7446302850644622, "grad_norm": 1.2284424304962158, "learning_rate": 3.990046379203965e-06, "loss": 0.9632, "step": 273080 }, { "epoch": 1.7446941722142009, "grad_norm": 1.0536028146743774, "learning_rate": 3.988082434472573e-06, "loss": 0.8907, "step": 273090 }, { "epoch": 1.7447580593639396, "grad_norm": 1.0629215240478516, "learning_rate": 3.986118953121004e-06, "loss": 0.855, "step": 273100 }, { "epoch": 1.7448219465136783, "grad_norm": 1.213989496231079, "learning_rate": 3.984155935169015e-06, "loss": 0.9258, "step": 273110 }, { "epoch": 1.744885833663417, "grad_norm": 0.9842742681503296, "learning_rate": 3.98219338063639e-06, "loss": 1.0247, "step": 273120 }, { "epoch": 1.7449497208131557, "grad_norm": 1.209633469581604, "learning_rate": 3.9802312895428904e-06, "loss": 0.9219, "step": 273130 }, { "epoch": 1.7450136079628944, "grad_norm": 1.1460846662521362, "learning_rate": 3.978269661908268e-06, "loss": 0.9312, "step": 273140 }, { "epoch": 1.7450774951126329, "grad_norm": 0.75315260887146, "learning_rate": 3.976308497752296e-06, "loss": 0.9829, "step": 273150 }, { "epoch": 1.7451413822623718, "grad_norm": 1.920297622680664, "learning_rate": 3.9743477970947075e-06, "loss": 1.0939, "step": 273160 }, { "epoch": 1.7452052694121103, "grad_norm": 1.0004301071166992, "learning_rate": 3.97238755995526e-06, "loss": 0.8376, "step": 273170 }, { "epoch": 1.7452691565618492, "grad_norm": 1.2894623279571533, "learning_rate": 3.970427786353681e-06, "loss": 0.8547, "step": 273180 }, { "epoch": 1.7453330437115877, "grad_norm": 2.6042747497558594, "learning_rate": 3.9684684763097225e-06, "loss": 0.7822, "step": 273190 }, { "epoch": 1.7453969308613266, "grad_norm": 0.6897473931312561, "learning_rate": 3.966509629843101e-06, "loss": 0.8511, "step": 273200 }, { "epoch": 1.7454608180110651, "grad_norm": 0.7066933512687683, "learning_rate": 3.964551246973558e-06, "loss": 1.0343, "step": 273210 }, { "epoch": 1.745524705160804, "grad_norm": 1.0931143760681152, "learning_rate": 3.962593327720809e-06, "loss": 0.9285, "step": 273220 }, { "epoch": 1.7455885923105425, "grad_norm": 0.878592312335968, "learning_rate": 3.960635872104573e-06, "loss": 1.004, "step": 273230 }, { "epoch": 1.7456524794602815, "grad_norm": 3.0393524169921875, "learning_rate": 3.958678880144562e-06, "loss": 0.9157, "step": 273240 }, { "epoch": 1.74571636661002, "grad_norm": 0.7381773591041565, "learning_rate": 3.956722351860492e-06, "loss": 0.7609, "step": 273250 }, { "epoch": 1.7457802537597589, "grad_norm": 1.044517993927002, "learning_rate": 3.954766287272055e-06, "loss": 0.9149, "step": 273260 }, { "epoch": 1.7458441409094974, "grad_norm": 1.0228127241134644, "learning_rate": 3.9528106863989615e-06, "loss": 0.7727, "step": 273270 }, { "epoch": 1.7459080280592363, "grad_norm": 0.7310999631881714, "learning_rate": 3.950855549260896e-06, "loss": 0.6429, "step": 273280 }, { "epoch": 1.7459719152089748, "grad_norm": 0.7440299987792969, "learning_rate": 3.9489008758775595e-06, "loss": 0.7806, "step": 273290 }, { "epoch": 1.7460358023587137, "grad_norm": 1.0097001791000366, "learning_rate": 3.9469466662686195e-06, "loss": 0.878, "step": 273300 }, { "epoch": 1.7460996895084522, "grad_norm": 0.48892977833747864, "learning_rate": 3.944992920453783e-06, "loss": 0.6629, "step": 273310 }, { "epoch": 1.746163576658191, "grad_norm": 0.8091384768486023, "learning_rate": 3.943039638452694e-06, "loss": 0.8756, "step": 273320 }, { "epoch": 1.7462274638079296, "grad_norm": 1.1609203815460205, "learning_rate": 3.941086820285051e-06, "loss": 0.9226, "step": 273330 }, { "epoch": 1.7462913509576685, "grad_norm": 0.7286596298217773, "learning_rate": 3.939134465970512e-06, "loss": 0.8502, "step": 273340 }, { "epoch": 1.746355238107407, "grad_norm": 0.649346649646759, "learning_rate": 3.937182575528731e-06, "loss": 0.7657, "step": 273350 }, { "epoch": 1.746419125257146, "grad_norm": 1.26242995262146, "learning_rate": 3.935231148979379e-06, "loss": 0.6829, "step": 273360 }, { "epoch": 1.7464830124068844, "grad_norm": 0.7780110239982605, "learning_rate": 3.933280186342092e-06, "loss": 0.8697, "step": 273370 }, { "epoch": 1.7465468995566233, "grad_norm": 1.1589394807815552, "learning_rate": 3.9313296876365365e-06, "loss": 0.9451, "step": 273380 }, { "epoch": 1.7466107867063618, "grad_norm": 1.0762748718261719, "learning_rate": 3.929379652882337e-06, "loss": 1.202, "step": 273390 }, { "epoch": 1.7466746738561005, "grad_norm": 0.7835119962692261, "learning_rate": 3.927430082099154e-06, "loss": 0.818, "step": 273400 }, { "epoch": 1.7467385610058392, "grad_norm": 0.9623402953147888, "learning_rate": 3.925480975306595e-06, "loss": 0.8321, "step": 273410 }, { "epoch": 1.746802448155578, "grad_norm": 0.8765859603881836, "learning_rate": 3.92353233252431e-06, "loss": 1.0047, "step": 273420 }, { "epoch": 1.7468663353053167, "grad_norm": 1.7282193899154663, "learning_rate": 3.9215841537719175e-06, "loss": 0.7416, "step": 273430 }, { "epoch": 1.7469302224550554, "grad_norm": 0.9430720210075378, "learning_rate": 3.91963643906903e-06, "loss": 0.7672, "step": 273440 }, { "epoch": 1.746994109604794, "grad_norm": 0.8017761707305908, "learning_rate": 3.917689188435275e-06, "loss": 0.8316, "step": 273450 }, { "epoch": 1.7470579967545328, "grad_norm": 0.7006866335868835, "learning_rate": 3.915742401890249e-06, "loss": 0.748, "step": 273460 }, { "epoch": 1.7471218839042715, "grad_norm": 0.8986942768096924, "learning_rate": 3.913796079453575e-06, "loss": 0.997, "step": 273470 }, { "epoch": 1.7471857710540102, "grad_norm": 1.1999050378799438, "learning_rate": 3.911850221144836e-06, "loss": 1.0847, "step": 273480 }, { "epoch": 1.747249658203749, "grad_norm": 1.0616730451583862, "learning_rate": 3.909904826983646e-06, "loss": 0.8245, "step": 273490 }, { "epoch": 1.7473135453534876, "grad_norm": 0.9581630825996399, "learning_rate": 3.907959896989577e-06, "loss": 0.6649, "step": 273500 }, { "epoch": 1.7473774325032263, "grad_norm": 0.7028853297233582, "learning_rate": 3.906015431182242e-06, "loss": 0.7376, "step": 273510 }, { "epoch": 1.747441319652965, "grad_norm": 0.8302884101867676, "learning_rate": 3.904071429581191e-06, "loss": 1.0715, "step": 273520 }, { "epoch": 1.7475052068027037, "grad_norm": 1.7672325372695923, "learning_rate": 3.902127892206037e-06, "loss": 1.0683, "step": 273530 }, { "epoch": 1.7475690939524424, "grad_norm": 1.1879684925079346, "learning_rate": 3.900184819076319e-06, "loss": 0.7099, "step": 273540 }, { "epoch": 1.7476329811021811, "grad_norm": 1.1202888488769531, "learning_rate": 3.8982422102116335e-06, "loss": 0.751, "step": 273550 }, { "epoch": 1.7476968682519198, "grad_norm": 1.224502682685852, "learning_rate": 3.896300065631525e-06, "loss": 0.8411, "step": 273560 }, { "epoch": 1.7477607554016585, "grad_norm": 1.0552843809127808, "learning_rate": 3.894358385355568e-06, "loss": 1.0771, "step": 273570 }, { "epoch": 1.7478246425513972, "grad_norm": 0.961863100528717, "learning_rate": 3.8924171694033005e-06, "loss": 0.7817, "step": 273580 }, { "epoch": 1.747888529701136, "grad_norm": 0.8923673629760742, "learning_rate": 3.890476417794286e-06, "loss": 0.9693, "step": 273590 }, { "epoch": 1.7479524168508747, "grad_norm": 0.7879084944725037, "learning_rate": 3.888536130548065e-06, "loss": 0.9674, "step": 273600 }, { "epoch": 1.7480163040006134, "grad_norm": 1.2090606689453125, "learning_rate": 3.886596307684176e-06, "loss": 0.8197, "step": 273610 }, { "epoch": 1.748080191150352, "grad_norm": 1.116636037826538, "learning_rate": 3.884656949222154e-06, "loss": 1.0655, "step": 273620 }, { "epoch": 1.7481440783000908, "grad_norm": 0.9606205821037292, "learning_rate": 3.882718055181539e-06, "loss": 0.7936, "step": 273630 }, { "epoch": 1.7482079654498293, "grad_norm": 1.0061858892440796, "learning_rate": 3.880779625581837e-06, "loss": 0.9665, "step": 273640 }, { "epoch": 1.7482718525995682, "grad_norm": 1.251664638519287, "learning_rate": 3.878841660442589e-06, "loss": 0.843, "step": 273650 }, { "epoch": 1.7483357397493067, "grad_norm": 1.0400848388671875, "learning_rate": 3.876904159783312e-06, "loss": 1.1301, "step": 273660 }, { "epoch": 1.7483996268990456, "grad_norm": 0.9120842218399048, "learning_rate": 3.874967123623502e-06, "loss": 0.8551, "step": 273670 }, { "epoch": 1.748463514048784, "grad_norm": 0.9632731676101685, "learning_rate": 3.873030551982687e-06, "loss": 0.7669, "step": 273680 }, { "epoch": 1.748527401198523, "grad_norm": 0.9888166785240173, "learning_rate": 3.8710944448803525e-06, "loss": 0.7613, "step": 273690 }, { "epoch": 1.7485912883482615, "grad_norm": 0.7286546230316162, "learning_rate": 3.869158802336015e-06, "loss": 0.7758, "step": 273700 }, { "epoch": 1.7486551754980004, "grad_norm": 0.7055147886276245, "learning_rate": 3.867223624369143e-06, "loss": 1.0643, "step": 273710 }, { "epoch": 1.748719062647739, "grad_norm": 1.2502223253250122, "learning_rate": 3.865288910999254e-06, "loss": 0.8506, "step": 273720 }, { "epoch": 1.7487829497974778, "grad_norm": 1.2963123321533203, "learning_rate": 3.863354662245805e-06, "loss": 0.9619, "step": 273730 }, { "epoch": 1.7488468369472163, "grad_norm": 1.1647197008132935, "learning_rate": 3.861420878128296e-06, "loss": 0.7913, "step": 273740 }, { "epoch": 1.7489107240969552, "grad_norm": 1.0641111135482788, "learning_rate": 3.8594875586661914e-06, "loss": 0.7081, "step": 273750 }, { "epoch": 1.7489746112466937, "grad_norm": 1.3449046611785889, "learning_rate": 3.8575547038789685e-06, "loss": 0.7491, "step": 273760 }, { "epoch": 1.7490384983964327, "grad_norm": 0.7886676788330078, "learning_rate": 3.855622313786078e-06, "loss": 0.7877, "step": 273770 }, { "epoch": 1.7491023855461711, "grad_norm": 1.1052894592285156, "learning_rate": 3.853690388407006e-06, "loss": 0.7696, "step": 273780 }, { "epoch": 1.74916627269591, "grad_norm": 0.8340318202972412, "learning_rate": 3.851758927761178e-06, "loss": 0.7721, "step": 273790 }, { "epoch": 1.7492301598456486, "grad_norm": 2.4856231212615967, "learning_rate": 3.849827931868077e-06, "loss": 0.9347, "step": 273800 }, { "epoch": 1.7492940469953875, "grad_norm": 0.927199125289917, "learning_rate": 3.8478974007471245e-06, "loss": 1.0155, "step": 273810 }, { "epoch": 1.749357934145126, "grad_norm": 1.527815580368042, "learning_rate": 3.845967334417777e-06, "loss": 0.9916, "step": 273820 }, { "epoch": 1.749421821294865, "grad_norm": 0.7958928942680359, "learning_rate": 3.8440377328994635e-06, "loss": 0.8657, "step": 273830 }, { "epoch": 1.7494857084446034, "grad_norm": 1.13398015499115, "learning_rate": 3.842108596211619e-06, "loss": 0.9195, "step": 273840 }, { "epoch": 1.7495495955943423, "grad_norm": 0.7874863147735596, "learning_rate": 3.840179924373677e-06, "loss": 0.8805, "step": 273850 }, { "epoch": 1.7496134827440808, "grad_norm": 1.1453686952590942, "learning_rate": 3.838251717405056e-06, "loss": 0.8999, "step": 273860 }, { "epoch": 1.7496773698938197, "grad_norm": 0.9717140197753906, "learning_rate": 3.836323975325179e-06, "loss": 0.8969, "step": 273870 }, { "epoch": 1.7497412570435582, "grad_norm": 0.8356024026870728, "learning_rate": 3.834396698153453e-06, "loss": 0.876, "step": 273880 }, { "epoch": 1.749805144193297, "grad_norm": 0.8037473559379578, "learning_rate": 3.832469885909296e-06, "loss": 0.8511, "step": 273890 }, { "epoch": 1.7498690313430356, "grad_norm": 0.9532538652420044, "learning_rate": 3.830543538612103e-06, "loss": 0.8685, "step": 273900 }, { "epoch": 1.7499329184927743, "grad_norm": 1.1712671518325806, "learning_rate": 3.828617656281292e-06, "loss": 1.0234, "step": 273910 }, { "epoch": 1.749996805642513, "grad_norm": 1.6563823223114014, "learning_rate": 3.82669223893623e-06, "loss": 0.809, "step": 273920 }, { "epoch": 1.7500606927922517, "grad_norm": 1.1994155645370483, "learning_rate": 3.824767286596331e-06, "loss": 0.7852, "step": 273930 }, { "epoch": 1.7501245799419904, "grad_norm": 0.8417662382125854, "learning_rate": 3.8228427992809625e-06, "loss": 0.8903, "step": 273940 }, { "epoch": 1.7501884670917291, "grad_norm": 0.8921682238578796, "learning_rate": 3.820918777009525e-06, "loss": 1.0056, "step": 273950 }, { "epoch": 1.7502523542414679, "grad_norm": 0.6824707984924316, "learning_rate": 3.81899521980138e-06, "loss": 0.8528, "step": 273960 }, { "epoch": 1.7503162413912066, "grad_norm": 0.944797933101654, "learning_rate": 3.817072127675902e-06, "loss": 0.9613, "step": 273970 }, { "epoch": 1.7503801285409453, "grad_norm": 0.9320737719535828, "learning_rate": 3.815149500652471e-06, "loss": 1.1001, "step": 273980 }, { "epoch": 1.750444015690684, "grad_norm": 0.78386390209198, "learning_rate": 3.81322733875043e-06, "loss": 1.0468, "step": 273990 }, { "epoch": 1.7505079028404227, "grad_norm": 1.0317583084106445, "learning_rate": 3.8113056419891603e-06, "loss": 0.9644, "step": 274000 }, { "epoch": 1.7505717899901614, "grad_norm": 1.138235330581665, "learning_rate": 3.8093844103879893e-06, "loss": 0.8534, "step": 274010 }, { "epoch": 1.7506356771399, "grad_norm": 0.7695803642272949, "learning_rate": 3.8074636439662913e-06, "loss": 0.8002, "step": 274020 }, { "epoch": 1.7506995642896388, "grad_norm": 1.170741081237793, "learning_rate": 3.8055433427433838e-06, "loss": 1.1794, "step": 274030 }, { "epoch": 1.7507634514393775, "grad_norm": 0.96340411901474, "learning_rate": 3.8036235067386295e-06, "loss": 0.7233, "step": 274040 }, { "epoch": 1.7508273385891162, "grad_norm": 1.1876211166381836, "learning_rate": 3.80170413597134e-06, "loss": 0.8274, "step": 274050 }, { "epoch": 1.750891225738855, "grad_norm": 0.652173638343811, "learning_rate": 3.7997852304608673e-06, "loss": 0.923, "step": 274060 }, { "epoch": 1.7509551128885936, "grad_norm": 0.9098193645477295, "learning_rate": 3.7978667902265175e-06, "loss": 0.813, "step": 274070 }, { "epoch": 1.7510190000383323, "grad_norm": 2.1511783599853516, "learning_rate": 3.795948815287631e-06, "loss": 0.721, "step": 274080 }, { "epoch": 1.751082887188071, "grad_norm": 1.1116671562194824, "learning_rate": 3.794031305663498e-06, "loss": 1.0658, "step": 274090 }, { "epoch": 1.7511467743378097, "grad_norm": 1.0649752616882324, "learning_rate": 3.7921142613734586e-06, "loss": 0.8786, "step": 274100 }, { "epoch": 1.7512106614875484, "grad_norm": 1.5795048475265503, "learning_rate": 3.790197682436791e-06, "loss": 0.915, "step": 274110 }, { "epoch": 1.7512745486372872, "grad_norm": 1.1178737878799438, "learning_rate": 3.7882815688728203e-06, "loss": 0.6936, "step": 274120 }, { "epoch": 1.7513384357870256, "grad_norm": 1.1777743101119995, "learning_rate": 3.786365920700824e-06, "loss": 0.8257, "step": 274130 }, { "epoch": 1.7514023229367646, "grad_norm": 0.7389242649078369, "learning_rate": 3.784450737940115e-06, "loss": 0.9016, "step": 274140 }, { "epoch": 1.751466210086503, "grad_norm": 0.9251654744148254, "learning_rate": 3.7825360206099556e-06, "loss": 0.6585, "step": 274150 }, { "epoch": 1.751530097236242, "grad_norm": 1.0728000402450562, "learning_rate": 3.780621768729642e-06, "loss": 0.8943, "step": 274160 }, { "epoch": 1.7515939843859805, "grad_norm": 1.1792503595352173, "learning_rate": 3.7787079823184635e-06, "loss": 0.724, "step": 274170 }, { "epoch": 1.7516578715357194, "grad_norm": 0.9268394708633423, "learning_rate": 3.7767946613956775e-06, "loss": 0.8984, "step": 274180 }, { "epoch": 1.7517217586854579, "grad_norm": 0.8403618931770325, "learning_rate": 3.774881805980562e-06, "loss": 0.9804, "step": 274190 }, { "epoch": 1.7517856458351968, "grad_norm": 1.1892706155776978, "learning_rate": 3.77296941609237e-06, "loss": 0.9623, "step": 274200 }, { "epoch": 1.7518495329849353, "grad_norm": 0.9425559639930725, "learning_rate": 3.7710574917503736e-06, "loss": 0.7345, "step": 274210 }, { "epoch": 1.7519134201346742, "grad_norm": 0.986301064491272, "learning_rate": 3.769146032973819e-06, "loss": 0.7878, "step": 274220 }, { "epoch": 1.7519773072844127, "grad_norm": 1.136167049407959, "learning_rate": 3.7672350397819633e-06, "loss": 0.7139, "step": 274230 }, { "epoch": 1.7520411944341516, "grad_norm": 1.2224135398864746, "learning_rate": 3.7653245121940406e-06, "loss": 0.9626, "step": 274240 }, { "epoch": 1.7521050815838901, "grad_norm": 1.4254239797592163, "learning_rate": 3.763414450229308e-06, "loss": 0.8709, "step": 274250 }, { "epoch": 1.752168968733629, "grad_norm": 1.0309650897979736, "learning_rate": 3.761504853906983e-06, "loss": 0.8792, "step": 274260 }, { "epoch": 1.7522328558833675, "grad_norm": 1.1323344707489014, "learning_rate": 3.7595957232463174e-06, "loss": 1.0123, "step": 274270 }, { "epoch": 1.7522967430331065, "grad_norm": 0.822934091091156, "learning_rate": 3.7576870582665125e-06, "loss": 0.6937, "step": 274280 }, { "epoch": 1.752360630182845, "grad_norm": 1.0567173957824707, "learning_rate": 3.755778858986819e-06, "loss": 0.741, "step": 274290 }, { "epoch": 1.7524245173325839, "grad_norm": 0.784637451171875, "learning_rate": 3.753871125426428e-06, "loss": 0.7805, "step": 274300 }, { "epoch": 1.7524884044823223, "grad_norm": 1.0519686937332153, "learning_rate": 3.751963857604568e-06, "loss": 0.8545, "step": 274310 }, { "epoch": 1.7525522916320613, "grad_norm": 0.5889614224433899, "learning_rate": 3.75005705554044e-06, "loss": 1.0286, "step": 274320 }, { "epoch": 1.7526161787817998, "grad_norm": 0.8073184490203857, "learning_rate": 3.7481507192532574e-06, "loss": 0.8111, "step": 274330 }, { "epoch": 1.7526800659315387, "grad_norm": 0.8798998594284058, "learning_rate": 3.7462448487621982e-06, "loss": 1.0912, "step": 274340 }, { "epoch": 1.7527439530812772, "grad_norm": 0.648949384689331, "learning_rate": 3.7443394440864755e-06, "loss": 0.7207, "step": 274350 }, { "epoch": 1.7528078402310159, "grad_norm": 1.0135072469711304, "learning_rate": 3.742434505245268e-06, "loss": 0.9531, "step": 274360 }, { "epoch": 1.7528717273807546, "grad_norm": 0.8730331659317017, "learning_rate": 3.7405300322577607e-06, "loss": 0.6867, "step": 274370 }, { "epoch": 1.7529356145304933, "grad_norm": 1.0686019659042358, "learning_rate": 3.7386260251431494e-06, "loss": 0.8929, "step": 274380 }, { "epoch": 1.752999501680232, "grad_norm": 1.0393273830413818, "learning_rate": 3.7367224839205796e-06, "loss": 0.7427, "step": 274390 }, { "epoch": 1.7530633888299707, "grad_norm": 0.8225563764572144, "learning_rate": 3.734819408609258e-06, "loss": 0.8183, "step": 274400 }, { "epoch": 1.7531272759797094, "grad_norm": 1.018921971321106, "learning_rate": 3.7329167992283143e-06, "loss": 0.8351, "step": 274410 }, { "epoch": 1.7531911631294481, "grad_norm": 0.7754051685333252, "learning_rate": 3.731014655796933e-06, "loss": 0.6592, "step": 274420 }, { "epoch": 1.7532550502791868, "grad_norm": 0.7325241565704346, "learning_rate": 3.729112978334254e-06, "loss": 0.8715, "step": 274430 }, { "epoch": 1.7533189374289255, "grad_norm": 0.9066896438598633, "learning_rate": 3.727211766859445e-06, "loss": 0.7465, "step": 274440 }, { "epoch": 1.7533828245786642, "grad_norm": 0.7925408482551575, "learning_rate": 3.7253110213916365e-06, "loss": 0.7522, "step": 274450 }, { "epoch": 1.753446711728403, "grad_norm": 1.0522191524505615, "learning_rate": 3.72341074194999e-06, "loss": 0.9043, "step": 274460 }, { "epoch": 1.7535105988781416, "grad_norm": 0.9948369860649109, "learning_rate": 3.721510928553623e-06, "loss": 0.954, "step": 274470 }, { "epoch": 1.7535744860278804, "grad_norm": 0.7237744927406311, "learning_rate": 3.719611581221688e-06, "loss": 0.5447, "step": 274480 }, { "epoch": 1.753638373177619, "grad_norm": 0.5895835161209106, "learning_rate": 3.7177126999732913e-06, "loss": 0.7027, "step": 274490 }, { "epoch": 1.7537022603273578, "grad_norm": 0.9909476041793823, "learning_rate": 3.715814284827568e-06, "loss": 0.6463, "step": 274500 }, { "epoch": 1.7537661474770965, "grad_norm": 0.7529990673065186, "learning_rate": 3.7139163358036464e-06, "loss": 0.8121, "step": 274510 }, { "epoch": 1.7538300346268352, "grad_norm": 0.7453489899635315, "learning_rate": 3.712018852920618e-06, "loss": 0.6555, "step": 274520 }, { "epoch": 1.7538939217765739, "grad_norm": 2.500941753387451, "learning_rate": 3.7101218361976165e-06, "loss": 0.9852, "step": 274530 }, { "epoch": 1.7539578089263126, "grad_norm": 1.3202393054962158, "learning_rate": 3.708225285653727e-06, "loss": 0.829, "step": 274540 }, { "epoch": 1.7540216960760513, "grad_norm": 1.2711588144302368, "learning_rate": 3.706329201308062e-06, "loss": 1.165, "step": 274550 }, { "epoch": 1.75408558322579, "grad_norm": 1.3019556999206543, "learning_rate": 3.704433583179706e-06, "loss": 0.9572, "step": 274560 }, { "epoch": 1.7541494703755287, "grad_norm": 1.163252353668213, "learning_rate": 3.7025384312877607e-06, "loss": 0.8862, "step": 274570 }, { "epoch": 1.7542133575252674, "grad_norm": 1.1229562759399414, "learning_rate": 3.7006437456513e-06, "loss": 0.8069, "step": 274580 }, { "epoch": 1.7542772446750061, "grad_norm": 1.851560354232788, "learning_rate": 3.698749526289419e-06, "loss": 1.021, "step": 274590 }, { "epoch": 1.7543411318247448, "grad_norm": 0.8247557282447815, "learning_rate": 3.6968557732211752e-06, "loss": 0.8604, "step": 274600 }, { "epoch": 1.7544050189744835, "grad_norm": 0.7716491222381592, "learning_rate": 3.694962486465664e-06, "loss": 0.9591, "step": 274610 }, { "epoch": 1.754468906124222, "grad_norm": 2.111315965652466, "learning_rate": 3.6930696660419317e-06, "loss": 1.0666, "step": 274620 }, { "epoch": 1.754532793273961, "grad_norm": 2.500415086746216, "learning_rate": 3.6911773119690572e-06, "loss": 0.988, "step": 274630 }, { "epoch": 1.7545966804236994, "grad_norm": 1.3053700923919678, "learning_rate": 3.689285424266081e-06, "loss": 0.9789, "step": 274640 }, { "epoch": 1.7546605675734384, "grad_norm": 0.8793125152587891, "learning_rate": 3.687394002952077e-06, "loss": 1.0954, "step": 274650 }, { "epoch": 1.7547244547231768, "grad_norm": 0.8418011665344238, "learning_rate": 3.6855030480460686e-06, "loss": 0.8198, "step": 274660 }, { "epoch": 1.7547883418729158, "grad_norm": 1.2235418558120728, "learning_rate": 3.6836125595671234e-06, "loss": 0.7422, "step": 274670 }, { "epoch": 1.7548522290226543, "grad_norm": 0.7017014026641846, "learning_rate": 3.6817225375342603e-06, "loss": 0.7226, "step": 274680 }, { "epoch": 1.7549161161723932, "grad_norm": 1.1416294574737549, "learning_rate": 3.679832981966519e-06, "loss": 0.7806, "step": 274690 }, { "epoch": 1.7549800033221317, "grad_norm": 1.0321959257125854, "learning_rate": 3.6779438928829403e-06, "loss": 1.0022, "step": 274700 }, { "epoch": 1.7550438904718706, "grad_norm": 0.8865734338760376, "learning_rate": 3.6760552703025364e-06, "loss": 1.2421, "step": 274710 }, { "epoch": 1.755107777621609, "grad_norm": 0.9447980523109436, "learning_rate": 3.674167114244342e-06, "loss": 0.9127, "step": 274720 }, { "epoch": 1.755171664771348, "grad_norm": 0.7748607397079468, "learning_rate": 3.6722794247273483e-06, "loss": 0.7871, "step": 274730 }, { "epoch": 1.7552355519210865, "grad_norm": 0.8334684371948242, "learning_rate": 3.6703922017705895e-06, "loss": 0.8679, "step": 274740 }, { "epoch": 1.7552994390708254, "grad_norm": 1.1279202699661255, "learning_rate": 3.6685054453930558e-06, "loss": 0.7924, "step": 274750 }, { "epoch": 1.755363326220564, "grad_norm": 0.8488116264343262, "learning_rate": 3.666619155613765e-06, "loss": 0.8025, "step": 274760 }, { "epoch": 1.7554272133703028, "grad_norm": 0.7714635729789734, "learning_rate": 3.664733332451692e-06, "loss": 0.7927, "step": 274770 }, { "epoch": 1.7554911005200413, "grad_norm": 0.9155358672142029, "learning_rate": 3.6628479759258485e-06, "loss": 0.8599, "step": 274780 }, { "epoch": 1.7555549876697802, "grad_norm": 0.6552935838699341, "learning_rate": 3.6609630860552023e-06, "loss": 1.0248, "step": 274790 }, { "epoch": 1.7556188748195187, "grad_norm": 1.1390666961669922, "learning_rate": 3.6590786628587615e-06, "loss": 0.9757, "step": 274800 }, { "epoch": 1.7556827619692577, "grad_norm": 0.7530357837677002, "learning_rate": 3.657194706355477e-06, "loss": 0.7806, "step": 274810 }, { "epoch": 1.7557466491189961, "grad_norm": 0.8142558932304382, "learning_rate": 3.6553112165643387e-06, "loss": 0.9117, "step": 274820 }, { "epoch": 1.755810536268735, "grad_norm": 1.016108751296997, "learning_rate": 3.65342819350431e-06, "loss": 0.8177, "step": 274830 }, { "epoch": 1.7558744234184736, "grad_norm": 1.1278547048568726, "learning_rate": 3.651545637194359e-06, "loss": 0.7878, "step": 274840 }, { "epoch": 1.7559383105682123, "grad_norm": 1.5448342561721802, "learning_rate": 3.6496635476534313e-06, "loss": 0.7154, "step": 274850 }, { "epoch": 1.756002197717951, "grad_norm": 1.060778260231018, "learning_rate": 3.6477819249005007e-06, "loss": 0.9649, "step": 274860 }, { "epoch": 1.7560660848676897, "grad_norm": 1.027547836303711, "learning_rate": 3.645900768954497e-06, "loss": 1.0678, "step": 274870 }, { "epoch": 1.7561299720174284, "grad_norm": 0.8141332268714905, "learning_rate": 3.6440200798343815e-06, "loss": 0.9652, "step": 274880 }, { "epoch": 1.756193859167167, "grad_norm": 0.9573503136634827, "learning_rate": 3.6421398575590903e-06, "loss": 0.9573, "step": 274890 }, { "epoch": 1.7562577463169058, "grad_norm": 1.0918097496032715, "learning_rate": 3.640260102147547e-06, "loss": 0.9735, "step": 274900 }, { "epoch": 1.7563216334666445, "grad_norm": 1.0628275871276855, "learning_rate": 3.638380813618697e-06, "loss": 0.8058, "step": 274910 }, { "epoch": 1.7563855206163832, "grad_norm": 1.0889729261398315, "learning_rate": 3.6365019919914533e-06, "loss": 1.0859, "step": 274920 }, { "epoch": 1.756449407766122, "grad_norm": 1.5826724767684937, "learning_rate": 3.6346236372847564e-06, "loss": 0.7498, "step": 274930 }, { "epoch": 1.7565132949158606, "grad_norm": 0.85593581199646, "learning_rate": 3.632745749517502e-06, "loss": 0.9096, "step": 274940 }, { "epoch": 1.7565771820655993, "grad_norm": 1.0470643043518066, "learning_rate": 3.6308683287086143e-06, "loss": 0.8088, "step": 274950 }, { "epoch": 1.756641069215338, "grad_norm": 0.9474973678588867, "learning_rate": 3.6289913748769944e-06, "loss": 0.816, "step": 274960 }, { "epoch": 1.7567049563650767, "grad_norm": 0.6857970356941223, "learning_rate": 3.627114888041555e-06, "loss": 0.9826, "step": 274970 }, { "epoch": 1.7567688435148154, "grad_norm": 0.4515606760978699, "learning_rate": 3.6252388682211815e-06, "loss": 0.9802, "step": 274980 }, { "epoch": 1.7568327306645541, "grad_norm": 1.347015380859375, "learning_rate": 3.6233633154347747e-06, "loss": 0.9361, "step": 274990 }, { "epoch": 1.7568966178142928, "grad_norm": 1.2522832155227661, "learning_rate": 3.62148822970122e-06, "loss": 0.6884, "step": 275000 }, { "epoch": 1.7569605049640316, "grad_norm": 0.6121107935905457, "learning_rate": 3.6196136110393963e-06, "loss": 0.57, "step": 275010 }, { "epoch": 1.7570243921137703, "grad_norm": 0.9248584508895874, "learning_rate": 3.6177394594682e-06, "loss": 0.8776, "step": 275020 }, { "epoch": 1.757088279263509, "grad_norm": 2.5324671268463135, "learning_rate": 3.6158657750064828e-06, "loss": 0.6613, "step": 275030 }, { "epoch": 1.7571521664132477, "grad_norm": 2.7908589839935303, "learning_rate": 3.613992557673135e-06, "loss": 1.0586, "step": 275040 }, { "epoch": 1.7572160535629864, "grad_norm": 0.8149098753929138, "learning_rate": 3.612119807487002e-06, "loss": 0.747, "step": 275050 }, { "epoch": 1.757279940712725, "grad_norm": 0.5843006372451782, "learning_rate": 3.610247524466959e-06, "loss": 0.8862, "step": 275060 }, { "epoch": 1.7573438278624638, "grad_norm": 1.139170527458191, "learning_rate": 3.608375708631856e-06, "loss": 0.7829, "step": 275070 }, { "epoch": 1.7574077150122025, "grad_norm": 1.3390437364578247, "learning_rate": 3.6065043600005453e-06, "loss": 0.8041, "step": 275080 }, { "epoch": 1.757471602161941, "grad_norm": 0.9355593919754028, "learning_rate": 3.604633478591868e-06, "loss": 0.6621, "step": 275090 }, { "epoch": 1.75753548931168, "grad_norm": NaN, "learning_rate": 3.6029500848149933e-06, "loss": 0.9683, "step": 275100 }, { "epoch": 1.7575993764614184, "grad_norm": 0.6551210880279541, "learning_rate": 3.6010800911812303e-06, "loss": 0.92, "step": 275110 }, { "epoch": 1.7576632636111573, "grad_norm": 0.9354518055915833, "learning_rate": 3.59921056482474e-06, "loss": 0.792, "step": 275120 }, { "epoch": 1.7577271507608958, "grad_norm": 0.7017722725868225, "learning_rate": 3.597341505764329e-06, "loss": 0.807, "step": 275130 }, { "epoch": 1.7577910379106347, "grad_norm": 1.1099634170532227, "learning_rate": 3.595472914018838e-06, "loss": 1.1256, "step": 275140 }, { "epoch": 1.7578549250603732, "grad_norm": 0.9013046026229858, "learning_rate": 3.5936047896070856e-06, "loss": 0.7302, "step": 275150 }, { "epoch": 1.7579188122101121, "grad_norm": 0.8545605540275574, "learning_rate": 3.591737132547873e-06, "loss": 0.7842, "step": 275160 }, { "epoch": 1.7579826993598506, "grad_norm": 0.887650728225708, "learning_rate": 3.5898699428600245e-06, "loss": 1.1057, "step": 275170 }, { "epoch": 1.7580465865095896, "grad_norm": 0.7681732177734375, "learning_rate": 3.5880032205623304e-06, "loss": 0.8383, "step": 275180 }, { "epoch": 1.758110473659328, "grad_norm": 1.0179623365402222, "learning_rate": 3.5861369656736034e-06, "loss": 0.8732, "step": 275190 }, { "epoch": 1.758174360809067, "grad_norm": 0.8366377353668213, "learning_rate": 3.584271178212617e-06, "loss": 0.7925, "step": 275200 }, { "epoch": 1.7582382479588055, "grad_norm": 1.0721460580825806, "learning_rate": 3.58240585819819e-06, "loss": 0.7945, "step": 275210 }, { "epoch": 1.7583021351085444, "grad_norm": 1.5178515911102295, "learning_rate": 3.580541005649085e-06, "loss": 0.6637, "step": 275220 }, { "epoch": 1.7583660222582829, "grad_norm": 1.2210355997085571, "learning_rate": 3.5786766205840928e-06, "loss": 0.7637, "step": 275230 }, { "epoch": 1.7584299094080218, "grad_norm": 0.9875472187995911, "learning_rate": 3.576812703021981e-06, "loss": 0.9334, "step": 275240 }, { "epoch": 1.7584937965577603, "grad_norm": 1.1672228574752808, "learning_rate": 3.5749492529815407e-06, "loss": 0.8829, "step": 275250 }, { "epoch": 1.7585576837074992, "grad_norm": 0.7068379521369934, "learning_rate": 3.5730862704815125e-06, "loss": 0.7711, "step": 275260 }, { "epoch": 1.7586215708572377, "grad_norm": 1.5251413583755493, "learning_rate": 3.5712237555406756e-06, "loss": 1.1443, "step": 275270 }, { "epoch": 1.7586854580069766, "grad_norm": 0.8467580080032349, "learning_rate": 3.569361708177776e-06, "loss": 0.7313, "step": 275280 }, { "epoch": 1.758749345156715, "grad_norm": 0.9554235935211182, "learning_rate": 3.567500128411583e-06, "loss": 1.1889, "step": 275290 }, { "epoch": 1.758813232306454, "grad_norm": 1.2097324132919312, "learning_rate": 3.5656390162608246e-06, "loss": 0.9851, "step": 275300 }, { "epoch": 1.7588771194561925, "grad_norm": 0.9549233913421631, "learning_rate": 3.5637783717442587e-06, "loss": 1.0911, "step": 275310 }, { "epoch": 1.7589410066059314, "grad_norm": 0.894060492515564, "learning_rate": 3.561918194880609e-06, "loss": 1.0995, "step": 275320 }, { "epoch": 1.75900489375567, "grad_norm": 1.4603536128997803, "learning_rate": 3.5600584856886277e-06, "loss": 0.6686, "step": 275330 }, { "epoch": 1.7590687809054086, "grad_norm": 0.8984410166740417, "learning_rate": 3.5581992441870273e-06, "loss": 0.7872, "step": 275340 }, { "epoch": 1.7591326680551473, "grad_norm": 0.9645366668701172, "learning_rate": 3.5563404703945367e-06, "loss": 0.8436, "step": 275350 }, { "epoch": 1.759196555204886, "grad_norm": 2.0763237476348877, "learning_rate": 3.5544821643298864e-06, "loss": 0.6756, "step": 275360 }, { "epoch": 1.7592604423546248, "grad_norm": 1.1000155210494995, "learning_rate": 3.552624326011772e-06, "loss": 0.9746, "step": 275370 }, { "epoch": 1.7593243295043635, "grad_norm": 0.747199296951294, "learning_rate": 3.550766955458923e-06, "loss": 0.7532, "step": 275380 }, { "epoch": 1.7593882166541022, "grad_norm": 1.1015061140060425, "learning_rate": 3.5489100526900244e-06, "loss": 1.1804, "step": 275390 }, { "epoch": 1.7594521038038409, "grad_norm": 0.7550133466720581, "learning_rate": 3.5470536177238e-06, "loss": 0.9128, "step": 275400 }, { "epoch": 1.7595159909535796, "grad_norm": 0.7676512002944946, "learning_rate": 3.5451976505789188e-06, "loss": 0.8771, "step": 275410 }, { "epoch": 1.7595798781033183, "grad_norm": 0.9398499131202698, "learning_rate": 3.5433421512740983e-06, "loss": 0.7606, "step": 275420 }, { "epoch": 1.759643765253057, "grad_norm": 0.7365298867225647, "learning_rate": 3.5414871198280076e-06, "loss": 0.7261, "step": 275430 }, { "epoch": 1.7597076524027957, "grad_norm": 1.3296804428100586, "learning_rate": 3.539632556259337e-06, "loss": 0.9213, "step": 275440 }, { "epoch": 1.7597715395525344, "grad_norm": 0.9793141484260559, "learning_rate": 3.5377784605867546e-06, "loss": 0.9185, "step": 275450 }, { "epoch": 1.759835426702273, "grad_norm": 1.0745623111724854, "learning_rate": 3.5359248328289464e-06, "loss": 0.9798, "step": 275460 }, { "epoch": 1.7598993138520118, "grad_norm": 0.9916425943374634, "learning_rate": 3.534071673004563e-06, "loss": 0.7789, "step": 275470 }, { "epoch": 1.7599632010017505, "grad_norm": 1.2632156610488892, "learning_rate": 3.532218981132285e-06, "loss": 0.8559, "step": 275480 }, { "epoch": 1.7600270881514892, "grad_norm": 0.8439663052558899, "learning_rate": 3.5303667572307573e-06, "loss": 0.7617, "step": 275490 }, { "epoch": 1.760090975301228, "grad_norm": 1.366858959197998, "learning_rate": 3.528515001318644e-06, "loss": 0.8881, "step": 275500 }, { "epoch": 1.7601548624509666, "grad_norm": 1.0938761234283447, "learning_rate": 3.52666371341458e-06, "loss": 0.9536, "step": 275510 }, { "epoch": 1.7602187496007053, "grad_norm": 1.4395384788513184, "learning_rate": 3.524812893537227e-06, "loss": 0.8025, "step": 275520 }, { "epoch": 1.760282636750444, "grad_norm": 2.9542572498321533, "learning_rate": 3.5229625417052104e-06, "loss": 0.9451, "step": 275530 }, { "epoch": 1.7603465239001828, "grad_norm": 0.8342198133468628, "learning_rate": 3.5211126579371646e-06, "loss": 0.9168, "step": 275540 }, { "epoch": 1.7604104110499215, "grad_norm": 0.983436107635498, "learning_rate": 3.519263242251736e-06, "loss": 1.034, "step": 275550 }, { "epoch": 1.7604742981996602, "grad_norm": 0.7490174770355225, "learning_rate": 3.517414294667526e-06, "loss": 0.7333, "step": 275560 }, { "epoch": 1.7605381853493989, "grad_norm": 1.718109130859375, "learning_rate": 3.5155658152031757e-06, "loss": 0.9025, "step": 275570 }, { "epoch": 1.7606020724991374, "grad_norm": 0.7378758788108826, "learning_rate": 3.513717803877281e-06, "loss": 0.8956, "step": 275580 }, { "epoch": 1.7606659596488763, "grad_norm": 1.3051201105117798, "learning_rate": 3.5118702607084773e-06, "loss": 0.9403, "step": 275590 }, { "epoch": 1.7607298467986148, "grad_norm": 0.6553983688354492, "learning_rate": 3.51002318571535e-06, "loss": 1.1106, "step": 275600 }, { "epoch": 1.7607937339483537, "grad_norm": 0.7879045009613037, "learning_rate": 3.5081765789165164e-06, "loss": 0.6659, "step": 275610 }, { "epoch": 1.7608576210980922, "grad_norm": 0.8307299613952637, "learning_rate": 3.5063304403305577e-06, "loss": 0.7752, "step": 275620 }, { "epoch": 1.7609215082478311, "grad_norm": 1.0239109992980957, "learning_rate": 3.5044847699760795e-06, "loss": 0.7822, "step": 275630 }, { "epoch": 1.7609853953975696, "grad_norm": 2.6831319332122803, "learning_rate": 3.5026395678716572e-06, "loss": 0.8854, "step": 275640 }, { "epoch": 1.7610492825473085, "grad_norm": 0.6408683061599731, "learning_rate": 3.5007948340358864e-06, "loss": 1.0743, "step": 275650 }, { "epoch": 1.761113169697047, "grad_norm": 0.7142156958580017, "learning_rate": 3.498950568487336e-06, "loss": 0.9547, "step": 275660 }, { "epoch": 1.761177056846786, "grad_norm": 1.1051113605499268, "learning_rate": 3.497106771244579e-06, "loss": 0.8606, "step": 275670 }, { "epoch": 1.7612409439965244, "grad_norm": 1.0908756256103516, "learning_rate": 3.4952634423261966e-06, "loss": 0.639, "step": 275680 }, { "epoch": 1.7613048311462633, "grad_norm": 1.1603355407714844, "learning_rate": 3.493420581750739e-06, "loss": 0.7543, "step": 275690 }, { "epoch": 1.7613687182960018, "grad_norm": 0.5926783084869385, "learning_rate": 3.491578189536776e-06, "loss": 1.0203, "step": 275700 }, { "epoch": 1.7614326054457408, "grad_norm": 0.939707338809967, "learning_rate": 3.4897362657028476e-06, "loss": 0.8937, "step": 275710 }, { "epoch": 1.7614964925954792, "grad_norm": 1.0953004360198975, "learning_rate": 3.487894810267517e-06, "loss": 0.8824, "step": 275720 }, { "epoch": 1.7615603797452182, "grad_norm": 1.258279800415039, "learning_rate": 3.486053823249319e-06, "loss": 0.9268, "step": 275730 }, { "epoch": 1.7616242668949567, "grad_norm": 0.9371123909950256, "learning_rate": 3.4842133046668055e-06, "loss": 0.9814, "step": 275740 }, { "epoch": 1.7616881540446956, "grad_norm": 2.118590831756592, "learning_rate": 3.4823732545385013e-06, "loss": 0.9614, "step": 275750 }, { "epoch": 1.761752041194434, "grad_norm": 0.7608202695846558, "learning_rate": 3.4805336728829464e-06, "loss": 1.0091, "step": 275760 }, { "epoch": 1.761815928344173, "grad_norm": 0.9030619263648987, "learning_rate": 3.4786945597186593e-06, "loss": 0.8409, "step": 275770 }, { "epoch": 1.7618798154939115, "grad_norm": 1.2993314266204834, "learning_rate": 3.47685591506417e-06, "loss": 1.0666, "step": 275780 }, { "epoch": 1.7619437026436504, "grad_norm": 2.566411256790161, "learning_rate": 3.47501773893798e-06, "loss": 0.8391, "step": 275790 }, { "epoch": 1.762007589793389, "grad_norm": 1.1588977575302124, "learning_rate": 3.4731800313586195e-06, "loss": 0.7433, "step": 275800 }, { "epoch": 1.7620714769431278, "grad_norm": 1.8010987043380737, "learning_rate": 3.471342792344584e-06, "loss": 1.0248, "step": 275810 }, { "epoch": 1.7621353640928663, "grad_norm": 0.8282926082611084, "learning_rate": 3.4695060219143815e-06, "loss": 0.8259, "step": 275820 }, { "epoch": 1.762199251242605, "grad_norm": 1.285724401473999, "learning_rate": 3.4676697200865025e-06, "loss": 0.8297, "step": 275830 }, { "epoch": 1.7622631383923437, "grad_norm": 1.0171455144882202, "learning_rate": 3.465833886879455e-06, "loss": 0.8694, "step": 275840 }, { "epoch": 1.7623270255420824, "grad_norm": 1.3438469171524048, "learning_rate": 3.4639985223117066e-06, "loss": 0.9486, "step": 275850 }, { "epoch": 1.7623909126918211, "grad_norm": 0.8757327795028687, "learning_rate": 3.4621636264017596e-06, "loss": 1.0706, "step": 275860 }, { "epoch": 1.7624547998415598, "grad_norm": 0.7441837787628174, "learning_rate": 3.4603291991680885e-06, "loss": 0.8272, "step": 275870 }, { "epoch": 1.7625186869912985, "grad_norm": 0.8464133739471436, "learning_rate": 3.458495240629156e-06, "loss": 1.1859, "step": 275880 }, { "epoch": 1.7625825741410373, "grad_norm": 0.8027544617652893, "learning_rate": 3.456661750803447e-06, "loss": 0.7095, "step": 275890 }, { "epoch": 1.762646461290776, "grad_norm": 0.8621078729629517, "learning_rate": 3.4548287297094196e-06, "loss": 0.8539, "step": 275900 }, { "epoch": 1.7627103484405147, "grad_norm": 1.7479239702224731, "learning_rate": 3.452996177365536e-06, "loss": 0.6586, "step": 275910 }, { "epoch": 1.7627742355902534, "grad_norm": 1.0658780336380005, "learning_rate": 3.4511640937902434e-06, "loss": 0.6436, "step": 275920 }, { "epoch": 1.762838122739992, "grad_norm": 0.9121516942977905, "learning_rate": 3.4493324790020044e-06, "loss": 0.9354, "step": 275930 }, { "epoch": 1.7629020098897308, "grad_norm": 0.8368337750434875, "learning_rate": 3.447501333019254e-06, "loss": 0.8779, "step": 275940 }, { "epoch": 1.7629658970394695, "grad_norm": 0.9886699318885803, "learning_rate": 3.4456706558604502e-06, "loss": 0.8241, "step": 275950 }, { "epoch": 1.7630297841892082, "grad_norm": 0.7883988618850708, "learning_rate": 3.4438404475440055e-06, "loss": 0.7167, "step": 275960 }, { "epoch": 1.763093671338947, "grad_norm": 0.9630401134490967, "learning_rate": 3.4420107080883723e-06, "loss": 0.7229, "step": 275970 }, { "epoch": 1.7631575584886856, "grad_norm": 1.144049048423767, "learning_rate": 3.4401814375119632e-06, "loss": 1.0576, "step": 275980 }, { "epoch": 1.7632214456384243, "grad_norm": 0.8971433043479919, "learning_rate": 3.4383526358332142e-06, "loss": 0.8893, "step": 275990 }, { "epoch": 1.763285332788163, "grad_norm": 0.8197485208511353, "learning_rate": 3.4365243030705373e-06, "loss": 0.9808, "step": 276000 }, { "epoch": 1.7633492199379017, "grad_norm": 1.0844979286193848, "learning_rate": 3.4346964392423296e-06, "loss": 1.062, "step": 276010 }, { "epoch": 1.7634131070876404, "grad_norm": 0.7812053561210632, "learning_rate": 3.432869044367032e-06, "loss": 0.8334, "step": 276020 }, { "epoch": 1.7634769942373791, "grad_norm": 0.8636895418167114, "learning_rate": 3.4310421184630127e-06, "loss": 0.629, "step": 276030 }, { "epoch": 1.7635408813871178, "grad_norm": 0.8970896005630493, "learning_rate": 3.4292156615487013e-06, "loss": 0.8893, "step": 276040 }, { "epoch": 1.7636047685368565, "grad_norm": 1.0095678567886353, "learning_rate": 3.4273896736424615e-06, "loss": 0.7679, "step": 276050 }, { "epoch": 1.7636686556865953, "grad_norm": 1.979683518409729, "learning_rate": 3.4255641547627172e-06, "loss": 0.9901, "step": 276060 }, { "epoch": 1.7637325428363337, "grad_norm": 1.5521012544631958, "learning_rate": 3.42373910492782e-06, "loss": 1.0343, "step": 276070 }, { "epoch": 1.7637964299860727, "grad_norm": 1.0704764127731323, "learning_rate": 3.4219145241561725e-06, "loss": 0.7701, "step": 276080 }, { "epoch": 1.7638603171358112, "grad_norm": 0.7185716032981873, "learning_rate": 3.4200904124661316e-06, "loss": 0.629, "step": 276090 }, { "epoch": 1.76392420428555, "grad_norm": 1.0846500396728516, "learning_rate": 3.4182667698760883e-06, "loss": 1.2639, "step": 276100 }, { "epoch": 1.7639880914352886, "grad_norm": 1.1522587537765503, "learning_rate": 3.4164435964043894e-06, "loss": 1.037, "step": 276110 }, { "epoch": 1.7640519785850275, "grad_norm": 1.6337602138519287, "learning_rate": 3.4146208920694146e-06, "loss": 0.7858, "step": 276120 }, { "epoch": 1.764115865734766, "grad_norm": 1.0767061710357666, "learning_rate": 3.412798656889499e-06, "loss": 0.8875, "step": 276130 }, { "epoch": 1.764179752884505, "grad_norm": 1.116227388381958, "learning_rate": 3.410976890883011e-06, "loss": 0.9552, "step": 276140 }, { "epoch": 1.7642436400342434, "grad_norm": 1.6324573755264282, "learning_rate": 3.409155594068286e-06, "loss": 0.8507, "step": 276150 }, { "epoch": 1.7643075271839823, "grad_norm": 1.1252837181091309, "learning_rate": 3.407334766463677e-06, "loss": 0.7192, "step": 276160 }, { "epoch": 1.7643714143337208, "grad_norm": 0.8622950911521912, "learning_rate": 3.4055144080875066e-06, "loss": 0.7325, "step": 276170 }, { "epoch": 1.7644353014834597, "grad_norm": 1.1538571119308472, "learning_rate": 3.403694518958128e-06, "loss": 1.0452, "step": 276180 }, { "epoch": 1.7644991886331982, "grad_norm": 0.7291510701179504, "learning_rate": 3.401875099093843e-06, "loss": 1.0454, "step": 276190 }, { "epoch": 1.7645630757829371, "grad_norm": 0.843101441860199, "learning_rate": 3.4000561485129924e-06, "loss": 1.0167, "step": 276200 }, { "epoch": 1.7646269629326756, "grad_norm": 1.3674508333206177, "learning_rate": 3.3982376672339e-06, "loss": 1.0741, "step": 276210 }, { "epoch": 1.7646908500824146, "grad_norm": 0.8573333621025085, "learning_rate": 3.3964196552748627e-06, "loss": 0.9575, "step": 276220 }, { "epoch": 1.764754737232153, "grad_norm": 2.657174587249756, "learning_rate": 3.3946021126542047e-06, "loss": 1.0709, "step": 276230 }, { "epoch": 1.764818624381892, "grad_norm": 0.7357352375984192, "learning_rate": 3.3927850393902115e-06, "loss": 0.8728, "step": 276240 }, { "epoch": 1.7648825115316304, "grad_norm": 2.2075095176696777, "learning_rate": 3.3909684355012074e-06, "loss": 0.7582, "step": 276250 }, { "epoch": 1.7649463986813694, "grad_norm": 0.9739333391189575, "learning_rate": 3.3891523010054606e-06, "loss": 1.1386, "step": 276260 }, { "epoch": 1.7650102858311079, "grad_norm": 1.1989326477050781, "learning_rate": 3.3873366359212845e-06, "loss": 0.7894, "step": 276270 }, { "epoch": 1.7650741729808468, "grad_norm": 1.4745514392852783, "learning_rate": 3.385521440266948e-06, "loss": 0.6456, "step": 276280 }, { "epoch": 1.7651380601305853, "grad_norm": 0.7231010794639587, "learning_rate": 3.383706714060747e-06, "loss": 0.9416, "step": 276290 }, { "epoch": 1.7652019472803242, "grad_norm": 0.5068309903144836, "learning_rate": 3.3818924573209342e-06, "loss": 1.1731, "step": 276300 }, { "epoch": 1.7652658344300627, "grad_norm": 2.7875280380249023, "learning_rate": 3.380078670065806e-06, "loss": 0.9234, "step": 276310 }, { "epoch": 1.7653297215798014, "grad_norm": 0.622486412525177, "learning_rate": 3.3782653523136144e-06, "loss": 0.5415, "step": 276320 }, { "epoch": 1.76539360872954, "grad_norm": 0.9856690168380737, "learning_rate": 3.3764525040826224e-06, "loss": 0.9147, "step": 276330 }, { "epoch": 1.7654574958792788, "grad_norm": 0.8422602415084839, "learning_rate": 3.3746401253910874e-06, "loss": 1.0639, "step": 276340 }, { "epoch": 1.7655213830290175, "grad_norm": 0.8297490477561951, "learning_rate": 3.3728282162572676e-06, "loss": 1.2381, "step": 276350 }, { "epoch": 1.7655852701787562, "grad_norm": 0.8892385363578796, "learning_rate": 3.371016776699404e-06, "loss": 0.6482, "step": 276360 }, { "epoch": 1.765649157328495, "grad_norm": 1.1976794004440308, "learning_rate": 3.3692058067357425e-06, "loss": 0.924, "step": 276370 }, { "epoch": 1.7657130444782336, "grad_norm": 1.4974721670150757, "learning_rate": 3.367395306384513e-06, "loss": 0.9347, "step": 276380 }, { "epoch": 1.7657769316279723, "grad_norm": 0.9004227519035339, "learning_rate": 3.365585275663957e-06, "loss": 0.7606, "step": 276390 }, { "epoch": 1.765840818777711, "grad_norm": 0.8492181897163391, "learning_rate": 3.36377571459231e-06, "loss": 0.8315, "step": 276400 }, { "epoch": 1.7659047059274497, "grad_norm": 1.402463674545288, "learning_rate": 3.3619666231877846e-06, "loss": 0.9241, "step": 276410 }, { "epoch": 1.7659685930771885, "grad_norm": 0.6081668138504028, "learning_rate": 3.3601580014686053e-06, "loss": 0.8031, "step": 276420 }, { "epoch": 1.7660324802269272, "grad_norm": 1.5759546756744385, "learning_rate": 3.358349849452974e-06, "loss": 0.64, "step": 276430 }, { "epoch": 1.7660963673766659, "grad_norm": 0.9555615186691284, "learning_rate": 3.356542167159121e-06, "loss": 0.801, "step": 276440 }, { "epoch": 1.7661602545264046, "grad_norm": 0.8987722396850586, "learning_rate": 3.354734954605232e-06, "loss": 0.8198, "step": 276450 }, { "epoch": 1.7662241416761433, "grad_norm": 1.1487159729003906, "learning_rate": 3.352928211809525e-06, "loss": 0.801, "step": 276460 }, { "epoch": 1.766288028825882, "grad_norm": 0.9127746820449829, "learning_rate": 3.3511219387901803e-06, "loss": 0.9023, "step": 276470 }, { "epoch": 1.7663519159756207, "grad_norm": 0.9431878924369812, "learning_rate": 3.3493161355654e-06, "loss": 1.065, "step": 276480 }, { "epoch": 1.7664158031253594, "grad_norm": 1.7045027017593384, "learning_rate": 3.347510802153364e-06, "loss": 0.9515, "step": 276490 }, { "epoch": 1.766479690275098, "grad_norm": 0.4762714207172394, "learning_rate": 3.3457059385722577e-06, "loss": 0.9678, "step": 276500 }, { "epoch": 1.7665435774248368, "grad_norm": 1.3455365896224976, "learning_rate": 3.3439015448402444e-06, "loss": 0.7454, "step": 276510 }, { "epoch": 1.7666074645745755, "grad_norm": 0.713361382484436, "learning_rate": 3.342097620975504e-06, "loss": 0.7936, "step": 276520 }, { "epoch": 1.7666713517243142, "grad_norm": 1.2825889587402344, "learning_rate": 3.340294166996216e-06, "loss": 0.9622, "step": 276530 }, { "epoch": 1.766735238874053, "grad_norm": 0.8402884006500244, "learning_rate": 3.3384911829205222e-06, "loss": 1.0876, "step": 276540 }, { "epoch": 1.7667991260237916, "grad_norm": 1.9258328676223755, "learning_rate": 3.3366886687666022e-06, "loss": 0.7881, "step": 276550 }, { "epoch": 1.7668630131735301, "grad_norm": 1.8669530153274536, "learning_rate": 3.3348866245525855e-06, "loss": 1.2596, "step": 276560 }, { "epoch": 1.766926900323269, "grad_norm": 0.6702399849891663, "learning_rate": 3.3330850502966416e-06, "loss": 0.8636, "step": 276570 }, { "epoch": 1.7669907874730075, "grad_norm": 1.2956793308258057, "learning_rate": 3.3312839460168943e-06, "loss": 0.8951, "step": 276580 }, { "epoch": 1.7670546746227465, "grad_norm": 0.9595689177513123, "learning_rate": 3.329483311731496e-06, "loss": 0.8165, "step": 276590 }, { "epoch": 1.767118561772485, "grad_norm": 0.9628953337669373, "learning_rate": 3.3276831474585712e-06, "loss": 0.9032, "step": 276600 }, { "epoch": 1.7671824489222239, "grad_norm": 1.8576788902282715, "learning_rate": 3.3258834532162664e-06, "loss": 0.6831, "step": 276610 }, { "epoch": 1.7672463360719624, "grad_norm": 0.9650187492370605, "learning_rate": 3.3240842290226835e-06, "loss": 0.8597, "step": 276620 }, { "epoch": 1.7673102232217013, "grad_norm": 0.8248710632324219, "learning_rate": 3.322285474895959e-06, "loss": 0.7399, "step": 276630 }, { "epoch": 1.7673741103714398, "grad_norm": 1.4074620008468628, "learning_rate": 3.3204871908541935e-06, "loss": 0.8139, "step": 276640 }, { "epoch": 1.7674379975211787, "grad_norm": 0.8231216669082642, "learning_rate": 3.318689376915518e-06, "loss": 0.7705, "step": 276650 }, { "epoch": 1.7675018846709172, "grad_norm": 0.8074039220809937, "learning_rate": 3.3168920330980125e-06, "loss": 0.8506, "step": 276660 }, { "epoch": 1.767565771820656, "grad_norm": 1.0663862228393555, "learning_rate": 3.315095159419801e-06, "loss": 0.8019, "step": 276670 }, { "epoch": 1.7676296589703946, "grad_norm": 1.2024550437927246, "learning_rate": 3.3132987558989636e-06, "loss": 0.9289, "step": 276680 }, { "epoch": 1.7676935461201335, "grad_norm": 0.8090937733650208, "learning_rate": 3.3115028225536028e-06, "loss": 1.0337, "step": 276690 }, { "epoch": 1.767757433269872, "grad_norm": 1.0161302089691162, "learning_rate": 3.3097073594017926e-06, "loss": 0.8066, "step": 276700 }, { "epoch": 1.767821320419611, "grad_norm": 0.6586167216300964, "learning_rate": 3.307912366461624e-06, "loss": 0.7344, "step": 276710 }, { "epoch": 1.7678852075693494, "grad_norm": 1.5533359050750732, "learning_rate": 3.3061178437511776e-06, "loss": 1.1381, "step": 276720 }, { "epoch": 1.7679490947190883, "grad_norm": 1.1131467819213867, "learning_rate": 3.3043237912885107e-06, "loss": 1.1111, "step": 276730 }, { "epoch": 1.7680129818688268, "grad_norm": 1.5943667888641357, "learning_rate": 3.302530209091714e-06, "loss": 0.9598, "step": 276740 }, { "epoch": 1.7680768690185658, "grad_norm": 0.6993541717529297, "learning_rate": 3.300737097178824e-06, "loss": 0.9595, "step": 276750 }, { "epoch": 1.7681407561683042, "grad_norm": 0.9403067231178284, "learning_rate": 3.298944455567926e-06, "loss": 1.0233, "step": 276760 }, { "epoch": 1.7682046433180432, "grad_norm": 1.483699917793274, "learning_rate": 3.2971522842770442e-06, "loss": 0.7465, "step": 276770 }, { "epoch": 1.7682685304677817, "grad_norm": 0.983133852481842, "learning_rate": 3.2953605833242583e-06, "loss": 0.7698, "step": 276780 }, { "epoch": 1.7683324176175204, "grad_norm": 1.0668721199035645, "learning_rate": 3.2935693527275826e-06, "loss": 0.7182, "step": 276790 }, { "epoch": 1.768396304767259, "grad_norm": 0.9246516227722168, "learning_rate": 3.2917785925050792e-06, "loss": 0.7324, "step": 276800 }, { "epoch": 1.7684601919169978, "grad_norm": 0.9839482307434082, "learning_rate": 3.289988302674768e-06, "loss": 0.9245, "step": 276810 }, { "epoch": 1.7685240790667365, "grad_norm": 0.8526636362075806, "learning_rate": 3.28819848325469e-06, "loss": 0.7506, "step": 276820 }, { "epoch": 1.7685879662164752, "grad_norm": 1.9389556646347046, "learning_rate": 3.286409134262858e-06, "loss": 1.1102, "step": 276830 }, { "epoch": 1.7686518533662139, "grad_norm": 1.0603058338165283, "learning_rate": 3.284620255717308e-06, "loss": 0.8008, "step": 276840 }, { "epoch": 1.7687157405159526, "grad_norm": 1.1200687885284424, "learning_rate": 3.2828318476360364e-06, "loss": 0.8575, "step": 276850 }, { "epoch": 1.7687796276656913, "grad_norm": 0.8741885423660278, "learning_rate": 3.2810439100370736e-06, "loss": 0.9655, "step": 276860 }, { "epoch": 1.76884351481543, "grad_norm": 1.8504701852798462, "learning_rate": 3.279256442938411e-06, "loss": 0.8095, "step": 276870 }, { "epoch": 1.7689074019651687, "grad_norm": 0.5831618309020996, "learning_rate": 3.2774694463580615e-06, "loss": 0.724, "step": 276880 }, { "epoch": 1.7689712891149074, "grad_norm": 0.8052890300750732, "learning_rate": 3.275682920314005e-06, "loss": 0.9097, "step": 276890 }, { "epoch": 1.7690351762646461, "grad_norm": 0.6509976983070374, "learning_rate": 3.2738968648242496e-06, "loss": 1.1575, "step": 276900 }, { "epoch": 1.7690990634143848, "grad_norm": 0.7471203207969666, "learning_rate": 3.2721112799067754e-06, "loss": 0.7795, "step": 276910 }, { "epoch": 1.7691629505641235, "grad_norm": 0.8894343376159668, "learning_rate": 3.2703261655795626e-06, "loss": 0.7585, "step": 276920 }, { "epoch": 1.7692268377138622, "grad_norm": 1.0406216382980347, "learning_rate": 3.268541521860602e-06, "loss": 0.8372, "step": 276930 }, { "epoch": 1.769290724863601, "grad_norm": 0.7905427813529968, "learning_rate": 3.266757348767846e-06, "loss": 0.95, "step": 276940 }, { "epoch": 1.7693546120133397, "grad_norm": 1.1895004510879517, "learning_rate": 3.2649736463192806e-06, "loss": 0.983, "step": 276950 }, { "epoch": 1.7694184991630784, "grad_norm": 0.6956319808959961, "learning_rate": 3.2631904145328575e-06, "loss": 0.7909, "step": 276960 }, { "epoch": 1.769482386312817, "grad_norm": 0.8152353167533875, "learning_rate": 3.2614076534265513e-06, "loss": 0.8163, "step": 276970 }, { "epoch": 1.7695462734625558, "grad_norm": 0.6797952055931091, "learning_rate": 3.259625363018293e-06, "loss": 0.8145, "step": 276980 }, { "epoch": 1.7696101606122945, "grad_norm": 1.0962252616882324, "learning_rate": 3.25784354332605e-06, "loss": 0.9919, "step": 276990 }, { "epoch": 1.7696740477620332, "grad_norm": 2.2895405292510986, "learning_rate": 3.2560621943677537e-06, "loss": 0.8943, "step": 277000 }, { "epoch": 1.769737934911772, "grad_norm": 1.275295615196228, "learning_rate": 3.254281316161362e-06, "loss": 0.9772, "step": 277010 }, { "epoch": 1.7698018220615106, "grad_norm": 0.9024759531021118, "learning_rate": 3.2525009087247816e-06, "loss": 0.9631, "step": 277020 }, { "epoch": 1.769865709211249, "grad_norm": 1.0591357946395874, "learning_rate": 3.2507209720759656e-06, "loss": 0.9282, "step": 277030 }, { "epoch": 1.769929596360988, "grad_norm": 1.0445270538330078, "learning_rate": 3.2489415062328387e-06, "loss": 0.9007, "step": 277040 }, { "epoch": 1.7699934835107265, "grad_norm": 0.9703136682510376, "learning_rate": 3.247162511213309e-06, "loss": 1.0824, "step": 277050 }, { "epoch": 1.7700573706604654, "grad_norm": 0.7905303835868835, "learning_rate": 3.2453839870353055e-06, "loss": 0.9206, "step": 277060 }, { "epoch": 1.770121257810204, "grad_norm": 1.0880717039108276, "learning_rate": 3.2436059337167313e-06, "loss": 0.6517, "step": 277070 }, { "epoch": 1.7701851449599428, "grad_norm": 1.243598461151123, "learning_rate": 3.241828351275494e-06, "loss": 0.9865, "step": 277080 }, { "epoch": 1.7702490321096813, "grad_norm": 0.9723210334777832, "learning_rate": 3.240051239729497e-06, "loss": 1.0514, "step": 277090 }, { "epoch": 1.7703129192594202, "grad_norm": 0.8347112536430359, "learning_rate": 3.2382745990966412e-06, "loss": 1.0216, "step": 277100 }, { "epoch": 1.7703768064091587, "grad_norm": 0.49017333984375, "learning_rate": 3.236498429394813e-06, "loss": 0.7933, "step": 277110 }, { "epoch": 1.7704406935588977, "grad_norm": 0.6923054456710815, "learning_rate": 3.234722730641904e-06, "loss": 0.7284, "step": 277120 }, { "epoch": 1.7705045807086361, "grad_norm": 1.0289267301559448, "learning_rate": 3.2329475028557876e-06, "loss": 0.7274, "step": 277130 }, { "epoch": 1.770568467858375, "grad_norm": 1.0343220233917236, "learning_rate": 3.2311727460543563e-06, "loss": 0.8632, "step": 277140 }, { "epoch": 1.7706323550081136, "grad_norm": 0.7125779986381531, "learning_rate": 3.229398460255467e-06, "loss": 0.7936, "step": 277150 }, { "epoch": 1.7706962421578525, "grad_norm": 1.3944861888885498, "learning_rate": 3.227624645477012e-06, "loss": 0.8167, "step": 277160 }, { "epoch": 1.770760129307591, "grad_norm": 1.0906258821487427, "learning_rate": 3.2258513017368265e-06, "loss": 0.9059, "step": 277170 }, { "epoch": 1.77082401645733, "grad_norm": 1.4286187887191772, "learning_rate": 3.2240784290527957e-06, "loss": 0.6454, "step": 277180 }, { "epoch": 1.7708879036070684, "grad_norm": 0.8852258324623108, "learning_rate": 3.222306027442751e-06, "loss": 0.6915, "step": 277190 }, { "epoch": 1.7709517907568073, "grad_norm": 1.2390975952148438, "learning_rate": 3.22053409692456e-06, "loss": 0.8217, "step": 277200 }, { "epoch": 1.7710156779065458, "grad_norm": 1.6062967777252197, "learning_rate": 3.2187626375160594e-06, "loss": 1.0739, "step": 277210 }, { "epoch": 1.7710795650562847, "grad_norm": 1.1709768772125244, "learning_rate": 3.2169916492350906e-06, "loss": 0.9225, "step": 277220 }, { "epoch": 1.7711434522060232, "grad_norm": 1.0782562494277954, "learning_rate": 3.2152211320994884e-06, "loss": 0.8072, "step": 277230 }, { "epoch": 1.7712073393557621, "grad_norm": 0.743114173412323, "learning_rate": 3.2134510861270783e-06, "loss": 0.8746, "step": 277240 }, { "epoch": 1.7712712265055006, "grad_norm": 0.7684194445610046, "learning_rate": 3.211858447611188e-06, "loss": 0.9179, "step": 277250 }, { "epoch": 1.7713351136552395, "grad_norm": 1.0073596239089966, "learning_rate": 3.2100892968979613e-06, "loss": 0.9356, "step": 277260 }, { "epoch": 1.771399000804978, "grad_norm": 0.894205629825592, "learning_rate": 3.2083206173996238e-06, "loss": 0.9639, "step": 277270 }, { "epoch": 1.7714628879547167, "grad_norm": 0.9179940223693848, "learning_rate": 3.2065524091339726e-06, "loss": 0.6965, "step": 277280 }, { "epoch": 1.7715267751044554, "grad_norm": 1.3406168222427368, "learning_rate": 3.2047846721188215e-06, "loss": 1.0092, "step": 277290 }, { "epoch": 1.7715906622541941, "grad_norm": 0.5127822160720825, "learning_rate": 3.203017406371972e-06, "loss": 0.7664, "step": 277300 }, { "epoch": 1.7716545494039329, "grad_norm": 0.6956382989883423, "learning_rate": 3.2012506119112273e-06, "loss": 0.8804, "step": 277310 }, { "epoch": 1.7717184365536716, "grad_norm": 1.7899528741836548, "learning_rate": 3.1994842887543673e-06, "loss": 0.8313, "step": 277320 }, { "epoch": 1.7717823237034103, "grad_norm": 0.7749053239822388, "learning_rate": 3.197718436919195e-06, "loss": 1.0221, "step": 277330 }, { "epoch": 1.771846210853149, "grad_norm": 0.6026731729507446, "learning_rate": 3.1959530564234787e-06, "loss": 0.9889, "step": 277340 }, { "epoch": 1.7719100980028877, "grad_norm": 1.6500853300094604, "learning_rate": 3.1941881472850212e-06, "loss": 0.9839, "step": 277350 }, { "epoch": 1.7719739851526264, "grad_norm": 1.0572153329849243, "learning_rate": 3.192423709521569e-06, "loss": 0.9506, "step": 277360 }, { "epoch": 1.772037872302365, "grad_norm": 1.2893636226654053, "learning_rate": 3.190659743150903e-06, "loss": 0.7442, "step": 277370 }, { "epoch": 1.7721017594521038, "grad_norm": 1.0569548606872559, "learning_rate": 3.188896248190798e-06, "loss": 0.7426, "step": 277380 }, { "epoch": 1.7721656466018425, "grad_norm": 0.6380995512008667, "learning_rate": 3.1871332246590003e-06, "loss": 0.8383, "step": 277390 }, { "epoch": 1.7722295337515812, "grad_norm": 1.4150300025939941, "learning_rate": 3.185370672573279e-06, "loss": 1.0424, "step": 277400 }, { "epoch": 1.77229342090132, "grad_norm": 0.9538283348083496, "learning_rate": 3.1836085919513648e-06, "loss": 0.8074, "step": 277410 }, { "epoch": 1.7723573080510586, "grad_norm": 2.292234420776367, "learning_rate": 3.181846982811021e-06, "loss": 0.9066, "step": 277420 }, { "epoch": 1.7724211952007973, "grad_norm": 0.9210676550865173, "learning_rate": 3.180085845169972e-06, "loss": 1.0798, "step": 277430 }, { "epoch": 1.772485082350536, "grad_norm": 0.7483298182487488, "learning_rate": 3.1783251790459766e-06, "loss": 0.8801, "step": 277440 }, { "epoch": 1.7725489695002747, "grad_norm": 0.8505380153656006, "learning_rate": 3.1765649844567424e-06, "loss": 0.5436, "step": 277450 }, { "epoch": 1.7726128566500134, "grad_norm": 0.7014589905738831, "learning_rate": 3.1748052614200106e-06, "loss": 0.9626, "step": 277460 }, { "epoch": 1.7726767437997522, "grad_norm": 0.8798777461051941, "learning_rate": 3.173046009953495e-06, "loss": 0.8165, "step": 277470 }, { "epoch": 1.7727406309494909, "grad_norm": 0.9326733350753784, "learning_rate": 3.1712872300749265e-06, "loss": 0.6907, "step": 277480 }, { "epoch": 1.7728045180992296, "grad_norm": 0.8117529153823853, "learning_rate": 3.169528921802001e-06, "loss": 0.9274, "step": 277490 }, { "epoch": 1.7728684052489683, "grad_norm": 1.0946813821792603, "learning_rate": 3.167771085152438e-06, "loss": 1.1256, "step": 277500 }, { "epoch": 1.772932292398707, "grad_norm": 0.8175672292709351, "learning_rate": 3.1660137201439287e-06, "loss": 0.68, "step": 277510 }, { "epoch": 1.7729961795484455, "grad_norm": 0.4712793529033661, "learning_rate": 3.164256826794193e-06, "loss": 0.6028, "step": 277520 }, { "epoch": 1.7730600666981844, "grad_norm": 0.7874718308448792, "learning_rate": 3.162500405120894e-06, "loss": 0.8733, "step": 277530 }, { "epoch": 1.7731239538479229, "grad_norm": 1.705673098564148, "learning_rate": 3.160744455141745e-06, "loss": 0.9226, "step": 277540 }, { "epoch": 1.7731878409976618, "grad_norm": 0.8848811388015747, "learning_rate": 3.1589889768744162e-06, "loss": 0.836, "step": 277550 }, { "epoch": 1.7732517281474003, "grad_norm": 0.8991907835006714, "learning_rate": 3.1572339703365874e-06, "loss": 0.9667, "step": 277560 }, { "epoch": 1.7733156152971392, "grad_norm": 1.2283343076705933, "learning_rate": 3.1554794355459493e-06, "loss": 0.8699, "step": 277570 }, { "epoch": 1.7733795024468777, "grad_norm": 0.8715161085128784, "learning_rate": 3.15372537252015e-06, "loss": 0.6607, "step": 277580 }, { "epoch": 1.7734433895966166, "grad_norm": 0.7413138747215271, "learning_rate": 3.151971781276869e-06, "loss": 0.8635, "step": 277590 }, { "epoch": 1.773507276746355, "grad_norm": 0.7562516927719116, "learning_rate": 3.1502186618337593e-06, "loss": 0.8093, "step": 277600 }, { "epoch": 1.773571163896094, "grad_norm": 0.8659891486167908, "learning_rate": 3.1484660142084897e-06, "loss": 0.8792, "step": 277610 }, { "epoch": 1.7736350510458325, "grad_norm": 0.9960166811943054, "learning_rate": 3.1467138384186857e-06, "loss": 0.8298, "step": 277620 }, { "epoch": 1.7736989381955715, "grad_norm": 0.9417649507522583, "learning_rate": 3.1449621344820156e-06, "loss": 1.0236, "step": 277630 }, { "epoch": 1.77376282534531, "grad_norm": 1.2807464599609375, "learning_rate": 3.143210902416105e-06, "loss": 0.8184, "step": 277640 }, { "epoch": 1.7738267124950489, "grad_norm": 0.8112235069274902, "learning_rate": 3.1414601422386002e-06, "loss": 0.8804, "step": 277650 }, { "epoch": 1.7738905996447873, "grad_norm": 1.1869792938232422, "learning_rate": 3.1397098539671264e-06, "loss": 0.5419, "step": 277660 }, { "epoch": 1.7739544867945263, "grad_norm": 0.7551632523536682, "learning_rate": 3.137960037619325e-06, "loss": 0.7217, "step": 277670 }, { "epoch": 1.7740183739442648, "grad_norm": 0.8500282764434814, "learning_rate": 3.136210693212793e-06, "loss": 0.6447, "step": 277680 }, { "epoch": 1.7740822610940037, "grad_norm": 0.8427389860153198, "learning_rate": 3.1344618207651666e-06, "loss": 0.7903, "step": 277690 }, { "epoch": 1.7741461482437422, "grad_norm": 0.6282423734664917, "learning_rate": 3.1327134202940645e-06, "loss": 0.8771, "step": 277700 }, { "epoch": 1.774210035393481, "grad_norm": 1.025516390800476, "learning_rate": 3.1309654918170726e-06, "loss": 0.8486, "step": 277710 }, { "epoch": 1.7742739225432196, "grad_norm": 0.846153736114502, "learning_rate": 3.129218035351811e-06, "loss": 1.0854, "step": 277720 }, { "epoch": 1.7743378096929585, "grad_norm": 2.999692916870117, "learning_rate": 3.12747105091587e-06, "loss": 0.923, "step": 277730 }, { "epoch": 1.774401696842697, "grad_norm": 1.197917103767395, "learning_rate": 3.125724538526853e-06, "loss": 1.0771, "step": 277740 }, { "epoch": 1.774465583992436, "grad_norm": 0.8440120220184326, "learning_rate": 3.1239784982023344e-06, "loss": 0.7957, "step": 277750 }, { "epoch": 1.7745294711421744, "grad_norm": 1.0203328132629395, "learning_rate": 3.122232929959912e-06, "loss": 1.3361, "step": 277760 }, { "epoch": 1.7745933582919131, "grad_norm": 0.8135339617729187, "learning_rate": 3.120487833817148e-06, "loss": 0.9691, "step": 277770 }, { "epoch": 1.7746572454416518, "grad_norm": 1.0247467756271362, "learning_rate": 3.1187432097916415e-06, "loss": 0.8512, "step": 277780 }, { "epoch": 1.7747211325913905, "grad_norm": 0.8693737387657166, "learning_rate": 3.116999057900938e-06, "loss": 1.0537, "step": 277790 }, { "epoch": 1.7747850197411292, "grad_norm": 0.9472058415412903, "learning_rate": 3.1152553781626237e-06, "loss": 1.0081, "step": 277800 }, { "epoch": 1.774848906890868, "grad_norm": 0.6859394907951355, "learning_rate": 3.113512170594235e-06, "loss": 0.8502, "step": 277810 }, { "epoch": 1.7749127940406066, "grad_norm": 0.7783265709877014, "learning_rate": 3.1117694352133576e-06, "loss": 0.9364, "step": 277820 }, { "epoch": 1.7749766811903454, "grad_norm": 0.9184941053390503, "learning_rate": 3.1100271720375106e-06, "loss": 0.9731, "step": 277830 }, { "epoch": 1.775040568340084, "grad_norm": 0.9210687875747681, "learning_rate": 3.1082853810842694e-06, "loss": 0.8218, "step": 277840 }, { "epoch": 1.7751044554898228, "grad_norm": 0.6392104029655457, "learning_rate": 3.1065440623711527e-06, "loss": 0.8711, "step": 277850 }, { "epoch": 1.7751683426395615, "grad_norm": 1.07809579372406, "learning_rate": 3.1048032159157136e-06, "loss": 0.7592, "step": 277860 }, { "epoch": 1.7752322297893002, "grad_norm": 0.7940744757652283, "learning_rate": 3.1030628417354658e-06, "loss": 0.9796, "step": 277870 }, { "epoch": 1.7752961169390389, "grad_norm": 0.9738584160804749, "learning_rate": 3.101322939847945e-06, "loss": 0.8472, "step": 277880 }, { "epoch": 1.7753600040887776, "grad_norm": 0.6570689082145691, "learning_rate": 3.099583510270687e-06, "loss": 0.8304, "step": 277890 }, { "epoch": 1.7754238912385163, "grad_norm": 0.966304361820221, "learning_rate": 3.0978445530211898e-06, "loss": 0.7785, "step": 277900 }, { "epoch": 1.775487778388255, "grad_norm": 0.8394570350646973, "learning_rate": 3.0961060681169827e-06, "loss": 0.9422, "step": 277910 }, { "epoch": 1.7755516655379937, "grad_norm": 0.7498250007629395, "learning_rate": 3.0943680555755583e-06, "loss": 0.7033, "step": 277920 }, { "epoch": 1.7756155526877324, "grad_norm": 1.4077019691467285, "learning_rate": 3.0926305154144296e-06, "loss": 1.0761, "step": 277930 }, { "epoch": 1.7756794398374711, "grad_norm": 2.1638362407684326, "learning_rate": 3.0908934476510888e-06, "loss": 0.9859, "step": 277940 }, { "epoch": 1.7757433269872098, "grad_norm": 0.9528393149375916, "learning_rate": 3.0891568523030377e-06, "loss": 0.8106, "step": 277950 }, { "epoch": 1.7758072141369485, "grad_norm": 1.0252223014831543, "learning_rate": 3.087420729387752e-06, "loss": 1.1796, "step": 277960 }, { "epoch": 1.7758711012866872, "grad_norm": 1.5917168855667114, "learning_rate": 3.0856850789227397e-06, "loss": 0.7109, "step": 277970 }, { "epoch": 1.775934988436426, "grad_norm": 1.0380032062530518, "learning_rate": 3.0839499009254534e-06, "loss": 0.9216, "step": 277980 }, { "epoch": 1.7759988755861646, "grad_norm": 1.0510272979736328, "learning_rate": 3.0822151954133848e-06, "loss": 0.8541, "step": 277990 }, { "epoch": 1.7760627627359034, "grad_norm": 1.1530609130859375, "learning_rate": 3.080480962403992e-06, "loss": 0.7698, "step": 278000 }, { "epoch": 1.7761266498856418, "grad_norm": 0.9889609813690186, "learning_rate": 3.0787472019147556e-06, "loss": 0.876, "step": 278010 }, { "epoch": 1.7761905370353808, "grad_norm": 0.921779453754425, "learning_rate": 3.0770139139631225e-06, "loss": 0.6693, "step": 278020 }, { "epoch": 1.7762544241851193, "grad_norm": 0.8533374667167664, "learning_rate": 3.075281098566557e-06, "loss": 0.8092, "step": 278030 }, { "epoch": 1.7763183113348582, "grad_norm": 1.26589035987854, "learning_rate": 3.0735487557425004e-06, "loss": 1.2583, "step": 278040 }, { "epoch": 1.7763821984845967, "grad_norm": 1.0811630487442017, "learning_rate": 3.071816885508416e-06, "loss": 0.6931, "step": 278050 }, { "epoch": 1.7764460856343356, "grad_norm": 0.6349918246269226, "learning_rate": 3.070085487881724e-06, "loss": 1.1791, "step": 278060 }, { "epoch": 1.776509972784074, "grad_norm": 0.718450665473938, "learning_rate": 3.068354562879877e-06, "loss": 0.8377, "step": 278070 }, { "epoch": 1.776573859933813, "grad_norm": 0.8059900999069214, "learning_rate": 3.0666241105202942e-06, "loss": 0.8405, "step": 278080 }, { "epoch": 1.7766377470835515, "grad_norm": 1.0362147092819214, "learning_rate": 3.0648941308204172e-06, "loss": 0.8976, "step": 278090 }, { "epoch": 1.7767016342332904, "grad_norm": 0.9528014063835144, "learning_rate": 3.063164623797665e-06, "loss": 0.6518, "step": 278100 }, { "epoch": 1.776765521383029, "grad_norm": 2.4749984741210938, "learning_rate": 3.061435589469441e-06, "loss": 0.9381, "step": 278110 }, { "epoch": 1.7768294085327678, "grad_norm": 0.8902806043624878, "learning_rate": 3.059707027853176e-06, "loss": 0.8247, "step": 278120 }, { "epoch": 1.7768932956825063, "grad_norm": 1.6432876586914062, "learning_rate": 3.0579789389662607e-06, "loss": 0.6879, "step": 278130 }, { "epoch": 1.7769571828322452, "grad_norm": 0.9981576800346375, "learning_rate": 3.0562513228261147e-06, "loss": 0.9707, "step": 278140 }, { "epoch": 1.7770210699819837, "grad_norm": 0.7556832432746887, "learning_rate": 3.0545241794501246e-06, "loss": 0.7934, "step": 278150 }, { "epoch": 1.7770849571317227, "grad_norm": 0.7431328892707825, "learning_rate": 3.052797508855698e-06, "loss": 0.6255, "step": 278160 }, { "epoch": 1.7771488442814611, "grad_norm": 1.2007827758789062, "learning_rate": 3.0510713110602106e-06, "loss": 0.7706, "step": 278170 }, { "epoch": 1.7772127314312, "grad_norm": 0.9197219014167786, "learning_rate": 3.049345586081054e-06, "loss": 0.7547, "step": 278180 }, { "epoch": 1.7772766185809385, "grad_norm": 0.6609572172164917, "learning_rate": 3.0476203339356023e-06, "loss": 0.6398, "step": 278190 }, { "epoch": 1.7773405057306775, "grad_norm": 0.6442859768867493, "learning_rate": 3.045895554641237e-06, "loss": 0.7506, "step": 278200 }, { "epoch": 1.777404392880416, "grad_norm": 1.3745826482772827, "learning_rate": 3.044171248215316e-06, "loss": 0.9476, "step": 278210 }, { "epoch": 1.777468280030155, "grad_norm": 1.1745824813842773, "learning_rate": 3.04244741467522e-06, "loss": 0.9133, "step": 278220 }, { "epoch": 1.7775321671798934, "grad_norm": 1.0461777448654175, "learning_rate": 3.040724054038302e-06, "loss": 0.9329, "step": 278230 }, { "epoch": 1.7775960543296323, "grad_norm": 1.0739800930023193, "learning_rate": 3.0390011663219198e-06, "loss": 0.7345, "step": 278240 }, { "epoch": 1.7776599414793708, "grad_norm": 0.7248935103416443, "learning_rate": 3.037278751543421e-06, "loss": 0.8388, "step": 278250 }, { "epoch": 1.7777238286291095, "grad_norm": 1.6017006635665894, "learning_rate": 3.0355568097201526e-06, "loss": 0.8607, "step": 278260 }, { "epoch": 1.7777877157788482, "grad_norm": 0.7197296023368835, "learning_rate": 3.033835340869462e-06, "loss": 0.7747, "step": 278270 }, { "epoch": 1.777851602928587, "grad_norm": 0.9725298285484314, "learning_rate": 3.032114345008674e-06, "loss": 1.0129, "step": 278280 }, { "epoch": 1.7779154900783256, "grad_norm": 0.7443470358848572, "learning_rate": 3.0303938221551363e-06, "loss": 0.7772, "step": 278290 }, { "epoch": 1.7779793772280643, "grad_norm": 1.0656518936157227, "learning_rate": 3.0286737723261573e-06, "loss": 1.0035, "step": 278300 }, { "epoch": 1.778043264377803, "grad_norm": 0.737565815448761, "learning_rate": 3.026954195539078e-06, "loss": 0.8194, "step": 278310 }, { "epoch": 1.7781071515275417, "grad_norm": 1.947318196296692, "learning_rate": 3.0252350918112015e-06, "loss": 1.1093, "step": 278320 }, { "epoch": 1.7781710386772804, "grad_norm": 0.6763631105422974, "learning_rate": 3.023516461159853e-06, "loss": 0.7545, "step": 278330 }, { "epoch": 1.7782349258270191, "grad_norm": 0.7558571100234985, "learning_rate": 3.0217983036023246e-06, "loss": 0.8406, "step": 278340 }, { "epoch": 1.7782988129767578, "grad_norm": 0.950883150100708, "learning_rate": 3.0200806191559407e-06, "loss": 0.8893, "step": 278350 }, { "epoch": 1.7783627001264966, "grad_norm": 0.9428911805152893, "learning_rate": 3.018363407837976e-06, "loss": 0.869, "step": 278360 }, { "epoch": 1.7784265872762353, "grad_norm": 0.6479962468147278, "learning_rate": 3.016646669665751e-06, "loss": 0.8571, "step": 278370 }, { "epoch": 1.778490474425974, "grad_norm": 1.2233079671859741, "learning_rate": 3.0149304046565296e-06, "loss": 0.902, "step": 278380 }, { "epoch": 1.7785543615757127, "grad_norm": 0.8563507199287415, "learning_rate": 3.013214612827614e-06, "loss": 0.7688, "step": 278390 }, { "epoch": 1.7786182487254514, "grad_norm": 0.8968834280967712, "learning_rate": 3.011499294196268e-06, "loss": 0.8853, "step": 278400 }, { "epoch": 1.77868213587519, "grad_norm": 0.4790927469730377, "learning_rate": 3.0097844487797733e-06, "loss": 0.7484, "step": 278410 }, { "epoch": 1.7787460230249288, "grad_norm": 0.8275429010391235, "learning_rate": 3.008070076595415e-06, "loss": 1.1507, "step": 278420 }, { "epoch": 1.7788099101746675, "grad_norm": 1.8453471660614014, "learning_rate": 3.0063561776604298e-06, "loss": 0.852, "step": 278430 }, { "epoch": 1.7788737973244062, "grad_norm": 0.7870879173278809, "learning_rate": 3.004642751992104e-06, "loss": 0.8445, "step": 278440 }, { "epoch": 1.778937684474145, "grad_norm": 0.8181558847427368, "learning_rate": 3.0029297996076787e-06, "loss": 0.993, "step": 278450 }, { "epoch": 1.7790015716238836, "grad_norm": 1.7123284339904785, "learning_rate": 3.0012173205244132e-06, "loss": 0.7388, "step": 278460 }, { "epoch": 1.7790654587736223, "grad_norm": 0.8432309031486511, "learning_rate": 2.999505314759543e-06, "loss": 1.0784, "step": 278470 }, { "epoch": 1.779129345923361, "grad_norm": 1.3005728721618652, "learning_rate": 2.9977937823303217e-06, "loss": 1.1719, "step": 278480 }, { "epoch": 1.7791932330730997, "grad_norm": 1.1980042457580566, "learning_rate": 2.9960827232539677e-06, "loss": 0.845, "step": 278490 }, { "epoch": 1.7792571202228382, "grad_norm": 1.6383336782455444, "learning_rate": 2.9943721375477408e-06, "loss": 0.9195, "step": 278500 }, { "epoch": 1.7793210073725771, "grad_norm": 0.8793671131134033, "learning_rate": 2.9926620252288373e-06, "loss": 0.7686, "step": 278510 }, { "epoch": 1.7793848945223156, "grad_norm": 0.8216747045516968, "learning_rate": 2.990952386314505e-06, "loss": 0.6514, "step": 278520 }, { "epoch": 1.7794487816720546, "grad_norm": 0.842990517616272, "learning_rate": 2.989243220821941e-06, "loss": 1.0126, "step": 278530 }, { "epoch": 1.779512668821793, "grad_norm": 1.0940358638763428, "learning_rate": 2.987534528768382e-06, "loss": 0.7461, "step": 278540 }, { "epoch": 1.779576555971532, "grad_norm": 1.65617036819458, "learning_rate": 2.985826310171008e-06, "loss": 1.0087, "step": 278550 }, { "epoch": 1.7796404431212705, "grad_norm": 0.6357586979866028, "learning_rate": 2.984118565047045e-06, "loss": 0.7344, "step": 278560 }, { "epoch": 1.7797043302710094, "grad_norm": 0.8728939890861511, "learning_rate": 2.9824112934136784e-06, "loss": 0.8424, "step": 278570 }, { "epoch": 1.7797682174207479, "grad_norm": 1.3085471391677856, "learning_rate": 2.9807044952881115e-06, "loss": 0.8914, "step": 278580 }, { "epoch": 1.7798321045704868, "grad_norm": 1.9346404075622559, "learning_rate": 2.978998170687525e-06, "loss": 0.6496, "step": 278590 }, { "epoch": 1.7798959917202253, "grad_norm": 1.123392939567566, "learning_rate": 2.977292319629099e-06, "loss": 1.0704, "step": 278600 }, { "epoch": 1.7799598788699642, "grad_norm": 1.1311415433883667, "learning_rate": 2.975586942130032e-06, "loss": 0.8, "step": 278610 }, { "epoch": 1.7800237660197027, "grad_norm": 0.8052006959915161, "learning_rate": 2.9738820382074704e-06, "loss": 0.8831, "step": 278620 }, { "epoch": 1.7800876531694416, "grad_norm": 1.8844988346099854, "learning_rate": 2.972177607878618e-06, "loss": 1.1519, "step": 278630 }, { "epoch": 1.78015154031918, "grad_norm": 1.1015048027038574, "learning_rate": 2.9704736511606047e-06, "loss": 0.7323, "step": 278640 }, { "epoch": 1.780215427468919, "grad_norm": 3.153304100036621, "learning_rate": 2.9687701680706227e-06, "loss": 1.3893, "step": 278650 }, { "epoch": 1.7802793146186575, "grad_norm": 1.0777071714401245, "learning_rate": 2.9670671586258024e-06, "loss": 0.8355, "step": 278660 }, { "epoch": 1.7803432017683964, "grad_norm": 0.8020691275596619, "learning_rate": 2.9653646228433143e-06, "loss": 0.8968, "step": 278670 }, { "epoch": 1.780407088918135, "grad_norm": 0.9972878098487854, "learning_rate": 2.9636625607402825e-06, "loss": 0.9588, "step": 278680 }, { "epoch": 1.7804709760678739, "grad_norm": 0.5217418670654297, "learning_rate": 2.961960972333877e-06, "loss": 0.8306, "step": 278690 }, { "epoch": 1.7805348632176123, "grad_norm": 0.9895110130310059, "learning_rate": 2.960259857641201e-06, "loss": 0.8076, "step": 278700 }, { "epoch": 1.7805987503673513, "grad_norm": 0.6926957964897156, "learning_rate": 2.9585592166794185e-06, "loss": 0.7323, "step": 278710 }, { "epoch": 1.7806626375170898, "grad_norm": 4.4439167976379395, "learning_rate": 2.9568590494656322e-06, "loss": 0.8698, "step": 278720 }, { "epoch": 1.7807265246668285, "grad_norm": 1.4048861265182495, "learning_rate": 2.9551593560169733e-06, "loss": 0.6555, "step": 278730 }, { "epoch": 1.7807904118165672, "grad_norm": 0.5778505802154541, "learning_rate": 2.9534601363505666e-06, "loss": 0.6811, "step": 278740 }, { "epoch": 1.7808542989663059, "grad_norm": 1.2079328298568726, "learning_rate": 2.9517613904835095e-06, "loss": 1.0647, "step": 278750 }, { "epoch": 1.7809181861160446, "grad_norm": 1.3561972379684448, "learning_rate": 2.950063118432922e-06, "loss": 0.9528, "step": 278760 }, { "epoch": 1.7809820732657833, "grad_norm": 1.4584667682647705, "learning_rate": 2.948365320215901e-06, "loss": 1.1817, "step": 278770 }, { "epoch": 1.781045960415522, "grad_norm": 0.6736270785331726, "learning_rate": 2.94666799584955e-06, "loss": 0.9713, "step": 278780 }, { "epoch": 1.7811098475652607, "grad_norm": 1.5708751678466797, "learning_rate": 2.94497114535095e-06, "loss": 0.7995, "step": 278790 }, { "epoch": 1.7811737347149994, "grad_norm": 0.9714763760566711, "learning_rate": 2.943274768737214e-06, "loss": 1.1868, "step": 278800 }, { "epoch": 1.781237621864738, "grad_norm": 1.4231477975845337, "learning_rate": 2.9415788660253963e-06, "loss": 0.9282, "step": 278810 }, { "epoch": 1.7813015090144768, "grad_norm": 0.867780864238739, "learning_rate": 2.939883437232599e-06, "loss": 0.8377, "step": 278820 }, { "epoch": 1.7813653961642155, "grad_norm": 0.6904677152633667, "learning_rate": 2.9381884823758866e-06, "loss": 0.8879, "step": 278830 }, { "epoch": 1.7814292833139542, "grad_norm": 0.8885203003883362, "learning_rate": 2.9364940014723343e-06, "loss": 0.7462, "step": 278840 }, { "epoch": 1.781493170463693, "grad_norm": 1.1848344802856445, "learning_rate": 2.9347999945389946e-06, "loss": 0.7568, "step": 278850 }, { "epoch": 1.7815570576134316, "grad_norm": 1.0793530941009521, "learning_rate": 2.9331064615929438e-06, "loss": 0.7066, "step": 278860 }, { "epoch": 1.7816209447631703, "grad_norm": 1.1547480821609497, "learning_rate": 2.931413402651223e-06, "loss": 0.7813, "step": 278870 }, { "epoch": 1.781684831912909, "grad_norm": 1.114449143409729, "learning_rate": 2.9297208177308964e-06, "loss": 0.7996, "step": 278880 }, { "epoch": 1.7817487190626478, "grad_norm": 1.191058874130249, "learning_rate": 2.9280287068489954e-06, "loss": 0.8922, "step": 278890 }, { "epoch": 1.7818126062123865, "grad_norm": 0.906446099281311, "learning_rate": 2.926337070022578e-06, "loss": 0.8086, "step": 278900 }, { "epoch": 1.7818764933621252, "grad_norm": 1.750216007232666, "learning_rate": 2.924645907268658e-06, "loss": 0.8135, "step": 278910 }, { "epoch": 1.7819403805118639, "grad_norm": 2.0710866451263428, "learning_rate": 2.9229552186042896e-06, "loss": 0.9289, "step": 278920 }, { "epoch": 1.7820042676616026, "grad_norm": 1.7744084596633911, "learning_rate": 2.921265004046486e-06, "loss": 0.867, "step": 278930 }, { "epoch": 1.7820681548113413, "grad_norm": 1.1938283443450928, "learning_rate": 2.9195752636122677e-06, "loss": 1.0544, "step": 278940 }, { "epoch": 1.78213204196108, "grad_norm": 0.7993178367614746, "learning_rate": 2.9178859973186703e-06, "loss": 0.8555, "step": 278950 }, { "epoch": 1.7821959291108187, "grad_norm": 0.5344235301017761, "learning_rate": 2.91619720518268e-06, "loss": 1.0545, "step": 278960 }, { "epoch": 1.7822598162605574, "grad_norm": 0.8354693651199341, "learning_rate": 2.9145088872213233e-06, "loss": 0.7915, "step": 278970 }, { "epoch": 1.7823237034102961, "grad_norm": 0.97170490026474, "learning_rate": 2.912821043451591e-06, "loss": 0.7246, "step": 278980 }, { "epoch": 1.7823875905600346, "grad_norm": 0.8123016953468323, "learning_rate": 2.911133673890498e-06, "loss": 0.8792, "step": 278990 }, { "epoch": 1.7824514777097735, "grad_norm": 1.0553772449493408, "learning_rate": 2.9094467785550193e-06, "loss": 0.8939, "step": 279000 }, { "epoch": 1.782515364859512, "grad_norm": 0.7460412383079529, "learning_rate": 2.9077603574621526e-06, "loss": 0.8926, "step": 279010 }, { "epoch": 1.782579252009251, "grad_norm": 0.870805025100708, "learning_rate": 2.906074410628873e-06, "loss": 0.8963, "step": 279020 }, { "epoch": 1.7826431391589894, "grad_norm": 1.2756316661834717, "learning_rate": 2.9043889380721778e-06, "loss": 0.9378, "step": 279030 }, { "epoch": 1.7827070263087283, "grad_norm": 1.367229700088501, "learning_rate": 2.9027039398090204e-06, "loss": 0.8562, "step": 279040 }, { "epoch": 1.7827709134584668, "grad_norm": 1.108358383178711, "learning_rate": 2.9010194158563876e-06, "loss": 0.7689, "step": 279050 }, { "epoch": 1.7828348006082058, "grad_norm": 1.223518967628479, "learning_rate": 2.8993353662312316e-06, "loss": 0.9621, "step": 279060 }, { "epoch": 1.7828986877579442, "grad_norm": 1.6448414325714111, "learning_rate": 2.897651790950512e-06, "loss": 1.0777, "step": 279070 }, { "epoch": 1.7829625749076832, "grad_norm": 1.6710528135299683, "learning_rate": 2.895968690031198e-06, "loss": 0.781, "step": 279080 }, { "epoch": 1.7830264620574217, "grad_norm": 1.0129505395889282, "learning_rate": 2.8942860634902202e-06, "loss": 0.8742, "step": 279090 }, { "epoch": 1.7830903492071606, "grad_norm": 1.2868965864181519, "learning_rate": 2.892603911344538e-06, "loss": 0.8708, "step": 279100 }, { "epoch": 1.783154236356899, "grad_norm": 0.8371222019195557, "learning_rate": 2.890922233611082e-06, "loss": 0.804, "step": 279110 }, { "epoch": 1.783218123506638, "grad_norm": 1.1141200065612793, "learning_rate": 2.8892410303068053e-06, "loss": 0.8911, "step": 279120 }, { "epoch": 1.7832820106563765, "grad_norm": 1.4201537370681763, "learning_rate": 2.887560301448622e-06, "loss": 0.6672, "step": 279130 }, { "epoch": 1.7833458978061154, "grad_norm": 0.7168004512786865, "learning_rate": 2.8858800470534684e-06, "loss": 0.8023, "step": 279140 }, { "epoch": 1.783409784955854, "grad_norm": 1.1202197074890137, "learning_rate": 2.884200267138254e-06, "loss": 0.8567, "step": 279150 }, { "epoch": 1.7834736721055928, "grad_norm": 0.7139795422554016, "learning_rate": 2.882520961719909e-06, "loss": 0.9578, "step": 279160 }, { "epoch": 1.7835375592553313, "grad_norm": 0.8012449145317078, "learning_rate": 2.8808421308153367e-06, "loss": 0.7102, "step": 279170 }, { "epoch": 1.7836014464050702, "grad_norm": 1.4097734689712524, "learning_rate": 2.8791637744414566e-06, "loss": 1.0442, "step": 279180 }, { "epoch": 1.7836653335548087, "grad_norm": 0.6851403117179871, "learning_rate": 2.877485892615156e-06, "loss": 0.8663, "step": 279190 }, { "epoch": 1.7837292207045476, "grad_norm": 1.378808617591858, "learning_rate": 2.875808485353343e-06, "loss": 1.0306, "step": 279200 }, { "epoch": 1.7837931078542861, "grad_norm": 1.5122627019882202, "learning_rate": 2.8741315526728985e-06, "loss": 0.8163, "step": 279210 }, { "epoch": 1.7838569950040248, "grad_norm": 1.231703519821167, "learning_rate": 2.8724550945907313e-06, "loss": 0.8521, "step": 279220 }, { "epoch": 1.7839208821537635, "grad_norm": 0.7119479179382324, "learning_rate": 2.8707791111237005e-06, "loss": 0.9475, "step": 279230 }, { "epoch": 1.7839847693035022, "grad_norm": 1.721937656402588, "learning_rate": 2.8691036022887086e-06, "loss": 0.9926, "step": 279240 }, { "epoch": 1.784048656453241, "grad_norm": 1.1561145782470703, "learning_rate": 2.867428568102604e-06, "loss": 0.8603, "step": 279250 }, { "epoch": 1.7841125436029797, "grad_norm": 0.8052932620048523, "learning_rate": 2.8657540085822722e-06, "loss": 0.9676, "step": 279260 }, { "epoch": 1.7841764307527184, "grad_norm": 0.6737070083618164, "learning_rate": 2.864079923744584e-06, "loss": 0.7396, "step": 279270 }, { "epoch": 1.784240317902457, "grad_norm": 0.632921040058136, "learning_rate": 2.8624063136063805e-06, "loss": 0.7189, "step": 279280 }, { "epoch": 1.7843042050521958, "grad_norm": 0.8166862726211548, "learning_rate": 2.8607331781845328e-06, "loss": 0.7994, "step": 279290 }, { "epoch": 1.7843680922019345, "grad_norm": 3.0488243103027344, "learning_rate": 2.8590605174958706e-06, "loss": 0.7269, "step": 279300 }, { "epoch": 1.7844319793516732, "grad_norm": 1.1048704385757446, "learning_rate": 2.8573883315572647e-06, "loss": 1.0499, "step": 279310 }, { "epoch": 1.784495866501412, "grad_norm": 1.0506843328475952, "learning_rate": 2.8557166203855344e-06, "loss": 1.1301, "step": 279320 }, { "epoch": 1.7845597536511506, "grad_norm": 1.0728496313095093, "learning_rate": 2.8540453839975334e-06, "loss": 0.8735, "step": 279330 }, { "epoch": 1.7846236408008893, "grad_norm": 0.7642419338226318, "learning_rate": 2.852541677202314e-06, "loss": 1.0205, "step": 279340 }, { "epoch": 1.784687527950628, "grad_norm": 0.9445726275444031, "learning_rate": 2.85087134294974e-06, "loss": 0.6283, "step": 279350 }, { "epoch": 1.7847514151003667, "grad_norm": 1.404316782951355, "learning_rate": 2.8492014835296787e-06, "loss": 0.9088, "step": 279360 }, { "epoch": 1.7848153022501054, "grad_norm": 0.8889473080635071, "learning_rate": 2.847532098958955e-06, "loss": 0.6324, "step": 279370 }, { "epoch": 1.7848791893998441, "grad_norm": 1.1736335754394531, "learning_rate": 2.8458631892543665e-06, "loss": 0.7437, "step": 279380 }, { "epoch": 1.7849430765495828, "grad_norm": 1.3474094867706299, "learning_rate": 2.8441947544327276e-06, "loss": 0.8552, "step": 279390 }, { "epoch": 1.7850069636993215, "grad_norm": 0.7636286616325378, "learning_rate": 2.842526794510858e-06, "loss": 0.7877, "step": 279400 }, { "epoch": 1.7850708508490603, "grad_norm": 1.1588830947875977, "learning_rate": 2.840859309505528e-06, "loss": 0.8884, "step": 279410 }, { "epoch": 1.785134737998799, "grad_norm": 0.8181111812591553, "learning_rate": 2.8391922994335517e-06, "loss": 0.9266, "step": 279420 }, { "epoch": 1.7851986251485377, "grad_norm": 2.1087288856506348, "learning_rate": 2.8375257643116995e-06, "loss": 0.7319, "step": 279430 }, { "epoch": 1.7852625122982764, "grad_norm": 0.7751528024673462, "learning_rate": 2.8358597041567737e-06, "loss": 0.5595, "step": 279440 }, { "epoch": 1.785326399448015, "grad_norm": 0.7834802269935608, "learning_rate": 2.834194118985534e-06, "loss": 0.654, "step": 279450 }, { "epoch": 1.7853902865977536, "grad_norm": 1.1989582777023315, "learning_rate": 2.8325290088147718e-06, "loss": 1.0158, "step": 279460 }, { "epoch": 1.7854541737474925, "grad_norm": 0.7740297913551331, "learning_rate": 2.830864373661246e-06, "loss": 1.0383, "step": 279470 }, { "epoch": 1.785518060897231, "grad_norm": 0.8387667536735535, "learning_rate": 2.829200213541722e-06, "loss": 0.9114, "step": 279480 }, { "epoch": 1.78558194804697, "grad_norm": 0.8275271058082581, "learning_rate": 2.8275365284729573e-06, "loss": 0.9103, "step": 279490 }, { "epoch": 1.7856458351967084, "grad_norm": 0.7638615965843201, "learning_rate": 2.8258733184717168e-06, "loss": 1.0611, "step": 279500 }, { "epoch": 1.7857097223464473, "grad_norm": 0.6845916509628296, "learning_rate": 2.824210583554737e-06, "loss": 0.6659, "step": 279510 }, { "epoch": 1.7857736094961858, "grad_norm": 0.6819515824317932, "learning_rate": 2.822548323738777e-06, "loss": 0.8027, "step": 279520 }, { "epoch": 1.7858374966459247, "grad_norm": 0.7219198942184448, "learning_rate": 2.8208865390405627e-06, "loss": 0.7542, "step": 279530 }, { "epoch": 1.7859013837956632, "grad_norm": 1.013564944267273, "learning_rate": 2.8192252294768406e-06, "loss": 1.0698, "step": 279540 }, { "epoch": 1.7859652709454021, "grad_norm": 1.280639886856079, "learning_rate": 2.8175643950643314e-06, "loss": 0.7745, "step": 279550 }, { "epoch": 1.7860291580951406, "grad_norm": 3.118612289428711, "learning_rate": 2.8159040358197775e-06, "loss": 0.8694, "step": 279560 }, { "epoch": 1.7860930452448796, "grad_norm": 1.6748183965682983, "learning_rate": 2.8142441517598816e-06, "loss": 0.9532, "step": 279570 }, { "epoch": 1.786156932394618, "grad_norm": 1.1805542707443237, "learning_rate": 2.812584742901364e-06, "loss": 0.7383, "step": 279580 }, { "epoch": 1.786220819544357, "grad_norm": 1.7524734735488892, "learning_rate": 2.810925809260956e-06, "loss": 1.2987, "step": 279590 }, { "epoch": 1.7862847066940954, "grad_norm": 0.9927526116371155, "learning_rate": 2.809267350855338e-06, "loss": 1.0647, "step": 279600 }, { "epoch": 1.7863485938438344, "grad_norm": 0.8447758555412292, "learning_rate": 2.8076093677012304e-06, "loss": 0.9666, "step": 279610 }, { "epoch": 1.7864124809935729, "grad_norm": 0.811472475528717, "learning_rate": 2.8059518598153144e-06, "loss": 0.8278, "step": 279620 }, { "epoch": 1.7864763681433118, "grad_norm": 0.9613576531410217, "learning_rate": 2.8042948272143044e-06, "loss": 0.8291, "step": 279630 }, { "epoch": 1.7865402552930503, "grad_norm": 1.0779619216918945, "learning_rate": 2.8026382699148645e-06, "loss": 0.8296, "step": 279640 }, { "epoch": 1.7866041424427892, "grad_norm": 1.7146506309509277, "learning_rate": 2.800982187933698e-06, "loss": 0.9163, "step": 279650 }, { "epoch": 1.7866680295925277, "grad_norm": 1.0744938850402832, "learning_rate": 2.7993265812874646e-06, "loss": 0.7879, "step": 279660 }, { "epoch": 1.7867319167422666, "grad_norm": 1.2424300909042358, "learning_rate": 2.7976714499928556e-06, "loss": 0.8617, "step": 279670 }, { "epoch": 1.786795803892005, "grad_norm": 0.732014536857605, "learning_rate": 2.796016794066525e-06, "loss": 0.9629, "step": 279680 }, { "epoch": 1.786859691041744, "grad_norm": 1.452577829360962, "learning_rate": 2.794362613525148e-06, "loss": 1.0294, "step": 279690 }, { "epoch": 1.7869235781914825, "grad_norm": 0.8016159534454346, "learning_rate": 2.7927089083853776e-06, "loss": 0.7991, "step": 279700 }, { "epoch": 1.7869874653412212, "grad_norm": 0.7308264970779419, "learning_rate": 2.791055678663862e-06, "loss": 0.8295, "step": 279710 }, { "epoch": 1.78705135249096, "grad_norm": 0.6703952550888062, "learning_rate": 2.789402924377266e-06, "loss": 0.8897, "step": 279720 }, { "epoch": 1.7871152396406986, "grad_norm": 0.7939437627792358, "learning_rate": 2.7877506455422144e-06, "loss": 0.8806, "step": 279730 }, { "epoch": 1.7871791267904373, "grad_norm": 0.9304759502410889, "learning_rate": 2.786098842175372e-06, "loss": 0.8723, "step": 279740 }, { "epoch": 1.787243013940176, "grad_norm": 1.3307108879089355, "learning_rate": 2.784447514293348e-06, "loss": 0.8925, "step": 279750 }, { "epoch": 1.7873069010899147, "grad_norm": 2.4299263954162598, "learning_rate": 2.7827966619127897e-06, "loss": 0.8418, "step": 279760 }, { "epoch": 1.7873707882396535, "grad_norm": 0.7929475903511047, "learning_rate": 2.781146285050318e-06, "loss": 0.9435, "step": 279770 }, { "epoch": 1.7874346753893922, "grad_norm": 1.3617475032806396, "learning_rate": 2.7794963837225576e-06, "loss": 0.6976, "step": 279780 }, { "epoch": 1.7874985625391309, "grad_norm": 1.198539137840271, "learning_rate": 2.7778469579461065e-06, "loss": 0.9228, "step": 279790 }, { "epoch": 1.7875624496888696, "grad_norm": 0.852506697177887, "learning_rate": 2.7761980077376017e-06, "loss": 0.7974, "step": 279800 }, { "epoch": 1.7876263368386083, "grad_norm": 1.110378384590149, "learning_rate": 2.7745495331136353e-06, "loss": 0.7613, "step": 279810 }, { "epoch": 1.787690223988347, "grad_norm": 0.4857124090194702, "learning_rate": 2.7729015340908104e-06, "loss": 0.633, "step": 279820 }, { "epoch": 1.7877541111380857, "grad_norm": 1.0407607555389404, "learning_rate": 2.77125401068572e-06, "loss": 0.8661, "step": 279830 }, { "epoch": 1.7878179982878244, "grad_norm": 0.7220622897148132, "learning_rate": 2.769606962914967e-06, "loss": 0.6934, "step": 279840 }, { "epoch": 1.787881885437563, "grad_norm": 0.8035644888877869, "learning_rate": 2.767960390795127e-06, "loss": 1.0495, "step": 279850 }, { "epoch": 1.7879457725873018, "grad_norm": 0.9362843632698059, "learning_rate": 2.766314294342792e-06, "loss": 0.9978, "step": 279860 }, { "epoch": 1.7880096597370405, "grad_norm": 3.5854671001434326, "learning_rate": 2.7646686735745274e-06, "loss": 0.938, "step": 279870 }, { "epoch": 1.7880735468867792, "grad_norm": 0.7580956220626831, "learning_rate": 2.763023528506925e-06, "loss": 0.933, "step": 279880 }, { "epoch": 1.788137434036518, "grad_norm": 1.3497252464294434, "learning_rate": 2.761378859156527e-06, "loss": 0.99, "step": 279890 }, { "epoch": 1.7882013211862566, "grad_norm": 1.0116132497787476, "learning_rate": 2.7597346655399258e-06, "loss": 0.6312, "step": 279900 }, { "epoch": 1.7882652083359953, "grad_norm": 0.81215500831604, "learning_rate": 2.7580909476736527e-06, "loss": 0.8248, "step": 279910 }, { "epoch": 1.788329095485734, "grad_norm": 1.3211039304733276, "learning_rate": 2.7564477055742775e-06, "loss": 0.8918, "step": 279920 }, { "epoch": 1.7883929826354727, "grad_norm": 0.8589543104171753, "learning_rate": 2.7548049392583485e-06, "loss": 0.7415, "step": 279930 }, { "epoch": 1.7884568697852115, "grad_norm": 0.7645835876464844, "learning_rate": 2.753162648742402e-06, "loss": 0.7322, "step": 279940 }, { "epoch": 1.78852075693495, "grad_norm": 0.8146962523460388, "learning_rate": 2.7515208340429922e-06, "loss": 0.8259, "step": 279950 }, { "epoch": 1.7885846440846889, "grad_norm": 0.9893962144851685, "learning_rate": 2.7498794951766326e-06, "loss": 0.7888, "step": 279960 }, { "epoch": 1.7886485312344274, "grad_norm": 1.0243322849273682, "learning_rate": 2.7482386321598717e-06, "loss": 1.0943, "step": 279970 }, { "epoch": 1.7887124183841663, "grad_norm": 1.2842010259628296, "learning_rate": 2.7465982450092187e-06, "loss": 1.1016, "step": 279980 }, { "epoch": 1.7887763055339048, "grad_norm": 0.7452923655509949, "learning_rate": 2.74495833374121e-06, "loss": 0.7953, "step": 279990 }, { "epoch": 1.7888401926836437, "grad_norm": 0.8941696286201477, "learning_rate": 2.7433188983723436e-06, "loss": 0.7354, "step": 280000 }, { "epoch": 1.7889040798333822, "grad_norm": 0.608316957950592, "learning_rate": 2.741679938919145e-06, "loss": 0.7913, "step": 280010 }, { "epoch": 1.788967966983121, "grad_norm": 0.7802823185920715, "learning_rate": 2.740041455398107e-06, "loss": 0.8159, "step": 280020 }, { "epoch": 1.7890318541328596, "grad_norm": 1.0596716403961182, "learning_rate": 2.738403447825744e-06, "loss": 1.2073, "step": 280030 }, { "epoch": 1.7890957412825985, "grad_norm": 1.100813388824463, "learning_rate": 2.7367659162185424e-06, "loss": 0.7146, "step": 280040 }, { "epoch": 1.789159628432337, "grad_norm": 0.778558075428009, "learning_rate": 2.7351288605930005e-06, "loss": 0.9001, "step": 280050 }, { "epoch": 1.789223515582076, "grad_norm": 0.989571750164032, "learning_rate": 2.733492280965594e-06, "loss": 0.7467, "step": 280060 }, { "epoch": 1.7892874027318144, "grad_norm": 0.946502685546875, "learning_rate": 2.7318561773528153e-06, "loss": 1.2593, "step": 280070 }, { "epoch": 1.7893512898815533, "grad_norm": 0.8070095181465149, "learning_rate": 2.730220549771134e-06, "loss": 0.8182, "step": 280080 }, { "epoch": 1.7894151770312918, "grad_norm": 1.299543023109436, "learning_rate": 2.728585398237032e-06, "loss": 0.7447, "step": 280090 }, { "epoch": 1.7894790641810308, "grad_norm": 1.6871373653411865, "learning_rate": 2.726950722766969e-06, "loss": 0.921, "step": 280100 }, { "epoch": 1.7895429513307692, "grad_norm": 0.7189459204673767, "learning_rate": 2.7253165233774026e-06, "loss": 0.8707, "step": 280110 }, { "epoch": 1.7896068384805082, "grad_norm": 1.1018993854522705, "learning_rate": 2.7236828000848034e-06, "loss": 0.8479, "step": 280120 }, { "epoch": 1.7896707256302467, "grad_norm": 1.2004035711288452, "learning_rate": 2.7220495529056143e-06, "loss": 0.848, "step": 280130 }, { "epoch": 1.7897346127799856, "grad_norm": 1.2776861190795898, "learning_rate": 2.7204167818562944e-06, "loss": 0.9096, "step": 280140 }, { "epoch": 1.789798499929724, "grad_norm": 0.7575962543487549, "learning_rate": 2.718784486953274e-06, "loss": 0.8194, "step": 280150 }, { "epoch": 1.789862387079463, "grad_norm": 0.7639335989952087, "learning_rate": 2.717152668213008e-06, "loss": 0.8457, "step": 280160 }, { "epoch": 1.7899262742292015, "grad_norm": 1.0216253995895386, "learning_rate": 2.715521325651904e-06, "loss": 0.7565, "step": 280170 }, { "epoch": 1.7899901613789404, "grad_norm": 1.1266084909439087, "learning_rate": 2.7138904592864278e-06, "loss": 1.0067, "step": 280180 }, { "epoch": 1.7900540485286789, "grad_norm": 1.3970292806625366, "learning_rate": 2.7122600691329657e-06, "loss": 0.8546, "step": 280190 }, { "epoch": 1.7901179356784176, "grad_norm": 0.7054768204689026, "learning_rate": 2.7106301552079662e-06, "loss": 0.7755, "step": 280200 }, { "epoch": 1.7901818228281563, "grad_norm": 0.8589091897010803, "learning_rate": 2.7090007175278207e-06, "loss": 0.85, "step": 280210 }, { "epoch": 1.790245709977895, "grad_norm": 0.6626318693161011, "learning_rate": 2.7073717561089616e-06, "loss": 0.69, "step": 280220 }, { "epoch": 1.7903095971276337, "grad_norm": 0.8320964574813843, "learning_rate": 2.70574327096777e-06, "loss": 0.9992, "step": 280230 }, { "epoch": 1.7903734842773724, "grad_norm": 0.9648492932319641, "learning_rate": 2.7041152621206655e-06, "loss": 1.0147, "step": 280240 }, { "epoch": 1.7904373714271111, "grad_norm": 1.173795223236084, "learning_rate": 2.7024877295840413e-06, "loss": 0.8556, "step": 280250 }, { "epoch": 1.7905012585768498, "grad_norm": 0.774719774723053, "learning_rate": 2.7008606733742723e-06, "loss": 0.7414, "step": 280260 }, { "epoch": 1.7905651457265885, "grad_norm": 1.7096000909805298, "learning_rate": 2.6992340935077685e-06, "loss": 0.7764, "step": 280270 }, { "epoch": 1.7906290328763272, "grad_norm": 1.836374282836914, "learning_rate": 2.6976079900008887e-06, "loss": 0.8475, "step": 280280 }, { "epoch": 1.790692920026066, "grad_norm": 0.7114518284797668, "learning_rate": 2.6959823628700255e-06, "loss": 0.7563, "step": 280290 }, { "epoch": 1.7907568071758047, "grad_norm": 0.8420717716217041, "learning_rate": 2.694357212131543e-06, "loss": 0.815, "step": 280300 }, { "epoch": 1.7908206943255434, "grad_norm": 0.8970429301261902, "learning_rate": 2.6927325378018063e-06, "loss": 0.7784, "step": 280310 }, { "epoch": 1.790884581475282, "grad_norm": 1.0795053243637085, "learning_rate": 2.6911083398971805e-06, "loss": 1.127, "step": 280320 }, { "epoch": 1.7909484686250208, "grad_norm": 1.1815121173858643, "learning_rate": 2.689484618434024e-06, "loss": 1.085, "step": 280330 }, { "epoch": 1.7910123557747595, "grad_norm": 0.815788745880127, "learning_rate": 2.6878613734286797e-06, "loss": 0.8834, "step": 280340 }, { "epoch": 1.7910762429244982, "grad_norm": 0.9623815417289734, "learning_rate": 2.686238604897512e-06, "loss": 1.0261, "step": 280350 }, { "epoch": 1.791140130074237, "grad_norm": 0.9388995170593262, "learning_rate": 2.684616312856841e-06, "loss": 0.7752, "step": 280360 }, { "epoch": 1.7912040172239756, "grad_norm": 1.041695475578308, "learning_rate": 2.6829944973230323e-06, "loss": 0.8244, "step": 280370 }, { "epoch": 1.7912679043737143, "grad_norm": 1.519972324371338, "learning_rate": 2.6813731583123948e-06, "loss": 1.1163, "step": 280380 }, { "epoch": 1.791331791523453, "grad_norm": 1.1777002811431885, "learning_rate": 2.67975229584127e-06, "loss": 1.0122, "step": 280390 }, { "epoch": 1.7913956786731917, "grad_norm": 1.1681914329528809, "learning_rate": 2.6781319099259737e-06, "loss": 0.9996, "step": 280400 }, { "epoch": 1.7914595658229304, "grad_norm": 0.7550179362297058, "learning_rate": 2.6765120005828315e-06, "loss": 0.8414, "step": 280410 }, { "epoch": 1.7915234529726691, "grad_norm": 1.111717939376831, "learning_rate": 2.6748925678281465e-06, "loss": 0.6463, "step": 280420 }, { "epoch": 1.7915873401224078, "grad_norm": 1.5783500671386719, "learning_rate": 2.6732736116782396e-06, "loss": 0.712, "step": 280430 }, { "epoch": 1.7916512272721463, "grad_norm": 1.1098181009292603, "learning_rate": 2.67165513214942e-06, "loss": 1.0321, "step": 280440 }, { "epoch": 1.7917151144218852, "grad_norm": 0.6833020448684692, "learning_rate": 2.6700371292579628e-06, "loss": 0.7637, "step": 280450 }, { "epoch": 1.7917790015716237, "grad_norm": 0.8996888995170593, "learning_rate": 2.6684196030201892e-06, "loss": 0.9861, "step": 280460 }, { "epoch": 1.7918428887213627, "grad_norm": 1.858432412147522, "learning_rate": 2.6668025534523743e-06, "loss": 0.967, "step": 280470 }, { "epoch": 1.7919067758711011, "grad_norm": 0.7341635227203369, "learning_rate": 2.665185980570811e-06, "loss": 0.8536, "step": 280480 }, { "epoch": 1.79197066302084, "grad_norm": 1.4962414503097534, "learning_rate": 2.6635698843917644e-06, "loss": 0.8061, "step": 280490 }, { "epoch": 1.7920345501705786, "grad_norm": 1.0739679336547852, "learning_rate": 2.6619542649315323e-06, "loss": 0.7118, "step": 280500 }, { "epoch": 1.7920984373203175, "grad_norm": 0.8998137712478638, "learning_rate": 2.6603391222063677e-06, "loss": 0.8174, "step": 280510 }, { "epoch": 1.792162324470056, "grad_norm": 0.8208849430084229, "learning_rate": 2.6587244562325476e-06, "loss": 1.0456, "step": 280520 }, { "epoch": 1.792226211619795, "grad_norm": 0.8737877011299133, "learning_rate": 2.657110267026325e-06, "loss": 0.8667, "step": 280530 }, { "epoch": 1.7922900987695334, "grad_norm": 2.8004097938537598, "learning_rate": 2.655496554603959e-06, "loss": 1.0523, "step": 280540 }, { "epoch": 1.7923539859192723, "grad_norm": 1.1986647844314575, "learning_rate": 2.6538833189817035e-06, "loss": 1.1346, "step": 280550 }, { "epoch": 1.7924178730690108, "grad_norm": 0.9740709662437439, "learning_rate": 2.652270560175801e-06, "loss": 0.8708, "step": 280560 }, { "epoch": 1.7924817602187497, "grad_norm": 0.835901141166687, "learning_rate": 2.6506582782024946e-06, "loss": 0.7152, "step": 280570 }, { "epoch": 1.7925456473684882, "grad_norm": 0.7605105638504028, "learning_rate": 2.6490464730780264e-06, "loss": 0.9103, "step": 280580 }, { "epoch": 1.7926095345182271, "grad_norm": 0.8529285192489624, "learning_rate": 2.647435144818622e-06, "loss": 0.9013, "step": 280590 }, { "epoch": 1.7926734216679656, "grad_norm": 0.9947511553764343, "learning_rate": 2.645824293440513e-06, "loss": 0.866, "step": 280600 }, { "epoch": 1.7927373088177045, "grad_norm": 0.9029473066329956, "learning_rate": 2.6442139189599203e-06, "loss": 0.9818, "step": 280610 }, { "epoch": 1.792801195967443, "grad_norm": 0.9523748755455017, "learning_rate": 2.6426040213930635e-06, "loss": 0.9322, "step": 280620 }, { "epoch": 1.792865083117182, "grad_norm": 0.8351441621780396, "learning_rate": 2.640994600756147e-06, "loss": 1.008, "step": 280630 }, { "epoch": 1.7929289702669204, "grad_norm": 1.6545028686523438, "learning_rate": 2.6393856570653906e-06, "loss": 1.0136, "step": 280640 }, { "epoch": 1.7929928574166594, "grad_norm": 0.8904891014099121, "learning_rate": 2.637777190336993e-06, "loss": 0.8758, "step": 280650 }, { "epoch": 1.7930567445663979, "grad_norm": 1.1041239500045776, "learning_rate": 2.6361692005871517e-06, "loss": 0.7892, "step": 280660 }, { "epoch": 1.7931206317161368, "grad_norm": 1.135854721069336, "learning_rate": 2.6345616878320767e-06, "loss": 1.1031, "step": 280670 }, { "epoch": 1.7931845188658753, "grad_norm": 0.6659204959869385, "learning_rate": 2.6329546520879265e-06, "loss": 1.0759, "step": 280680 }, { "epoch": 1.793248406015614, "grad_norm": 1.7435749769210815, "learning_rate": 2.631348093370911e-06, "loss": 0.9217, "step": 280690 }, { "epoch": 1.7933122931653527, "grad_norm": 2.084075689315796, "learning_rate": 2.6297420116971895e-06, "loss": 1.0404, "step": 280700 }, { "epoch": 1.7933761803150914, "grad_norm": 1.4105054140090942, "learning_rate": 2.628136407082954e-06, "loss": 0.8211, "step": 280710 }, { "epoch": 1.79344006746483, "grad_norm": 1.0215950012207031, "learning_rate": 2.626531279544364e-06, "loss": 1.0964, "step": 280720 }, { "epoch": 1.7935039546145688, "grad_norm": 0.8240851759910583, "learning_rate": 2.6249266290975905e-06, "loss": 0.8405, "step": 280730 }, { "epoch": 1.7935678417643075, "grad_norm": 0.5688413381576538, "learning_rate": 2.6233224557587867e-06, "loss": 1.1805, "step": 280740 }, { "epoch": 1.7936317289140462, "grad_norm": 5.479889392852783, "learning_rate": 2.6217187595441172e-06, "loss": 1.1006, "step": 280750 }, { "epoch": 1.793695616063785, "grad_norm": 0.7729905247688293, "learning_rate": 2.6201155404697253e-06, "loss": 0.7942, "step": 280760 }, { "epoch": 1.7937595032135236, "grad_norm": 1.061532735824585, "learning_rate": 2.618512798551753e-06, "loss": 0.9488, "step": 280770 }, { "epoch": 1.7938233903632623, "grad_norm": 0.8962979316711426, "learning_rate": 2.6169105338063604e-06, "loss": 0.8684, "step": 280780 }, { "epoch": 1.793887277513001, "grad_norm": 1.4239200353622437, "learning_rate": 2.6153087462496616e-06, "loss": 0.732, "step": 280790 }, { "epoch": 1.7939511646627397, "grad_norm": 1.5325372219085693, "learning_rate": 2.6137074358978054e-06, "loss": 0.8815, "step": 280800 }, { "epoch": 1.7940150518124784, "grad_norm": 1.595062494277954, "learning_rate": 2.6121066027669006e-06, "loss": 0.9914, "step": 280810 }, { "epoch": 1.7940789389622172, "grad_norm": 0.9620861411094666, "learning_rate": 2.61050624687309e-06, "loss": 1.1952, "step": 280820 }, { "epoch": 1.7941428261119559, "grad_norm": 1.8029391765594482, "learning_rate": 2.6089063682324664e-06, "loss": 1.0367, "step": 280830 }, { "epoch": 1.7942067132616946, "grad_norm": 0.8456335663795471, "learning_rate": 2.607306966861167e-06, "loss": 1.0241, "step": 280840 }, { "epoch": 1.7942706004114333, "grad_norm": 0.8139656782150269, "learning_rate": 2.6057080427752787e-06, "loss": 0.954, "step": 280850 }, { "epoch": 1.794334487561172, "grad_norm": 0.5983510613441467, "learning_rate": 2.6041095959909167e-06, "loss": 0.9332, "step": 280860 }, { "epoch": 1.7943983747109107, "grad_norm": 1.8724719285964966, "learning_rate": 2.602511626524168e-06, "loss": 0.7195, "step": 280870 }, { "epoch": 1.7944622618606494, "grad_norm": 1.7823671102523804, "learning_rate": 2.6009141343911414e-06, "loss": 1.058, "step": 280880 }, { "epoch": 1.794526149010388, "grad_norm": 1.1335912942886353, "learning_rate": 2.599317119607908e-06, "loss": 0.7466, "step": 280890 }, { "epoch": 1.7945900361601268, "grad_norm": 0.8973339200019836, "learning_rate": 2.597720582190566e-06, "loss": 0.825, "step": 280900 }, { "epoch": 1.7946539233098655, "grad_norm": 0.9591314792633057, "learning_rate": 2.5961245221551746e-06, "loss": 0.9578, "step": 280910 }, { "epoch": 1.7947178104596042, "grad_norm": 1.0243194103240967, "learning_rate": 2.594528939517832e-06, "loss": 0.6736, "step": 280920 }, { "epoch": 1.7947816976093427, "grad_norm": 0.5387315154075623, "learning_rate": 2.5929338342945865e-06, "loss": 0.9586, "step": 280930 }, { "epoch": 1.7948455847590816, "grad_norm": 0.982440173625946, "learning_rate": 2.591339206501514e-06, "loss": 0.7208, "step": 280940 }, { "epoch": 1.79490947190882, "grad_norm": 1.2296168804168701, "learning_rate": 2.589745056154669e-06, "loss": 0.8387, "step": 280950 }, { "epoch": 1.794973359058559, "grad_norm": 1.1976443529129028, "learning_rate": 2.5881513832701044e-06, "loss": 0.9695, "step": 280960 }, { "epoch": 1.7950372462082975, "grad_norm": 1.1437692642211914, "learning_rate": 2.5865581878638745e-06, "loss": 0.6054, "step": 280970 }, { "epoch": 1.7951011333580364, "grad_norm": 0.8556347489356995, "learning_rate": 2.584965469952022e-06, "loss": 0.9218, "step": 280980 }, { "epoch": 1.795165020507775, "grad_norm": 0.7509362101554871, "learning_rate": 2.5833732295505895e-06, "loss": 0.9018, "step": 280990 }, { "epoch": 1.7952289076575139, "grad_norm": 0.9571146368980408, "learning_rate": 2.5817814666756035e-06, "loss": 0.7376, "step": 281000 }, { "epoch": 1.7952927948072523, "grad_norm": 1.0304933786392212, "learning_rate": 2.5801901813431116e-06, "loss": 1.0329, "step": 281010 }, { "epoch": 1.7953566819569913, "grad_norm": 0.7072136402130127, "learning_rate": 2.5785993735691184e-06, "loss": 0.644, "step": 281020 }, { "epoch": 1.7954205691067298, "grad_norm": 1.249049186706543, "learning_rate": 2.5770090433696604e-06, "loss": 1.0775, "step": 281030 }, { "epoch": 1.7954844562564687, "grad_norm": 1.0728756189346313, "learning_rate": 2.5754191907607473e-06, "loss": 1.0548, "step": 281040 }, { "epoch": 1.7955483434062072, "grad_norm": 0.8451454043388367, "learning_rate": 2.5738298157583885e-06, "loss": 0.9135, "step": 281050 }, { "epoch": 1.795612230555946, "grad_norm": 2.4911673069000244, "learning_rate": 2.5722409183785933e-06, "loss": 0.9373, "step": 281060 }, { "epoch": 1.7956761177056846, "grad_norm": 0.8939559459686279, "learning_rate": 2.570652498637366e-06, "loss": 0.8767, "step": 281070 }, { "epoch": 1.7957400048554235, "grad_norm": 5.280501365661621, "learning_rate": 2.5690645565506987e-06, "loss": 0.8534, "step": 281080 }, { "epoch": 1.795803892005162, "grad_norm": 0.7299918532371521, "learning_rate": 2.567477092134585e-06, "loss": 0.7935, "step": 281090 }, { "epoch": 1.795867779154901, "grad_norm": 0.9297654032707214, "learning_rate": 2.565890105405011e-06, "loss": 0.7796, "step": 281100 }, { "epoch": 1.7959316663046394, "grad_norm": 1.0455909967422485, "learning_rate": 2.564303596377965e-06, "loss": 0.9042, "step": 281110 }, { "epoch": 1.7959955534543783, "grad_norm": 1.2347888946533203, "learning_rate": 2.562717565069411e-06, "loss": 1.0895, "step": 281120 }, { "epoch": 1.7960594406041168, "grad_norm": 0.769524335861206, "learning_rate": 2.5611320114953374e-06, "loss": 0.6139, "step": 281130 }, { "epoch": 1.7961233277538557, "grad_norm": 0.8894920945167542, "learning_rate": 2.559546935671697e-06, "loss": 1.0127, "step": 281140 }, { "epoch": 1.7961872149035942, "grad_norm": 0.9349243640899658, "learning_rate": 2.557962337614467e-06, "loss": 0.8493, "step": 281150 }, { "epoch": 1.796251102053333, "grad_norm": 1.5105633735656738, "learning_rate": 2.5563782173396054e-06, "loss": 1.0115, "step": 281160 }, { "epoch": 1.7963149892030716, "grad_norm": 1.3091589212417603, "learning_rate": 2.5547945748630454e-06, "loss": 0.8647, "step": 281170 }, { "epoch": 1.7963788763528103, "grad_norm": 0.8863422274589539, "learning_rate": 2.553211410200762e-06, "loss": 0.8885, "step": 281180 }, { "epoch": 1.796442763502549, "grad_norm": 1.0648189783096313, "learning_rate": 2.5516287233686766e-06, "loss": 0.7835, "step": 281190 }, { "epoch": 1.7965066506522878, "grad_norm": 1.1669793128967285, "learning_rate": 2.550046514382748e-06, "loss": 1.1771, "step": 281200 }, { "epoch": 1.7965705378020265, "grad_norm": 1.1036698818206787, "learning_rate": 2.548464783258897e-06, "loss": 1.0883, "step": 281210 }, { "epoch": 1.7966344249517652, "grad_norm": 1.1956664323806763, "learning_rate": 2.546883530013061e-06, "loss": 0.7737, "step": 281220 }, { "epoch": 1.7966983121015039, "grad_norm": 0.824192225933075, "learning_rate": 2.5453027546611495e-06, "loss": 0.8914, "step": 281230 }, { "epoch": 1.7967621992512426, "grad_norm": 0.9419105052947998, "learning_rate": 2.5437224572191053e-06, "loss": 0.7246, "step": 281240 }, { "epoch": 1.7968260864009813, "grad_norm": 1.3376890420913696, "learning_rate": 2.542142637702821e-06, "loss": 0.8412, "step": 281250 }, { "epoch": 1.79688997355072, "grad_norm": 0.7286721467971802, "learning_rate": 2.540563296128229e-06, "loss": 0.7594, "step": 281260 }, { "epoch": 1.7969538607004587, "grad_norm": 1.2010440826416016, "learning_rate": 2.5389844325112157e-06, "loss": 0.8162, "step": 281270 }, { "epoch": 1.7970177478501974, "grad_norm": 1.0000466108322144, "learning_rate": 2.537406046867685e-06, "loss": 0.72, "step": 281280 }, { "epoch": 1.7970816349999361, "grad_norm": 0.7603448033332825, "learning_rate": 2.5358281392135417e-06, "loss": 0.9938, "step": 281290 }, { "epoch": 1.7971455221496748, "grad_norm": 0.7650514841079712, "learning_rate": 2.5342507095646727e-06, "loss": 0.7677, "step": 281300 }, { "epoch": 1.7972094092994135, "grad_norm": 0.8645716309547424, "learning_rate": 2.53267375793696e-06, "loss": 0.8062, "step": 281310 }, { "epoch": 1.7972732964491522, "grad_norm": 1.3306423425674438, "learning_rate": 2.53109728434629e-06, "loss": 0.7866, "step": 281320 }, { "epoch": 1.797337183598891, "grad_norm": 0.8305380940437317, "learning_rate": 2.5295212888085398e-06, "loss": 0.8128, "step": 281330 }, { "epoch": 1.7974010707486296, "grad_norm": 1.4031856060028076, "learning_rate": 2.5279457713395684e-06, "loss": 0.7964, "step": 281340 }, { "epoch": 1.7974649578983684, "grad_norm": 0.9651740193367004, "learning_rate": 2.526370731955258e-06, "loss": 0.7984, "step": 281350 }, { "epoch": 1.797528845048107, "grad_norm": 1.2332689762115479, "learning_rate": 2.524796170671462e-06, "loss": 0.9097, "step": 281360 }, { "epoch": 1.7975927321978458, "grad_norm": 0.8523402214050293, "learning_rate": 2.5232220875040456e-06, "loss": 0.7414, "step": 281370 }, { "epoch": 1.7976566193475845, "grad_norm": 1.4965907335281372, "learning_rate": 2.5216484824688522e-06, "loss": 0.8115, "step": 281380 }, { "epoch": 1.7977205064973232, "grad_norm": 1.2132138013839722, "learning_rate": 2.5200753555817358e-06, "loss": 0.9746, "step": 281390 }, { "epoch": 1.7977843936470619, "grad_norm": 1.3495724201202393, "learning_rate": 2.518502706858533e-06, "loss": 1.032, "step": 281400 }, { "epoch": 1.7978482807968006, "grad_norm": 1.1350055932998657, "learning_rate": 2.516930536315093e-06, "loss": 0.9833, "step": 281410 }, { "epoch": 1.797912167946539, "grad_norm": 1.4620221853256226, "learning_rate": 2.5153588439672303e-06, "loss": 0.7098, "step": 281420 }, { "epoch": 1.797976055096278, "grad_norm": 0.6531504988670349, "learning_rate": 2.513787629830794e-06, "loss": 0.784, "step": 281430 }, { "epoch": 1.7980399422460165, "grad_norm": 0.8889174461364746, "learning_rate": 2.512216893921587e-06, "loss": 1.0544, "step": 281440 }, { "epoch": 1.7981038293957554, "grad_norm": 2.057274341583252, "learning_rate": 2.510646636255448e-06, "loss": 0.6042, "step": 281450 }, { "epoch": 1.798167716545494, "grad_norm": 0.6107593774795532, "learning_rate": 2.5090768568481804e-06, "loss": 0.8022, "step": 281460 }, { "epoch": 1.7982316036952328, "grad_norm": 0.7373892664909363, "learning_rate": 2.5075075557155935e-06, "loss": 0.6763, "step": 281470 }, { "epoch": 1.7982954908449713, "grad_norm": 1.1133463382720947, "learning_rate": 2.505938732873486e-06, "loss": 0.9057, "step": 281480 }, { "epoch": 1.7983593779947102, "grad_norm": 2.7442684173583984, "learning_rate": 2.504370388337668e-06, "loss": 0.9499, "step": 281490 }, { "epoch": 1.7984232651444487, "grad_norm": 1.0574442148208618, "learning_rate": 2.5028025221239315e-06, "loss": 1.0303, "step": 281500 }, { "epoch": 1.7984871522941877, "grad_norm": 0.6583574414253235, "learning_rate": 2.5012351342480587e-06, "loss": 0.8544, "step": 281510 }, { "epoch": 1.7985510394439261, "grad_norm": 3.4296627044677734, "learning_rate": 2.499668224725854e-06, "loss": 0.7747, "step": 281520 }, { "epoch": 1.798614926593665, "grad_norm": 0.8521742224693298, "learning_rate": 2.498101793573071e-06, "loss": 0.9831, "step": 281530 }, { "epoch": 1.7986788137434035, "grad_norm": 1.4164676666259766, "learning_rate": 2.4965358408055026e-06, "loss": 0.8453, "step": 281540 }, { "epoch": 1.7987427008931425, "grad_norm": 1.0506097078323364, "learning_rate": 2.4949703664389144e-06, "loss": 0.8994, "step": 281550 }, { "epoch": 1.798806588042881, "grad_norm": 1.057515025138855, "learning_rate": 2.4934053704890712e-06, "loss": 0.7166, "step": 281560 }, { "epoch": 1.7988704751926199, "grad_norm": 1.1061023473739624, "learning_rate": 2.491840852971733e-06, "loss": 0.6907, "step": 281570 }, { "epoch": 1.7989343623423584, "grad_norm": 1.2089459896087646, "learning_rate": 2.4902768139026646e-06, "loss": 0.8302, "step": 281580 }, { "epoch": 1.7989982494920973, "grad_norm": 1.5082370042800903, "learning_rate": 2.488713253297603e-06, "loss": 0.8124, "step": 281590 }, { "epoch": 1.7990621366418358, "grad_norm": 0.8607321977615356, "learning_rate": 2.487150171172309e-06, "loss": 1.0578, "step": 281600 }, { "epoch": 1.7991260237915747, "grad_norm": 0.9606289863586426, "learning_rate": 2.4855875675425135e-06, "loss": 1.0664, "step": 281610 }, { "epoch": 1.7991899109413132, "grad_norm": 1.1536765098571777, "learning_rate": 2.4840254424239595e-06, "loss": 0.6769, "step": 281620 }, { "epoch": 1.7992537980910521, "grad_norm": 1.187015175819397, "learning_rate": 2.4824637958323683e-06, "loss": 0.632, "step": 281630 }, { "epoch": 1.7993176852407906, "grad_norm": 0.8003060221672058, "learning_rate": 2.480902627783488e-06, "loss": 0.9771, "step": 281640 }, { "epoch": 1.7993815723905293, "grad_norm": 0.8572350740432739, "learning_rate": 2.4793419382930226e-06, "loss": 0.8384, "step": 281650 }, { "epoch": 1.799445459540268, "grad_norm": 1.1636265516281128, "learning_rate": 2.4777817273766935e-06, "loss": 0.7707, "step": 281660 }, { "epoch": 1.7995093466900067, "grad_norm": 0.8209835886955261, "learning_rate": 2.476221995050215e-06, "loss": 0.7719, "step": 281670 }, { "epoch": 1.7995732338397454, "grad_norm": 1.7101112604141235, "learning_rate": 2.474662741329292e-06, "loss": 0.8495, "step": 281680 }, { "epoch": 1.7996371209894841, "grad_norm": 0.7957033514976501, "learning_rate": 2.4731039662296394e-06, "loss": 0.8171, "step": 281690 }, { "epoch": 1.7997010081392228, "grad_norm": 1.2597066164016724, "learning_rate": 2.4715456697669336e-06, "loss": 0.8401, "step": 281700 }, { "epoch": 1.7997648952889616, "grad_norm": 0.9615569710731506, "learning_rate": 2.4699878519568954e-06, "loss": 0.856, "step": 281710 }, { "epoch": 1.7998287824387003, "grad_norm": 0.6585493683815002, "learning_rate": 2.468430512815184e-06, "loss": 0.9701, "step": 281720 }, { "epoch": 1.799892669588439, "grad_norm": 1.0327953100204468, "learning_rate": 2.46687365235751e-06, "loss": 0.7134, "step": 281730 }, { "epoch": 1.7999565567381777, "grad_norm": 1.1007215976715088, "learning_rate": 2.465317270599532e-06, "loss": 0.819, "step": 281740 }, { "epoch": 1.8000204438879164, "grad_norm": 1.172269582748413, "learning_rate": 2.463761367556944e-06, "loss": 0.8989, "step": 281750 }, { "epoch": 1.800084331037655, "grad_norm": 0.9221561551094055, "learning_rate": 2.462205943245388e-06, "loss": 0.9553, "step": 281760 }, { "epoch": 1.8001482181873938, "grad_norm": 1.5058354139328003, "learning_rate": 2.460650997680558e-06, "loss": 0.7927, "step": 281770 }, { "epoch": 1.8002121053371325, "grad_norm": 0.97669917345047, "learning_rate": 2.4590965308780913e-06, "loss": 0.8084, "step": 281780 }, { "epoch": 1.8002759924868712, "grad_norm": 1.6297342777252197, "learning_rate": 2.457542542853658e-06, "loss": 0.9909, "step": 281790 }, { "epoch": 1.80033987963661, "grad_norm": 0.9887993335723877, "learning_rate": 2.455989033622891e-06, "loss": 1.1471, "step": 281800 }, { "epoch": 1.8004037667863486, "grad_norm": 1.278480887413025, "learning_rate": 2.454436003201449e-06, "loss": 0.74, "step": 281810 }, { "epoch": 1.8004676539360873, "grad_norm": 0.8170709609985352, "learning_rate": 2.452883451604976e-06, "loss": 0.7428, "step": 281820 }, { "epoch": 1.800531541085826, "grad_norm": 0.9986400604248047, "learning_rate": 2.4513313788490923e-06, "loss": 1.0936, "step": 281830 }, { "epoch": 1.8005954282355647, "grad_norm": 2.614264726638794, "learning_rate": 2.449779784949446e-06, "loss": 0.7667, "step": 281840 }, { "epoch": 1.8006593153853034, "grad_norm": 1.312461018562317, "learning_rate": 2.4482286699216483e-06, "loss": 0.9062, "step": 281850 }, { "epoch": 1.8007232025350421, "grad_norm": 1.056638240814209, "learning_rate": 2.44667803378133e-06, "loss": 0.9314, "step": 281860 }, { "epoch": 1.8007870896847809, "grad_norm": 0.556037425994873, "learning_rate": 2.4451278765440954e-06, "loss": 0.9544, "step": 281870 }, { "epoch": 1.8008509768345196, "grad_norm": 0.976234495639801, "learning_rate": 2.443578198225577e-06, "loss": 0.8445, "step": 281880 }, { "epoch": 1.800914863984258, "grad_norm": 0.8293027877807617, "learning_rate": 2.4420289988413557e-06, "loss": 0.9645, "step": 281890 }, { "epoch": 1.800978751133997, "grad_norm": 1.5045146942138672, "learning_rate": 2.4404802784070535e-06, "loss": 0.8362, "step": 281900 }, { "epoch": 1.8010426382837355, "grad_norm": 0.8356955051422119, "learning_rate": 2.4389320369382574e-06, "loss": 0.7479, "step": 281910 }, { "epoch": 1.8011065254334744, "grad_norm": 0.9490901231765747, "learning_rate": 2.437384274450566e-06, "loss": 0.8967, "step": 281920 }, { "epoch": 1.8011704125832129, "grad_norm": 1.8158320188522339, "learning_rate": 2.43583699095955e-06, "loss": 0.7877, "step": 281930 }, { "epoch": 1.8012342997329518, "grad_norm": 1.2870683670043945, "learning_rate": 2.43429018648082e-06, "loss": 0.7343, "step": 281940 }, { "epoch": 1.8012981868826903, "grad_norm": 0.7817414999008179, "learning_rate": 2.4327438610299238e-06, "loss": 0.7824, "step": 281950 }, { "epoch": 1.8013620740324292, "grad_norm": 0.9672678112983704, "learning_rate": 2.43119801462246e-06, "loss": 1.0592, "step": 281960 }, { "epoch": 1.8014259611821677, "grad_norm": 1.268545150756836, "learning_rate": 2.429652647273978e-06, "loss": 0.943, "step": 281970 }, { "epoch": 1.8014898483319066, "grad_norm": 0.6564430594444275, "learning_rate": 2.4281077590000533e-06, "loss": 0.8437, "step": 281980 }, { "epoch": 1.801553735481645, "grad_norm": 0.684155285358429, "learning_rate": 2.42656334981623e-06, "loss": 0.7213, "step": 281990 }, { "epoch": 1.801617622631384, "grad_norm": 1.8212864398956299, "learning_rate": 2.4250194197380837e-06, "loss": 0.7796, "step": 282000 }, { "epoch": 1.8016815097811225, "grad_norm": 1.3423765897750854, "learning_rate": 2.423630292315937e-06, "loss": 1.1215, "step": 282010 }, { "epoch": 1.8017453969308614, "grad_norm": 1.205952763557434, "learning_rate": 2.4220872725813747e-06, "loss": 0.8938, "step": 282020 }, { "epoch": 1.8018092840806, "grad_norm": 1.2857036590576172, "learning_rate": 2.4205447319975593e-06, "loss": 0.977, "step": 282030 }, { "epoch": 1.8018731712303389, "grad_norm": 0.7614555358886719, "learning_rate": 2.4190026705800175e-06, "loss": 0.8993, "step": 282040 }, { "epoch": 1.8019370583800773, "grad_norm": 2.7580156326293945, "learning_rate": 2.417461088344286e-06, "loss": 1.2222, "step": 282050 }, { "epoch": 1.8020009455298163, "grad_norm": 1.0370123386383057, "learning_rate": 2.415919985305881e-06, "loss": 0.7246, "step": 282060 }, { "epoch": 1.8020648326795548, "grad_norm": 0.966238260269165, "learning_rate": 2.4143793614803347e-06, "loss": 1.3524, "step": 282070 }, { "epoch": 1.8021287198292937, "grad_norm": 0.941066324710846, "learning_rate": 2.4128392168831504e-06, "loss": 1.0444, "step": 282080 }, { "epoch": 1.8021926069790322, "grad_norm": 0.6195220947265625, "learning_rate": 2.4112995515298444e-06, "loss": 0.7292, "step": 282090 }, { "epoch": 1.802256494128771, "grad_norm": 0.6646325588226318, "learning_rate": 2.4097603654359203e-06, "loss": 0.7402, "step": 282100 }, { "epoch": 1.8023203812785096, "grad_norm": 0.9299492835998535, "learning_rate": 2.4082216586168883e-06, "loss": 0.7345, "step": 282110 }, { "epoch": 1.8023842684282485, "grad_norm": 0.9561014175415039, "learning_rate": 2.4066834310882247e-06, "loss": 0.8976, "step": 282120 }, { "epoch": 1.802448155577987, "grad_norm": 1.0798419713974, "learning_rate": 2.4051456828654394e-06, "loss": 0.8831, "step": 282130 }, { "epoch": 1.8025120427277257, "grad_norm": 0.5972111821174622, "learning_rate": 2.4036084139640146e-06, "loss": 0.7122, "step": 282140 }, { "epoch": 1.8025759298774644, "grad_norm": 1.6115037202835083, "learning_rate": 2.4020716243994267e-06, "loss": 0.8927, "step": 282150 }, { "epoch": 1.802639817027203, "grad_norm": 1.084858775138855, "learning_rate": 2.400535314187158e-06, "loss": 0.7647, "step": 282160 }, { "epoch": 1.8027037041769418, "grad_norm": 1.1421221494674683, "learning_rate": 2.3989994833426788e-06, "loss": 0.8956, "step": 282170 }, { "epoch": 1.8027675913266805, "grad_norm": 0.9091895818710327, "learning_rate": 2.397464131881455e-06, "loss": 1.0873, "step": 282180 }, { "epoch": 1.8028314784764192, "grad_norm": 1.038530707359314, "learning_rate": 2.3959292598189463e-06, "loss": 1.2032, "step": 282190 }, { "epoch": 1.802895365626158, "grad_norm": 0.9789948463439941, "learning_rate": 2.3943948671706183e-06, "loss": 0.6278, "step": 282200 }, { "epoch": 1.8029592527758966, "grad_norm": 1.1401680707931519, "learning_rate": 2.392860953951909e-06, "loss": 0.9235, "step": 282210 }, { "epoch": 1.8030231399256353, "grad_norm": 0.698215901851654, "learning_rate": 2.391327520178288e-06, "loss": 1.067, "step": 282220 }, { "epoch": 1.803087027075374, "grad_norm": 1.2067962884902954, "learning_rate": 2.3897945658651777e-06, "loss": 0.987, "step": 282230 }, { "epoch": 1.8031509142251128, "grad_norm": 1.8061639070510864, "learning_rate": 2.3882620910280316e-06, "loss": 1.1182, "step": 282240 }, { "epoch": 1.8032148013748515, "grad_norm": 0.9816470146179199, "learning_rate": 2.3867300956822714e-06, "loss": 1.0446, "step": 282250 }, { "epoch": 1.8032786885245902, "grad_norm": 0.7584142088890076, "learning_rate": 2.385198579843334e-06, "loss": 0.9838, "step": 282260 }, { "epoch": 1.8033425756743289, "grad_norm": 1.0630838871002197, "learning_rate": 2.383667543526641e-06, "loss": 0.9216, "step": 282270 }, { "epoch": 1.8034064628240676, "grad_norm": 0.9282609224319458, "learning_rate": 2.3821369867476016e-06, "loss": 1.1867, "step": 282280 }, { "epoch": 1.8034703499738063, "grad_norm": 0.8381339311599731, "learning_rate": 2.3806069095216487e-06, "loss": 0.7, "step": 282290 }, { "epoch": 1.803534237123545, "grad_norm": 0.9528864026069641, "learning_rate": 2.3790773118641695e-06, "loss": 0.9546, "step": 282300 }, { "epoch": 1.8035981242732837, "grad_norm": 1.0386065244674683, "learning_rate": 2.377548193790591e-06, "loss": 1.104, "step": 282310 }, { "epoch": 1.8036620114230224, "grad_norm": 0.791536271572113, "learning_rate": 2.376019555316289e-06, "loss": 0.7687, "step": 282320 }, { "epoch": 1.803725898572761, "grad_norm": 1.0364983081817627, "learning_rate": 2.3744913964566795e-06, "loss": 1.0096, "step": 282330 }, { "epoch": 1.8037897857224998, "grad_norm": 1.0348879098892212, "learning_rate": 2.372963717227139e-06, "loss": 0.8892, "step": 282340 }, { "epoch": 1.8038536728722385, "grad_norm": 0.7205535769462585, "learning_rate": 2.3714365176430662e-06, "loss": 0.8306, "step": 282350 }, { "epoch": 1.8039175600219772, "grad_norm": 1.5916186571121216, "learning_rate": 2.3699097977198215e-06, "loss": 0.7633, "step": 282360 }, { "epoch": 1.803981447171716, "grad_norm": 0.7715607285499573, "learning_rate": 2.3683835574727973e-06, "loss": 0.6269, "step": 282370 }, { "epoch": 1.8040453343214544, "grad_norm": 0.8104017972946167, "learning_rate": 2.3668577969173544e-06, "loss": 1.0736, "step": 282380 }, { "epoch": 1.8041092214711933, "grad_norm": 1.4469940662384033, "learning_rate": 2.365332516068863e-06, "loss": 0.8361, "step": 282390 }, { "epoch": 1.8041731086209318, "grad_norm": 1.8273128271102905, "learning_rate": 2.363807714942684e-06, "loss": 0.7805, "step": 282400 }, { "epoch": 1.8042369957706708, "grad_norm": 0.9197923541069031, "learning_rate": 2.362283393554171e-06, "loss": 0.745, "step": 282410 }, { "epoch": 1.8043008829204092, "grad_norm": 0.9238811731338501, "learning_rate": 2.360759551918673e-06, "loss": 0.6943, "step": 282420 }, { "epoch": 1.8043647700701482, "grad_norm": 0.8211013674736023, "learning_rate": 2.3592361900515504e-06, "loss": 0.7315, "step": 282430 }, { "epoch": 1.8044286572198867, "grad_norm": 1.5779584646224976, "learning_rate": 2.3577133079681235e-06, "loss": 0.8395, "step": 282440 }, { "epoch": 1.8044925443696256, "grad_norm": 0.649736762046814, "learning_rate": 2.356190905683742e-06, "loss": 0.7725, "step": 282450 }, { "epoch": 1.804556431519364, "grad_norm": 1.5219225883483887, "learning_rate": 2.3546689832137423e-06, "loss": 1.0372, "step": 282460 }, { "epoch": 1.804620318669103, "grad_norm": 0.9140293598175049, "learning_rate": 2.3531475405734414e-06, "loss": 0.6017, "step": 282470 }, { "epoch": 1.8046842058188415, "grad_norm": 1.8277541399002075, "learning_rate": 2.3516265777781656e-06, "loss": 0.8381, "step": 282480 }, { "epoch": 1.8047480929685804, "grad_norm": 0.8005611896514893, "learning_rate": 2.35010609484323e-06, "loss": 0.8137, "step": 282490 }, { "epoch": 1.804811980118319, "grad_norm": 1.1830252408981323, "learning_rate": 2.348586091783955e-06, "loss": 0.9156, "step": 282500 }, { "epoch": 1.8048758672680578, "grad_norm": 0.9271206855773926, "learning_rate": 2.3470665686156356e-06, "loss": 1.1015, "step": 282510 }, { "epoch": 1.8049397544177963, "grad_norm": 1.3627598285675049, "learning_rate": 2.3455475253535864e-06, "loss": 1.0528, "step": 282520 }, { "epoch": 1.8050036415675352, "grad_norm": 0.9792562127113342, "learning_rate": 2.344028962013095e-06, "loss": 0.9871, "step": 282530 }, { "epoch": 1.8050675287172737, "grad_norm": 0.9024627804756165, "learning_rate": 2.3425108786094663e-06, "loss": 0.9655, "step": 282540 }, { "epoch": 1.8051314158670126, "grad_norm": 0.9467481374740601, "learning_rate": 2.3409932751579767e-06, "loss": 0.8784, "step": 282550 }, { "epoch": 1.8051953030167511, "grad_norm": 0.7127422094345093, "learning_rate": 2.339476151673925e-06, "loss": 0.7942, "step": 282560 }, { "epoch": 1.80525919016649, "grad_norm": 0.7716130614280701, "learning_rate": 2.3379595081725715e-06, "loss": 0.8521, "step": 282570 }, { "epoch": 1.8053230773162285, "grad_norm": 0.6751583218574524, "learning_rate": 2.3364433446692038e-06, "loss": 0.8518, "step": 282580 }, { "epoch": 1.8053869644659675, "grad_norm": 1.3516067266464233, "learning_rate": 2.3349276611790816e-06, "loss": 0.7958, "step": 282590 }, { "epoch": 1.805450851615706, "grad_norm": 0.9990543127059937, "learning_rate": 2.333412457717482e-06, "loss": 0.9145, "step": 282600 }, { "epoch": 1.8055147387654449, "grad_norm": 0.9455886483192444, "learning_rate": 2.3318977342996486e-06, "loss": 0.7915, "step": 282610 }, { "epoch": 1.8055786259151834, "grad_norm": 0.8298186659812927, "learning_rate": 2.330383490940852e-06, "loss": 0.7342, "step": 282620 }, { "epoch": 1.805642513064922, "grad_norm": 1.161590337753296, "learning_rate": 2.328869727656324e-06, "loss": 0.7807, "step": 282630 }, { "epoch": 1.8057064002146608, "grad_norm": 0.8286991119384766, "learning_rate": 2.3273564444613262e-06, "loss": 0.8986, "step": 282640 }, { "epoch": 1.8057702873643995, "grad_norm": 0.6541506052017212, "learning_rate": 2.325843641371084e-06, "loss": 0.9294, "step": 282650 }, { "epoch": 1.8058341745141382, "grad_norm": 0.7337067723274231, "learning_rate": 2.3243313184008463e-06, "loss": 0.8336, "step": 282660 }, { "epoch": 1.805898061663877, "grad_norm": 1.4487308263778687, "learning_rate": 2.3228194755658405e-06, "loss": 0.7292, "step": 282670 }, { "epoch": 1.8059619488136156, "grad_norm": 1.053105354309082, "learning_rate": 2.3213081128812818e-06, "loss": 0.7138, "step": 282680 }, { "epoch": 1.8060258359633543, "grad_norm": 0.898478090763092, "learning_rate": 2.3197972303624027e-06, "loss": 0.8208, "step": 282690 }, { "epoch": 1.806089723113093, "grad_norm": 1.4228217601776123, "learning_rate": 2.318286828024413e-06, "loss": 0.7939, "step": 282700 }, { "epoch": 1.8061536102628317, "grad_norm": 1.2164430618286133, "learning_rate": 2.3167769058825283e-06, "loss": 0.988, "step": 282710 }, { "epoch": 1.8062174974125704, "grad_norm": 1.0811247825622559, "learning_rate": 2.3152674639519476e-06, "loss": 0.855, "step": 282720 }, { "epoch": 1.8062813845623091, "grad_norm": 0.7520591616630554, "learning_rate": 2.313758502247887e-06, "loss": 1.1404, "step": 282730 }, { "epoch": 1.8063452717120478, "grad_norm": 0.7815333604812622, "learning_rate": 2.3122500207855225e-06, "loss": 0.7324, "step": 282740 }, { "epoch": 1.8064091588617865, "grad_norm": 0.845707356929779, "learning_rate": 2.310742019580059e-06, "loss": 0.8677, "step": 282750 }, { "epoch": 1.8064730460115253, "grad_norm": 1.769837498664856, "learning_rate": 2.309234498646684e-06, "loss": 1.0049, "step": 282760 }, { "epoch": 1.806536933161264, "grad_norm": 1.8039084672927856, "learning_rate": 2.307727458000575e-06, "loss": 0.9322, "step": 282770 }, { "epoch": 1.8066008203110027, "grad_norm": 0.7408353090286255, "learning_rate": 2.306220897656908e-06, "loss": 1.0801, "step": 282780 }, { "epoch": 1.8066647074607414, "grad_norm": 1.0164926052093506, "learning_rate": 2.304714817630854e-06, "loss": 0.9326, "step": 282790 }, { "epoch": 1.80672859461048, "grad_norm": 0.8177975416183472, "learning_rate": 2.3032092179375906e-06, "loss": 0.851, "step": 282800 }, { "epoch": 1.8067924817602188, "grad_norm": 1.1540977954864502, "learning_rate": 2.3017040985922668e-06, "loss": 0.8106, "step": 282810 }, { "epoch": 1.8068563689099575, "grad_norm": 0.9932825565338135, "learning_rate": 2.300199459610053e-06, "loss": 0.8606, "step": 282820 }, { "epoch": 1.8069202560596962, "grad_norm": 1.113124966621399, "learning_rate": 2.298695301006093e-06, "loss": 0.9581, "step": 282830 }, { "epoch": 1.806984143209435, "grad_norm": 1.4343130588531494, "learning_rate": 2.297191622795547e-06, "loss": 0.9953, "step": 282840 }, { "epoch": 1.8070480303591736, "grad_norm": 1.3366652727127075, "learning_rate": 2.295688424993536e-06, "loss": 0.861, "step": 282850 }, { "epoch": 1.8071119175089123, "grad_norm": 0.9450911283493042, "learning_rate": 2.294185707615226e-06, "loss": 0.9268, "step": 282860 }, { "epoch": 1.8071758046586508, "grad_norm": 0.7475114464759827, "learning_rate": 2.292683470675733e-06, "loss": 0.5739, "step": 282870 }, { "epoch": 1.8072396918083897, "grad_norm": 1.023122787475586, "learning_rate": 2.2911817141901883e-06, "loss": 0.7994, "step": 282880 }, { "epoch": 1.8073035789581282, "grad_norm": 1.0646544694900513, "learning_rate": 2.289680438173719e-06, "loss": 1.0825, "step": 282890 }, { "epoch": 1.8073674661078671, "grad_norm": 1.0106379985809326, "learning_rate": 2.288179642641447e-06, "loss": 0.936, "step": 282900 }, { "epoch": 1.8074313532576056, "grad_norm": 0.8868458271026611, "learning_rate": 2.286679327608471e-06, "loss": 0.7621, "step": 282910 }, { "epoch": 1.8074952404073445, "grad_norm": 0.7061320543289185, "learning_rate": 2.285179493089923e-06, "loss": 0.9102, "step": 282920 }, { "epoch": 1.807559127557083, "grad_norm": 1.1011165380477905, "learning_rate": 2.2836801391008913e-06, "loss": 1.0421, "step": 282930 }, { "epoch": 1.807623014706822, "grad_norm": 0.8100254535675049, "learning_rate": 2.2821812656564857e-06, "loss": 0.7593, "step": 282940 }, { "epoch": 1.8076869018565604, "grad_norm": 0.8423644304275513, "learning_rate": 2.2806828727717946e-06, "loss": 0.7817, "step": 282950 }, { "epoch": 1.8077507890062994, "grad_norm": 1.0324517488479614, "learning_rate": 2.279184960461911e-06, "loss": 1.0454, "step": 282960 }, { "epoch": 1.8078146761560379, "grad_norm": 1.0457466840744019, "learning_rate": 2.277687528741912e-06, "loss": 0.7965, "step": 282970 }, { "epoch": 1.8078785633057768, "grad_norm": 0.8995627164840698, "learning_rate": 2.276190577626891e-06, "loss": 0.7965, "step": 282980 }, { "epoch": 1.8079424504555153, "grad_norm": 0.7673644423484802, "learning_rate": 2.2746941071319194e-06, "loss": 0.7311, "step": 282990 }, { "epoch": 1.8080063376052542, "grad_norm": 1.0009177923202515, "learning_rate": 2.2731981172720627e-06, "loss": 1.0254, "step": 283000 }, { "epoch": 1.8080702247549927, "grad_norm": 1.051266074180603, "learning_rate": 2.271702608062393e-06, "loss": 0.7851, "step": 283010 }, { "epoch": 1.8081341119047316, "grad_norm": 1.0838903188705444, "learning_rate": 2.270207579517969e-06, "loss": 0.9183, "step": 283020 }, { "epoch": 1.80819799905447, "grad_norm": 0.7509300708770752, "learning_rate": 2.2687130316538463e-06, "loss": 0.7546, "step": 283030 }, { "epoch": 1.808261886204209, "grad_norm": 0.8449200987815857, "learning_rate": 2.267218964485074e-06, "loss": 0.8132, "step": 283040 }, { "epoch": 1.8083257733539475, "grad_norm": 0.5527771711349487, "learning_rate": 2.2657253780267062e-06, "loss": 0.9708, "step": 283050 }, { "epoch": 1.8083896605036864, "grad_norm": 0.9516294002532959, "learning_rate": 2.264232272293776e-06, "loss": 0.9998, "step": 283060 }, { "epoch": 1.808453547653425, "grad_norm": 0.8535572290420532, "learning_rate": 2.262739647301332e-06, "loss": 0.8959, "step": 283070 }, { "epoch": 1.8085174348031638, "grad_norm": 1.2427952289581299, "learning_rate": 2.2612475030643843e-06, "loss": 0.8699, "step": 283080 }, { "epoch": 1.8085813219529023, "grad_norm": 1.534712791442871, "learning_rate": 2.259755839597988e-06, "loss": 1.1143, "step": 283090 }, { "epoch": 1.8086452091026413, "grad_norm": 0.5216849446296692, "learning_rate": 2.2582646569171416e-06, "loss": 0.8651, "step": 283100 }, { "epoch": 1.8087090962523797, "grad_norm": 1.19882071018219, "learning_rate": 2.2567739550368783e-06, "loss": 0.92, "step": 283110 }, { "epoch": 1.8087729834021185, "grad_norm": 0.9051129817962646, "learning_rate": 2.255283733972202e-06, "loss": 0.8427, "step": 283120 }, { "epoch": 1.8088368705518572, "grad_norm": 1.0221871137619019, "learning_rate": 2.253793993738129e-06, "loss": 1.0342, "step": 283130 }, { "epoch": 1.8089007577015959, "grad_norm": 0.7139307260513306, "learning_rate": 2.252304734349647e-06, "loss": 0.7471, "step": 283140 }, { "epoch": 1.8089646448513346, "grad_norm": 1.5615328550338745, "learning_rate": 2.250815955821778e-06, "loss": 0.5934, "step": 283150 }, { "epoch": 1.8090285320010733, "grad_norm": 0.7617145776748657, "learning_rate": 2.2493276581694866e-06, "loss": 0.8384, "step": 283160 }, { "epoch": 1.809092419150812, "grad_norm": 3.028179407119751, "learning_rate": 2.2478398414077895e-06, "loss": 0.9085, "step": 283170 }, { "epoch": 1.8091563063005507, "grad_norm": 0.448412150144577, "learning_rate": 2.2463525055516465e-06, "loss": 0.8219, "step": 283180 }, { "epoch": 1.8092201934502894, "grad_norm": 1.0630991458892822, "learning_rate": 2.244865650616046e-06, "loss": 0.8723, "step": 283190 }, { "epoch": 1.809284080600028, "grad_norm": 1.0292770862579346, "learning_rate": 2.24337927661597e-06, "loss": 0.9843, "step": 283200 }, { "epoch": 1.8093479677497668, "grad_norm": 0.7813323736190796, "learning_rate": 2.241893383566368e-06, "loss": 0.9085, "step": 283210 }, { "epoch": 1.8094118548995055, "grad_norm": 0.8884089589118958, "learning_rate": 2.240407971482228e-06, "loss": 0.9564, "step": 283220 }, { "epoch": 1.8094757420492442, "grad_norm": 1.4575201272964478, "learning_rate": 2.2389230403784933e-06, "loss": 0.8366, "step": 283230 }, { "epoch": 1.809539629198983, "grad_norm": 0.9241499900817871, "learning_rate": 2.2374385902701357e-06, "loss": 0.8214, "step": 283240 }, { "epoch": 1.8096035163487216, "grad_norm": 0.8342140316963196, "learning_rate": 2.2361029964359847e-06, "loss": 0.9822, "step": 283250 }, { "epoch": 1.8096674034984603, "grad_norm": 0.6626742482185364, "learning_rate": 2.2346194602599955e-06, "loss": 0.7827, "step": 283260 }, { "epoch": 1.809731290648199, "grad_norm": 1.3511712551116943, "learning_rate": 2.2331364051226987e-06, "loss": 0.8839, "step": 283270 }, { "epoch": 1.8097951777979377, "grad_norm": 1.3152260780334473, "learning_rate": 2.2316538310390547e-06, "loss": 0.9771, "step": 283280 }, { "epoch": 1.8098590649476765, "grad_norm": 0.9161074161529541, "learning_rate": 2.2301717380239727e-06, "loss": 0.6344, "step": 283290 }, { "epoch": 1.8099229520974152, "grad_norm": 0.6363855004310608, "learning_rate": 2.2286901260923866e-06, "loss": 0.8171, "step": 283300 }, { "epoch": 1.8099868392471539, "grad_norm": 0.9332031607627869, "learning_rate": 2.227208995259228e-06, "loss": 1.1902, "step": 283310 }, { "epoch": 1.8100507263968926, "grad_norm": 0.9157412648200989, "learning_rate": 2.2257283455393964e-06, "loss": 0.5489, "step": 283320 }, { "epoch": 1.8101146135466313, "grad_norm": 1.413439154624939, "learning_rate": 2.2242481769478185e-06, "loss": 1.0107, "step": 283330 }, { "epoch": 1.81017850069637, "grad_norm": 1.1347328424453735, "learning_rate": 2.2227684894993882e-06, "loss": 0.9759, "step": 283340 }, { "epoch": 1.8102423878461087, "grad_norm": 0.6124105453491211, "learning_rate": 2.221289283209016e-06, "loss": 0.7626, "step": 283350 }, { "epoch": 1.8103062749958472, "grad_norm": 1.255640983581543, "learning_rate": 2.2198105580915895e-06, "loss": 0.7451, "step": 283360 }, { "epoch": 1.810370162145586, "grad_norm": 1.1673067808151245, "learning_rate": 2.218332314162014e-06, "loss": 0.8239, "step": 283370 }, { "epoch": 1.8104340492953246, "grad_norm": 0.4938371181488037, "learning_rate": 2.2168545514351656e-06, "loss": 0.6179, "step": 283380 }, { "epoch": 1.8104979364450635, "grad_norm": 1.0467393398284912, "learning_rate": 2.215377269925939e-06, "loss": 0.8178, "step": 283390 }, { "epoch": 1.810561823594802, "grad_norm": 1.0822023153305054, "learning_rate": 2.2139004696491885e-06, "loss": 0.7513, "step": 283400 }, { "epoch": 1.810625710744541, "grad_norm": 0.784705400466919, "learning_rate": 2.2124241506198074e-06, "loss": 0.8451, "step": 283410 }, { "epoch": 1.8106895978942794, "grad_norm": 1.294501781463623, "learning_rate": 2.2109483128526566e-06, "loss": 0.8586, "step": 283420 }, { "epoch": 1.8107534850440183, "grad_norm": 0.9332607388496399, "learning_rate": 2.209472956362596e-06, "loss": 0.8953, "step": 283430 }, { "epoch": 1.8108173721937568, "grad_norm": 1.299521803855896, "learning_rate": 2.2079980811644917e-06, "loss": 0.9633, "step": 283440 }, { "epoch": 1.8108812593434958, "grad_norm": 0.6523328423500061, "learning_rate": 2.2065236872731876e-06, "loss": 0.8005, "step": 283450 }, { "epoch": 1.8109451464932342, "grad_norm": 1.0625931024551392, "learning_rate": 2.205049774703544e-06, "loss": 0.7675, "step": 283460 }, { "epoch": 1.8110090336429732, "grad_norm": 0.5946071147918701, "learning_rate": 2.2035763434703928e-06, "loss": 0.7786, "step": 283470 }, { "epoch": 1.8110729207927116, "grad_norm": 1.482882022857666, "learning_rate": 2.2021033935885837e-06, "loss": 0.7202, "step": 283480 }, { "epoch": 1.8111368079424506, "grad_norm": 1.312387228012085, "learning_rate": 2.2006309250729384e-06, "loss": 1.0036, "step": 283490 }, { "epoch": 1.811200695092189, "grad_norm": 1.288197636604309, "learning_rate": 2.199158937938295e-06, "loss": 1.034, "step": 283500 }, { "epoch": 1.811264582241928, "grad_norm": 0.9824373126029968, "learning_rate": 2.1976874321994744e-06, "loss": 0.9011, "step": 283510 }, { "epoch": 1.8113284693916665, "grad_norm": 0.9257283210754395, "learning_rate": 2.196216407871299e-06, "loss": 0.8062, "step": 283520 }, { "epoch": 1.8113923565414054, "grad_norm": 0.9991766214370728, "learning_rate": 2.1947458649685727e-06, "loss": 0.7861, "step": 283530 }, { "epoch": 1.8114562436911439, "grad_norm": 0.8082060813903809, "learning_rate": 2.1932758035061228e-06, "loss": 0.8023, "step": 283540 }, { "epoch": 1.8115201308408828, "grad_norm": 0.8874626159667969, "learning_rate": 2.1918062234987323e-06, "loss": 0.9567, "step": 283550 }, { "epoch": 1.8115840179906213, "grad_norm": 1.4034126996994019, "learning_rate": 2.1903371249612227e-06, "loss": 0.7498, "step": 283560 }, { "epoch": 1.8116479051403602, "grad_norm": 1.0518414974212646, "learning_rate": 2.188868507908376e-06, "loss": 0.8519, "step": 283570 }, { "epoch": 1.8117117922900987, "grad_norm": 1.1383647918701172, "learning_rate": 2.187400372354986e-06, "loss": 1.0501, "step": 283580 }, { "epoch": 1.8117756794398374, "grad_norm": 1.1427676677703857, "learning_rate": 2.185932718315836e-06, "loss": 0.7585, "step": 283590 }, { "epoch": 1.8118395665895761, "grad_norm": 1.0937600135803223, "learning_rate": 2.184465545805714e-06, "loss": 0.8869, "step": 283600 }, { "epoch": 1.8119034537393148, "grad_norm": 1.1126290559768677, "learning_rate": 2.1829988548393855e-06, "loss": 0.9393, "step": 283610 }, { "epoch": 1.8119673408890535, "grad_norm": 0.9303832054138184, "learning_rate": 2.181532645431633e-06, "loss": 0.8961, "step": 283620 }, { "epoch": 1.8120312280387922, "grad_norm": 0.836120069026947, "learning_rate": 2.1800669175972068e-06, "loss": 0.9489, "step": 283630 }, { "epoch": 1.812095115188531, "grad_norm": 2.5739543437957764, "learning_rate": 2.178601671350877e-06, "loss": 0.8026, "step": 283640 }, { "epoch": 1.8121590023382697, "grad_norm": 1.465082049369812, "learning_rate": 2.1771369067074056e-06, "loss": 0.8689, "step": 283650 }, { "epoch": 1.8122228894880084, "grad_norm": 1.140472173690796, "learning_rate": 2.175672623681535e-06, "loss": 0.8422, "step": 283660 }, { "epoch": 1.812286776637747, "grad_norm": 1.1891902685165405, "learning_rate": 2.1742088222880207e-06, "loss": 0.8625, "step": 283670 }, { "epoch": 1.8123506637874858, "grad_norm": 0.7614114284515381, "learning_rate": 2.17274550254159e-06, "loss": 0.8563, "step": 283680 }, { "epoch": 1.8124145509372245, "grad_norm": 1.4389500617980957, "learning_rate": 2.1712826644569972e-06, "loss": 0.7662, "step": 283690 }, { "epoch": 1.8124784380869632, "grad_norm": 1.6076115369796753, "learning_rate": 2.169820308048959e-06, "loss": 0.9157, "step": 283700 }, { "epoch": 1.812542325236702, "grad_norm": 1.6042400598526, "learning_rate": 2.168358433332213e-06, "loss": 0.8883, "step": 283710 }, { "epoch": 1.8126062123864406, "grad_norm": 1.0674022436141968, "learning_rate": 2.166897040321475e-06, "loss": 0.8062, "step": 283720 }, { "epoch": 1.8126700995361793, "grad_norm": 0.7992262840270996, "learning_rate": 2.1654361290314674e-06, "loss": 0.9028, "step": 283730 }, { "epoch": 1.812733986685918, "grad_norm": 1.30583918094635, "learning_rate": 2.1639756994768944e-06, "loss": 0.8859, "step": 283740 }, { "epoch": 1.8127978738356567, "grad_norm": 0.9546254873275757, "learning_rate": 2.1625157516724837e-06, "loss": 0.8229, "step": 283750 }, { "epoch": 1.8128617609853954, "grad_norm": 1.1246157884597778, "learning_rate": 2.161056285632912e-06, "loss": 0.7242, "step": 283760 }, { "epoch": 1.8129256481351341, "grad_norm": 0.8130073547363281, "learning_rate": 2.1595973013728956e-06, "loss": 0.8151, "step": 283770 }, { "epoch": 1.8129895352848728, "grad_norm": 0.9397286772727966, "learning_rate": 2.1581387989071224e-06, "loss": 0.8207, "step": 283780 }, { "epoch": 1.8130534224346115, "grad_norm": 1.3959110975265503, "learning_rate": 2.156680778250281e-06, "loss": 1.1043, "step": 283790 }, { "epoch": 1.8131173095843502, "grad_norm": 1.0556739568710327, "learning_rate": 2.155223239417048e-06, "loss": 0.8429, "step": 283800 }, { "epoch": 1.813181196734089, "grad_norm": 1.2097365856170654, "learning_rate": 2.153766182422118e-06, "loss": 0.7923, "step": 283810 }, { "epoch": 1.8132450838838277, "grad_norm": 2.9758658409118652, "learning_rate": 2.1523096072801506e-06, "loss": 0.9167, "step": 283820 }, { "epoch": 1.8133089710335664, "grad_norm": 0.846839964389801, "learning_rate": 2.1508535140058184e-06, "loss": 1.0546, "step": 283830 }, { "epoch": 1.813372858183305, "grad_norm": 0.9610458016395569, "learning_rate": 2.1493979026137867e-06, "loss": 0.7903, "step": 283840 }, { "epoch": 1.8134367453330436, "grad_norm": 0.992201030254364, "learning_rate": 2.147942773118716e-06, "loss": 1.0594, "step": 283850 }, { "epoch": 1.8135006324827825, "grad_norm": 0.9246998429298401, "learning_rate": 2.146488125535262e-06, "loss": 0.9725, "step": 283860 }, { "epoch": 1.813564519632521, "grad_norm": 0.8264791369438171, "learning_rate": 2.1450339598780677e-06, "loss": 0.8062, "step": 283870 }, { "epoch": 1.81362840678226, "grad_norm": 1.0013701915740967, "learning_rate": 2.143580276161794e-06, "loss": 0.8676, "step": 283880 }, { "epoch": 1.8136922939319984, "grad_norm": 0.7805340886116028, "learning_rate": 2.142127074401057e-06, "loss": 0.7871, "step": 283890 }, { "epoch": 1.8137561810817373, "grad_norm": 0.8282870650291443, "learning_rate": 2.140674354610506e-06, "loss": 0.6807, "step": 283900 }, { "epoch": 1.8138200682314758, "grad_norm": 1.1535768508911133, "learning_rate": 2.1392221168047677e-06, "loss": 0.7917, "step": 283910 }, { "epoch": 1.8138839553812147, "grad_norm": 1.1144474744796753, "learning_rate": 2.13777036099847e-06, "loss": 0.8424, "step": 283920 }, { "epoch": 1.8139478425309532, "grad_norm": 1.0237375497817993, "learning_rate": 2.1363190872062234e-06, "loss": 0.7886, "step": 283930 }, { "epoch": 1.8140117296806921, "grad_norm": 1.0485482215881348, "learning_rate": 2.1348682954426602e-06, "loss": 0.8336, "step": 283940 }, { "epoch": 1.8140756168304306, "grad_norm": 0.586771547794342, "learning_rate": 2.1334179857223803e-06, "loss": 1.0313, "step": 283950 }, { "epoch": 1.8141395039801695, "grad_norm": 0.8109305500984192, "learning_rate": 2.131968158059988e-06, "loss": 1.0908, "step": 283960 }, { "epoch": 1.814203391129908, "grad_norm": 1.0652990341186523, "learning_rate": 2.1305188124700946e-06, "loss": 0.8719, "step": 283970 }, { "epoch": 1.814267278279647, "grad_norm": 1.1867952346801758, "learning_rate": 2.1290699489672827e-06, "loss": 0.8067, "step": 283980 }, { "epoch": 1.8143311654293854, "grad_norm": 0.8866208791732788, "learning_rate": 2.127621567566157e-06, "loss": 1.0105, "step": 283990 }, { "epoch": 1.8143950525791244, "grad_norm": 1.064661979675293, "learning_rate": 2.126173668281295e-06, "loss": 0.8171, "step": 284000 }, { "epoch": 1.8144589397288629, "grad_norm": 0.8402761816978455, "learning_rate": 2.124726251127279e-06, "loss": 0.7702, "step": 284010 }, { "epoch": 1.8145228268786018, "grad_norm": 1.0374879837036133, "learning_rate": 2.123279316118687e-06, "loss": 0.8017, "step": 284020 }, { "epoch": 1.8145867140283403, "grad_norm": 0.9119057655334473, "learning_rate": 2.121832863270101e-06, "loss": 0.9122, "step": 284030 }, { "epoch": 1.8146506011780792, "grad_norm": 1.0535051822662354, "learning_rate": 2.1203868925960655e-06, "loss": 0.7125, "step": 284040 }, { "epoch": 1.8147144883278177, "grad_norm": 0.5721514821052551, "learning_rate": 2.118941404111169e-06, "loss": 0.7486, "step": 284050 }, { "epoch": 1.8147783754775566, "grad_norm": 1.2209059000015259, "learning_rate": 2.1174963978299432e-06, "loss": 0.7943, "step": 284060 }, { "epoch": 1.814842262627295, "grad_norm": 0.9881746172904968, "learning_rate": 2.116051873766961e-06, "loss": 0.832, "step": 284070 }, { "epoch": 1.8149061497770338, "grad_norm": 0.7639790773391724, "learning_rate": 2.1146078319367547e-06, "loss": 0.8015, "step": 284080 }, { "epoch": 1.8149700369267725, "grad_norm": 0.627166211605072, "learning_rate": 2.113164272353885e-06, "loss": 0.6067, "step": 284090 }, { "epoch": 1.8150339240765112, "grad_norm": 0.7728124856948853, "learning_rate": 2.1117211950328674e-06, "loss": 0.8219, "step": 284100 }, { "epoch": 1.81509781122625, "grad_norm": 0.9364749193191528, "learning_rate": 2.110278599988258e-06, "loss": 1.1201, "step": 284110 }, { "epoch": 1.8151616983759886, "grad_norm": 1.0755594968795776, "learning_rate": 2.1088364872345667e-06, "loss": 1.0939, "step": 284120 }, { "epoch": 1.8152255855257273, "grad_norm": 2.0930886268615723, "learning_rate": 2.107394856786327e-06, "loss": 0.9332, "step": 284130 }, { "epoch": 1.815289472675466, "grad_norm": 1.3515701293945312, "learning_rate": 2.1059537086580485e-06, "loss": 1.0684, "step": 284140 }, { "epoch": 1.8153533598252047, "grad_norm": 1.5052813291549683, "learning_rate": 2.104513042864248e-06, "loss": 0.8437, "step": 284150 }, { "epoch": 1.8154172469749434, "grad_norm": 1.600309133529663, "learning_rate": 2.103072859419447e-06, "loss": 0.8595, "step": 284160 }, { "epoch": 1.8154811341246821, "grad_norm": 1.3267834186553955, "learning_rate": 2.1016331583381344e-06, "loss": 0.8628, "step": 284170 }, { "epoch": 1.8155450212744209, "grad_norm": 0.9317293763160706, "learning_rate": 2.1001939396348147e-06, "loss": 0.7491, "step": 284180 }, { "epoch": 1.8156089084241596, "grad_norm": 2.272695541381836, "learning_rate": 2.0987552033239765e-06, "loss": 0.8343, "step": 284190 }, { "epoch": 1.8156727955738983, "grad_norm": 0.9980998039245605, "learning_rate": 2.097316949420125e-06, "loss": 0.9049, "step": 284200 }, { "epoch": 1.815736682723637, "grad_norm": 0.7396201491355896, "learning_rate": 2.095879177937721e-06, "loss": 0.8789, "step": 284210 }, { "epoch": 1.8158005698733757, "grad_norm": 0.8641042113304138, "learning_rate": 2.094441888891263e-06, "loss": 1.0926, "step": 284220 }, { "epoch": 1.8158644570231144, "grad_norm": 1.1559175252914429, "learning_rate": 2.0930050822952186e-06, "loss": 1.0042, "step": 284230 }, { "epoch": 1.815928344172853, "grad_norm": 0.7350589036941528, "learning_rate": 2.091568758164059e-06, "loss": 0.8153, "step": 284240 }, { "epoch": 1.8159922313225918, "grad_norm": 0.7597360610961914, "learning_rate": 2.0901329165122496e-06, "loss": 0.8937, "step": 284250 }, { "epoch": 1.8160561184723305, "grad_norm": 1.366105556488037, "learning_rate": 2.0886975573542465e-06, "loss": 0.763, "step": 284260 }, { "epoch": 1.8161200056220692, "grad_norm": 1.1255083084106445, "learning_rate": 2.0872626807045104e-06, "loss": 0.6321, "step": 284270 }, { "epoch": 1.816183892771808, "grad_norm": 0.6423645615577698, "learning_rate": 2.0858282865774905e-06, "loss": 1.0003, "step": 284280 }, { "epoch": 1.8162477799215466, "grad_norm": 0.853395938873291, "learning_rate": 2.0843943749876248e-06, "loss": 0.9148, "step": 284290 }, { "epoch": 1.8163116670712853, "grad_norm": 1.3879306316375732, "learning_rate": 2.0829609459493692e-06, "loss": 0.9273, "step": 284300 }, { "epoch": 1.816375554221024, "grad_norm": 2.7183949947357178, "learning_rate": 2.0815279994771454e-06, "loss": 0.9227, "step": 284310 }, { "epoch": 1.8164394413707625, "grad_norm": 0.8922224044799805, "learning_rate": 2.080095535585397e-06, "loss": 0.935, "step": 284320 }, { "epoch": 1.8165033285205014, "grad_norm": 1.177586555480957, "learning_rate": 2.078663554288535e-06, "loss": 0.7243, "step": 284330 }, { "epoch": 1.81656721567024, "grad_norm": 1.1185641288757324, "learning_rate": 2.077232055600997e-06, "loss": 0.7291, "step": 284340 }, { "epoch": 1.8166311028199789, "grad_norm": 0.8241533041000366, "learning_rate": 2.0758010395371842e-06, "loss": 0.7383, "step": 284350 }, { "epoch": 1.8166949899697173, "grad_norm": 1.203342318534851, "learning_rate": 2.074370506111517e-06, "loss": 0.7972, "step": 284360 }, { "epoch": 1.8167588771194563, "grad_norm": 0.8048965334892273, "learning_rate": 2.072940455338407e-06, "loss": 1.1044, "step": 284370 }, { "epoch": 1.8168227642691948, "grad_norm": 0.7029027938842773, "learning_rate": 2.0715108872322363e-06, "loss": 1.0691, "step": 284380 }, { "epoch": 1.8168866514189337, "grad_norm": 0.8525398373603821, "learning_rate": 2.0700818018074276e-06, "loss": 0.8284, "step": 284390 }, { "epoch": 1.8169505385686722, "grad_norm": 1.0670232772827148, "learning_rate": 2.068653199078352e-06, "loss": 0.9253, "step": 284400 }, { "epoch": 1.817014425718411, "grad_norm": 2.1545190811157227, "learning_rate": 2.0672250790594095e-06, "loss": 0.7876, "step": 284410 }, { "epoch": 1.8170783128681496, "grad_norm": 0.5782620310783386, "learning_rate": 2.065797441764977e-06, "loss": 0.7137, "step": 284420 }, { "epoch": 1.8171422000178885, "grad_norm": 0.8786792755126953, "learning_rate": 2.0643702872094327e-06, "loss": 0.7858, "step": 284430 }, { "epoch": 1.817206087167627, "grad_norm": 1.353689432144165, "learning_rate": 2.062943615407148e-06, "loss": 1.114, "step": 284440 }, { "epoch": 1.817269974317366, "grad_norm": 1.3771668672561646, "learning_rate": 2.0615174263725e-06, "loss": 0.7619, "step": 284450 }, { "epoch": 1.8173338614671044, "grad_norm": 1.0626250505447388, "learning_rate": 2.060091720119833e-06, "loss": 0.6769, "step": 284460 }, { "epoch": 1.8173977486168433, "grad_norm": 0.7767465710639954, "learning_rate": 2.058666496663525e-06, "loss": 0.8848, "step": 284470 }, { "epoch": 1.8174616357665818, "grad_norm": 0.9560141563415527, "learning_rate": 2.0572417560179192e-06, "loss": 0.9424, "step": 284480 }, { "epoch": 1.8175255229163207, "grad_norm": 0.9091188311576843, "learning_rate": 2.05581749819736e-06, "loss": 0.8649, "step": 284490 }, { "epoch": 1.8175894100660592, "grad_norm": 0.8125816583633423, "learning_rate": 2.0543937232162026e-06, "loss": 0.7962, "step": 284500 }, { "epoch": 1.8176532972157982, "grad_norm": 0.9906743168830872, "learning_rate": 2.052970431088774e-06, "loss": 0.8921, "step": 284510 }, { "epoch": 1.8177171843655366, "grad_norm": 1.846880316734314, "learning_rate": 2.0515476218294193e-06, "loss": 0.571, "step": 284520 }, { "epoch": 1.8177810715152756, "grad_norm": 0.8297739624977112, "learning_rate": 2.050125295452454e-06, "loss": 0.9286, "step": 284530 }, { "epoch": 1.817844958665014, "grad_norm": 0.6898651123046875, "learning_rate": 2.048703451972217e-06, "loss": 0.6276, "step": 284540 }, { "epoch": 1.817908845814753, "grad_norm": 0.741755485534668, "learning_rate": 2.0472820914030134e-06, "loss": 0.9608, "step": 284550 }, { "epoch": 1.8179727329644915, "grad_norm": 1.0275651216506958, "learning_rate": 2.04586121375917e-06, "loss": 1.0575, "step": 284560 }, { "epoch": 1.8180366201142302, "grad_norm": 1.8861565589904785, "learning_rate": 2.0444408190549824e-06, "loss": 0.8141, "step": 284570 }, { "epoch": 1.8181005072639689, "grad_norm": 1.1138237714767456, "learning_rate": 2.0430209073047767e-06, "loss": 1.0783, "step": 284580 }, { "epoch": 1.8181643944137076, "grad_norm": 0.9914634823799133, "learning_rate": 2.0416014785228253e-06, "loss": 0.7674, "step": 284590 }, { "epoch": 1.8182282815634463, "grad_norm": 0.7898954153060913, "learning_rate": 2.0401825327234446e-06, "loss": 0.851, "step": 284600 }, { "epoch": 1.818292168713185, "grad_norm": 1.3129220008850098, "learning_rate": 2.038764069920912e-06, "loss": 1.0338, "step": 284610 }, { "epoch": 1.8183560558629237, "grad_norm": 0.6844892501831055, "learning_rate": 2.037346090129527e-06, "loss": 0.787, "step": 284620 }, { "epoch": 1.8184199430126624, "grad_norm": 0.5973532795906067, "learning_rate": 2.035928593363551e-06, "loss": 0.639, "step": 284630 }, { "epoch": 1.8184838301624011, "grad_norm": 0.8840516209602356, "learning_rate": 2.0345115796372715e-06, "loss": 0.8543, "step": 284640 }, { "epoch": 1.8185477173121398, "grad_norm": 2.3488833904266357, "learning_rate": 2.033095048964956e-06, "loss": 0.8962, "step": 284650 }, { "epoch": 1.8186116044618785, "grad_norm": 1.3273308277130127, "learning_rate": 2.0316790013608753e-06, "loss": 0.9147, "step": 284660 }, { "epoch": 1.8186754916116172, "grad_norm": 0.9628444910049438, "learning_rate": 2.0302634368392803e-06, "loss": 1.0421, "step": 284670 }, { "epoch": 1.818739378761356, "grad_norm": 0.9748392105102539, "learning_rate": 2.0288483554144254e-06, "loss": 0.7174, "step": 284680 }, { "epoch": 1.8188032659110946, "grad_norm": 1.3948633670806885, "learning_rate": 2.027433757100583e-06, "loss": 0.7698, "step": 284690 }, { "epoch": 1.8188671530608334, "grad_norm": 1.6660586595535278, "learning_rate": 2.0260196419119747e-06, "loss": 0.8549, "step": 284700 }, { "epoch": 1.818931040210572, "grad_norm": 0.9206308722496033, "learning_rate": 2.0246060098628616e-06, "loss": 0.698, "step": 284710 }, { "epoch": 1.8189949273603108, "grad_norm": 1.0280396938323975, "learning_rate": 2.02319286096746e-06, "loss": 1.1205, "step": 284720 }, { "epoch": 1.8190588145100495, "grad_norm": 0.9921185374259949, "learning_rate": 2.0217801952400196e-06, "loss": 0.618, "step": 284730 }, { "epoch": 1.8191227016597882, "grad_norm": 0.6777898073196411, "learning_rate": 2.0203680126947565e-06, "loss": 1.006, "step": 284740 }, { "epoch": 1.8191865888095269, "grad_norm": 0.919839084148407, "learning_rate": 2.018956313345899e-06, "loss": 0.895, "step": 284750 }, { "epoch": 1.8192504759592656, "grad_norm": 1.8676611185073853, "learning_rate": 2.0175450972076516e-06, "loss": 1.0452, "step": 284760 }, { "epoch": 1.8193143631090043, "grad_norm": 0.9780756235122681, "learning_rate": 2.0161343642942476e-06, "loss": 0.8915, "step": 284770 }, { "epoch": 1.819378250258743, "grad_norm": 3.6714227199554443, "learning_rate": 2.014724114619876e-06, "loss": 1.0235, "step": 284780 }, { "epoch": 1.8194421374084817, "grad_norm": 2.075040340423584, "learning_rate": 2.013314348198747e-06, "loss": 0.8235, "step": 284790 }, { "epoch": 1.8195060245582204, "grad_norm": 0.809281587600708, "learning_rate": 2.0119050650450556e-06, "loss": 0.7084, "step": 284800 }, { "epoch": 1.819569911707959, "grad_norm": 1.4547041654586792, "learning_rate": 2.010496265173001e-06, "loss": 1.0448, "step": 284810 }, { "epoch": 1.8196337988576978, "grad_norm": 1.625180959701538, "learning_rate": 2.009087948596761e-06, "loss": 0.9319, "step": 284820 }, { "epoch": 1.8196976860074363, "grad_norm": 1.166869044303894, "learning_rate": 2.007680115330529e-06, "loss": 1.0157, "step": 284830 }, { "epoch": 1.8197615731571752, "grad_norm": 0.996919572353363, "learning_rate": 2.006272765388467e-06, "loss": 1.0949, "step": 284840 }, { "epoch": 1.8198254603069137, "grad_norm": 1.169065237045288, "learning_rate": 2.004865898784769e-06, "loss": 0.8891, "step": 284850 }, { "epoch": 1.8198893474566527, "grad_norm": 0.8748899102210999, "learning_rate": 2.00345951553359e-06, "loss": 0.9441, "step": 284860 }, { "epoch": 1.8199532346063911, "grad_norm": 1.0787811279296875, "learning_rate": 2.0020536156490964e-06, "loss": 0.7037, "step": 284870 }, { "epoch": 1.82001712175613, "grad_norm": 0.8389084935188293, "learning_rate": 2.0006481991454495e-06, "loss": 0.8076, "step": 284880 }, { "epoch": 1.8200810089058685, "grad_norm": 1.2037585973739624, "learning_rate": 1.9992432660367933e-06, "loss": 0.7315, "step": 284890 }, { "epoch": 1.8201448960556075, "grad_norm": 0.9362344741821289, "learning_rate": 1.9978388163372885e-06, "loss": 0.9803, "step": 284900 }, { "epoch": 1.820208783205346, "grad_norm": 0.7869431376457214, "learning_rate": 1.996434850061074e-06, "loss": 0.9087, "step": 284910 }, { "epoch": 1.8202726703550849, "grad_norm": 1.0346866846084595, "learning_rate": 1.9950313672222944e-06, "loss": 0.6123, "step": 284920 }, { "epoch": 1.8203365575048234, "grad_norm": 1.109389305114746, "learning_rate": 1.993628367835071e-06, "loss": 0.7859, "step": 284930 }, { "epoch": 1.8204004446545623, "grad_norm": 1.341280221939087, "learning_rate": 1.992225851913543e-06, "loss": 0.7019, "step": 284940 }, { "epoch": 1.8204643318043008, "grad_norm": 0.7633939385414124, "learning_rate": 1.9908238194718323e-06, "loss": 0.8567, "step": 284950 }, { "epoch": 1.8205282189540397, "grad_norm": 0.699362576007843, "learning_rate": 1.989422270524066e-06, "loss": 0.8734, "step": 284960 }, { "epoch": 1.8205921061037782, "grad_norm": 0.8467990756034851, "learning_rate": 1.988021205084345e-06, "loss": 0.9091, "step": 284970 }, { "epoch": 1.8206559932535171, "grad_norm": 1.1297391653060913, "learning_rate": 1.986620623166796e-06, "loss": 0.989, "step": 284980 }, { "epoch": 1.8207198804032556, "grad_norm": 0.8497728705406189, "learning_rate": 1.9852205247855083e-06, "loss": 0.5939, "step": 284990 }, { "epoch": 1.8207837675529945, "grad_norm": 1.0683796405792236, "learning_rate": 1.983820909954587e-06, "loss": 0.7312, "step": 285000 }, { "epoch": 1.820847654702733, "grad_norm": 1.2456072568893433, "learning_rate": 1.9824217786881316e-06, "loss": 0.7684, "step": 285010 }, { "epoch": 1.820911541852472, "grad_norm": 1.282422661781311, "learning_rate": 1.9810231310002256e-06, "loss": 0.7891, "step": 285020 }, { "epoch": 1.8209754290022104, "grad_norm": 1.13451087474823, "learning_rate": 1.9796249669049694e-06, "loss": 1.3058, "step": 285030 }, { "epoch": 1.8210393161519494, "grad_norm": 0.85088711977005, "learning_rate": 1.978227286416423e-06, "loss": 0.7471, "step": 285040 }, { "epoch": 1.8211032033016878, "grad_norm": 0.8464387655258179, "learning_rate": 1.9768300895486813e-06, "loss": 0.9672, "step": 285050 }, { "epoch": 1.8211670904514266, "grad_norm": 0.8177611827850342, "learning_rate": 1.9754333763157994e-06, "loss": 0.9349, "step": 285060 }, { "epoch": 1.8212309776011653, "grad_norm": 1.3880621194839478, "learning_rate": 1.9740371467318555e-06, "loss": 0.9024, "step": 285070 }, { "epoch": 1.821294864750904, "grad_norm": 1.3378520011901855, "learning_rate": 1.972641400810904e-06, "loss": 0.7301, "step": 285080 }, { "epoch": 1.8213587519006427, "grad_norm": 0.9654492735862732, "learning_rate": 1.9712461385670065e-06, "loss": 0.9758, "step": 285090 }, { "epoch": 1.8214226390503814, "grad_norm": 0.9789209961891174, "learning_rate": 1.969851360014202e-06, "loss": 0.8543, "step": 285100 }, { "epoch": 1.82148652620012, "grad_norm": 0.5984829068183899, "learning_rate": 1.9684570651665566e-06, "loss": 0.7107, "step": 285110 }, { "epoch": 1.8215504133498588, "grad_norm": 0.8956750631332397, "learning_rate": 1.967063254038093e-06, "loss": 0.8092, "step": 285120 }, { "epoch": 1.8216143004995975, "grad_norm": 1.5672224760055542, "learning_rate": 1.965669926642866e-06, "loss": 1.1351, "step": 285130 }, { "epoch": 1.8216781876493362, "grad_norm": 1.1448312997817993, "learning_rate": 1.9642770829948864e-06, "loss": 0.9314, "step": 285140 }, { "epoch": 1.821742074799075, "grad_norm": 1.4996016025543213, "learning_rate": 1.9628847231082047e-06, "loss": 0.9699, "step": 285150 }, { "epoch": 1.8218059619488136, "grad_norm": 0.7268596291542053, "learning_rate": 1.9614928469968264e-06, "loss": 0.899, "step": 285160 }, { "epoch": 1.8218698490985523, "grad_norm": 0.6328108906745911, "learning_rate": 1.9601014546747787e-06, "loss": 1.0637, "step": 285170 }, { "epoch": 1.821933736248291, "grad_norm": 2.3796627521514893, "learning_rate": 1.958710546156062e-06, "loss": 0.787, "step": 285180 }, { "epoch": 1.8219976233980297, "grad_norm": 1.022774338722229, "learning_rate": 1.957320121454698e-06, "loss": 0.9771, "step": 285190 }, { "epoch": 1.8220615105477684, "grad_norm": 0.7801091074943542, "learning_rate": 1.955930180584681e-06, "loss": 0.7787, "step": 285200 }, { "epoch": 1.8221253976975071, "grad_norm": 1.1597459316253662, "learning_rate": 1.9545407235600054e-06, "loss": 0.7169, "step": 285210 }, { "epoch": 1.8221892848472458, "grad_norm": 0.8490064144134521, "learning_rate": 1.953151750394683e-06, "loss": 0.8965, "step": 285220 }, { "epoch": 1.8222531719969846, "grad_norm": 0.5219688415527344, "learning_rate": 1.951763261102679e-06, "loss": 0.7575, "step": 285230 }, { "epoch": 1.8223170591467233, "grad_norm": 1.8479723930358887, "learning_rate": 1.950375255697989e-06, "loss": 0.7659, "step": 285240 }, { "epoch": 1.822380946296462, "grad_norm": 1.2113089561462402, "learning_rate": 1.9489877341945906e-06, "loss": 0.6914, "step": 285250 }, { "epoch": 1.8224448334462007, "grad_norm": 0.5127272605895996, "learning_rate": 1.9476006966064555e-06, "loss": 0.7166, "step": 285260 }, { "epoch": 1.8225087205959394, "grad_norm": 0.689415454864502, "learning_rate": 1.94621414294755e-06, "loss": 0.7155, "step": 285270 }, { "epoch": 1.822572607745678, "grad_norm": 1.3685857057571411, "learning_rate": 1.944828073231847e-06, "loss": 1.0151, "step": 285280 }, { "epoch": 1.8226364948954168, "grad_norm": 1.7720893621444702, "learning_rate": 1.94344248747329e-06, "loss": 0.9299, "step": 285290 }, { "epoch": 1.8227003820451553, "grad_norm": 0.789768397808075, "learning_rate": 1.9420573856858526e-06, "loss": 0.8446, "step": 285300 }, { "epoch": 1.8227642691948942, "grad_norm": 0.7223408818244934, "learning_rate": 1.940672767883461e-06, "loss": 0.6014, "step": 285310 }, { "epoch": 1.8228281563446327, "grad_norm": 1.5340150594711304, "learning_rate": 1.939288634080083e-06, "loss": 0.6325, "step": 285320 }, { "epoch": 1.8228920434943716, "grad_norm": 0.9605322480201721, "learning_rate": 1.937904984289646e-06, "loss": 0.9184, "step": 285330 }, { "epoch": 1.82295593064411, "grad_norm": 0.9917205572128296, "learning_rate": 1.936521818526077e-06, "loss": 0.9888, "step": 285340 }, { "epoch": 1.823019817793849, "grad_norm": 0.8223958611488342, "learning_rate": 1.935139136803321e-06, "loss": 0.7055, "step": 285350 }, { "epoch": 1.8230837049435875, "grad_norm": 0.7280120849609375, "learning_rate": 1.9337569391352896e-06, "loss": 1.1515, "step": 285360 }, { "epoch": 1.8231475920933264, "grad_norm": 0.5626384615898132, "learning_rate": 1.932375225535915e-06, "loss": 0.7377, "step": 285370 }, { "epoch": 1.823211479243065, "grad_norm": 0.7775994539260864, "learning_rate": 1.9309939960191036e-06, "loss": 0.9574, "step": 285380 }, { "epoch": 1.8232753663928039, "grad_norm": 0.4441358149051666, "learning_rate": 1.929613250598772e-06, "loss": 0.6747, "step": 285390 }, { "epoch": 1.8233392535425423, "grad_norm": 1.1189658641815186, "learning_rate": 1.928232989288814e-06, "loss": 0.885, "step": 285400 }, { "epoch": 1.8234031406922813, "grad_norm": 2.306811809539795, "learning_rate": 1.926853212103147e-06, "loss": 0.9191, "step": 285410 }, { "epoch": 1.8234670278420197, "grad_norm": 0.9467042088508606, "learning_rate": 1.925473919055648e-06, "loss": 1.0129, "step": 285420 }, { "epoch": 1.8235309149917587, "grad_norm": 1.0102781057357788, "learning_rate": 1.9240951101602287e-06, "loss": 0.918, "step": 285430 }, { "epoch": 1.8235948021414972, "grad_norm": 0.7928588390350342, "learning_rate": 1.9227167854307506e-06, "loss": 0.7995, "step": 285440 }, { "epoch": 1.823658689291236, "grad_norm": 2.1351563930511475, "learning_rate": 1.921338944881118e-06, "loss": 1.0241, "step": 285450 }, { "epoch": 1.8237225764409746, "grad_norm": 0.6350265741348267, "learning_rate": 1.9199615885251878e-06, "loss": 0.7882, "step": 285460 }, { "epoch": 1.8237864635907135, "grad_norm": 1.3224437236785889, "learning_rate": 1.918584716376842e-06, "loss": 0.9932, "step": 285470 }, { "epoch": 1.823850350740452, "grad_norm": 0.47573602199554443, "learning_rate": 1.9172083284499432e-06, "loss": 0.8303, "step": 285480 }, { "epoch": 1.823914237890191, "grad_norm": 1.2311193943023682, "learning_rate": 1.9158324247583624e-06, "loss": 0.9476, "step": 285490 }, { "epoch": 1.8239781250399294, "grad_norm": 1.1468632221221924, "learning_rate": 1.9144570053159393e-06, "loss": 0.8672, "step": 285500 }, { "epoch": 1.8240420121896683, "grad_norm": 0.8073374629020691, "learning_rate": 1.9130820701365404e-06, "loss": 1.0607, "step": 285510 }, { "epoch": 1.8241058993394068, "grad_norm": 1.221170425415039, "learning_rate": 1.911707619233999e-06, "loss": 0.7194, "step": 285520 }, { "epoch": 1.8241697864891457, "grad_norm": 0.8472967743873596, "learning_rate": 1.9103336526221595e-06, "loss": 0.7551, "step": 285530 }, { "epoch": 1.8242336736388842, "grad_norm": 0.6870689988136292, "learning_rate": 1.9089601703148773e-06, "loss": 0.7329, "step": 285540 }, { "epoch": 1.824297560788623, "grad_norm": 0.8167701363563538, "learning_rate": 1.907587172325959e-06, "loss": 0.7073, "step": 285550 }, { "epoch": 1.8243614479383616, "grad_norm": 0.9098202586174011, "learning_rate": 1.906214658669253e-06, "loss": 0.8432, "step": 285560 }, { "epoch": 1.8244253350881003, "grad_norm": 5.167019367218018, "learning_rate": 1.9048426293585663e-06, "loss": 0.7477, "step": 285570 }, { "epoch": 1.824489222237839, "grad_norm": 0.9570047855377197, "learning_rate": 1.903471084407732e-06, "loss": 0.9087, "step": 285580 }, { "epoch": 1.8245531093875778, "grad_norm": 1.1526376008987427, "learning_rate": 1.9021000238305441e-06, "loss": 0.8563, "step": 285590 }, { "epoch": 1.8246169965373165, "grad_norm": 0.6891779899597168, "learning_rate": 1.9007294476408256e-06, "loss": 0.6378, "step": 285600 }, { "epoch": 1.8246808836870552, "grad_norm": 0.8805965781211853, "learning_rate": 1.8993593558523648e-06, "loss": 0.6564, "step": 285610 }, { "epoch": 1.8247447708367939, "grad_norm": 1.0953179597854614, "learning_rate": 1.8979897484789789e-06, "loss": 0.9682, "step": 285620 }, { "epoch": 1.8248086579865326, "grad_norm": 0.8742592930793762, "learning_rate": 1.8966206255344398e-06, "loss": 1.0923, "step": 285630 }, { "epoch": 1.8248725451362713, "grad_norm": 1.3812202215194702, "learning_rate": 1.8952519870325535e-06, "loss": 0.9457, "step": 285640 }, { "epoch": 1.82493643228601, "grad_norm": 1.8078464269638062, "learning_rate": 1.8938838329870922e-06, "loss": 1.048, "step": 285650 }, { "epoch": 1.8250003194357487, "grad_norm": 1.5746678113937378, "learning_rate": 1.892516163411845e-06, "loss": 1.0377, "step": 285660 }, { "epoch": 1.8250642065854874, "grad_norm": 1.2783678770065308, "learning_rate": 1.8911489783205672e-06, "loss": 1.2767, "step": 285670 }, { "epoch": 1.825128093735226, "grad_norm": 1.2706398963928223, "learning_rate": 1.8897822777270536e-06, "loss": 0.8448, "step": 285680 }, { "epoch": 1.8251919808849648, "grad_norm": 1.1190025806427002, "learning_rate": 1.888416061645043e-06, "loss": 0.9448, "step": 285690 }, { "epoch": 1.8252558680347035, "grad_norm": 1.6733462810516357, "learning_rate": 1.887050330088308e-06, "loss": 1.0124, "step": 285700 }, { "epoch": 1.8253197551844422, "grad_norm": 1.2430444955825806, "learning_rate": 1.8856850830705985e-06, "loss": 0.9413, "step": 285710 }, { "epoch": 1.825383642334181, "grad_norm": 1.307869553565979, "learning_rate": 1.8843203206056704e-06, "loss": 0.9672, "step": 285720 }, { "epoch": 1.8254475294839196, "grad_norm": 1.8900365829467773, "learning_rate": 1.8829560427072569e-06, "loss": 1.2764, "step": 285730 }, { "epoch": 1.8255114166336583, "grad_norm": 0.8162425756454468, "learning_rate": 1.8815922493891024e-06, "loss": 0.7358, "step": 285740 }, { "epoch": 1.825575303783397, "grad_norm": 1.0894966125488281, "learning_rate": 1.8802289406649464e-06, "loss": 0.9248, "step": 285750 }, { "epoch": 1.8256391909331358, "grad_norm": 0.7507548332214355, "learning_rate": 1.878866116548511e-06, "loss": 0.9988, "step": 285760 }, { "epoch": 1.8257030780828745, "grad_norm": 1.6045938730239868, "learning_rate": 1.8775037770535298e-06, "loss": 0.7603, "step": 285770 }, { "epoch": 1.8257669652326132, "grad_norm": 0.7645987272262573, "learning_rate": 1.8761419221937138e-06, "loss": 1.0192, "step": 285780 }, { "epoch": 1.8258308523823517, "grad_norm": 1.4978324174880981, "learning_rate": 1.8747805519827855e-06, "loss": 0.7535, "step": 285790 }, { "epoch": 1.8258947395320906, "grad_norm": 0.6685463786125183, "learning_rate": 1.873419666434445e-06, "loss": 0.8815, "step": 285800 }, { "epoch": 1.825958626681829, "grad_norm": 1.0744636058807373, "learning_rate": 1.872059265562409e-06, "loss": 0.7381, "step": 285810 }, { "epoch": 1.826022513831568, "grad_norm": 0.9824548959732056, "learning_rate": 1.8706993493803726e-06, "loss": 1.048, "step": 285820 }, { "epoch": 1.8260864009813065, "grad_norm": 1.2852534055709839, "learning_rate": 1.8693399179020353e-06, "loss": 0.6329, "step": 285830 }, { "epoch": 1.8261502881310454, "grad_norm": 0.7904815077781677, "learning_rate": 1.8679809711410757e-06, "loss": 0.7611, "step": 285840 }, { "epoch": 1.826214175280784, "grad_norm": 1.1488205194473267, "learning_rate": 1.8666225091111878e-06, "loss": 0.802, "step": 285850 }, { "epoch": 1.8262780624305228, "grad_norm": 0.7961608171463013, "learning_rate": 1.8652645318260608e-06, "loss": 0.9429, "step": 285860 }, { "epoch": 1.8263419495802613, "grad_norm": 1.1986531019210815, "learning_rate": 1.8639070392993506e-06, "loss": 0.8178, "step": 285870 }, { "epoch": 1.8264058367300002, "grad_norm": 1.0353028774261475, "learning_rate": 1.862550031544752e-06, "loss": 0.7877, "step": 285880 }, { "epoch": 1.8264697238797387, "grad_norm": 0.8865573406219482, "learning_rate": 1.8611935085759092e-06, "loss": 0.7353, "step": 285890 }, { "epoch": 1.8265336110294776, "grad_norm": 0.6933355331420898, "learning_rate": 1.8598374704065002e-06, "loss": 0.9469, "step": 285900 }, { "epoch": 1.8265974981792161, "grad_norm": 0.6500905752182007, "learning_rate": 1.8584819170501755e-06, "loss": 0.8656, "step": 285910 }, { "epoch": 1.826661385328955, "grad_norm": 0.8234435319900513, "learning_rate": 1.8571268485205851e-06, "loss": 0.937, "step": 285920 }, { "epoch": 1.8267252724786935, "grad_norm": 2.764732837677002, "learning_rate": 1.8557722648313735e-06, "loss": 0.5897, "step": 285930 }, { "epoch": 1.8267891596284325, "grad_norm": 1.0919827222824097, "learning_rate": 1.8544181659961911e-06, "loss": 0.6466, "step": 285940 }, { "epoch": 1.826853046778171, "grad_norm": 1.1406519412994385, "learning_rate": 1.853064552028666e-06, "loss": 0.9709, "step": 285950 }, { "epoch": 1.8269169339279099, "grad_norm": 2.4636154174804688, "learning_rate": 1.851711422942437e-06, "loss": 1.1361, "step": 285960 }, { "epoch": 1.8269808210776484, "grad_norm": 0.872745931148529, "learning_rate": 1.8503587787511212e-06, "loss": 0.8822, "step": 285970 }, { "epoch": 1.8270447082273873, "grad_norm": 1.3167917728424072, "learning_rate": 1.8490066194683575e-06, "loss": 0.8776, "step": 285980 }, { "epoch": 1.8271085953771258, "grad_norm": 0.6231662631034851, "learning_rate": 1.8476549451077462e-06, "loss": 0.9667, "step": 285990 }, { "epoch": 1.8271724825268647, "grad_norm": 1.095565915107727, "learning_rate": 1.84630375568291e-06, "loss": 0.9367, "step": 286000 }, { "epoch": 1.8272363696766032, "grad_norm": 0.926649808883667, "learning_rate": 1.8449530512074542e-06, "loss": 1.0359, "step": 286010 }, { "epoch": 1.827300256826342, "grad_norm": 5.4127068519592285, "learning_rate": 1.8436028316949793e-06, "loss": 0.9705, "step": 286020 }, { "epoch": 1.8273641439760806, "grad_norm": 1.2995518445968628, "learning_rate": 1.8422530971590856e-06, "loss": 0.8106, "step": 286030 }, { "epoch": 1.8274280311258193, "grad_norm": 1.374437928199768, "learning_rate": 1.8409038476133678e-06, "loss": 0.8928, "step": 286040 }, { "epoch": 1.827491918275558, "grad_norm": 1.131495714187622, "learning_rate": 1.8395550830714093e-06, "loss": 0.8288, "step": 286050 }, { "epoch": 1.8275558054252967, "grad_norm": 1.104823112487793, "learning_rate": 1.8382068035467937e-06, "loss": 1.0923, "step": 286060 }, { "epoch": 1.8276196925750354, "grad_norm": 1.117811679840088, "learning_rate": 1.8368590090531102e-06, "loss": 0.8615, "step": 286070 }, { "epoch": 1.8276835797247741, "grad_norm": 0.7175796031951904, "learning_rate": 1.8355116996039146e-06, "loss": 0.7234, "step": 286080 }, { "epoch": 1.8277474668745128, "grad_norm": 1.426501750946045, "learning_rate": 1.8341648752127905e-06, "loss": 0.8822, "step": 286090 }, { "epoch": 1.8278113540242515, "grad_norm": 1.320160150527954, "learning_rate": 1.8328185358932882e-06, "loss": 0.8113, "step": 286100 }, { "epoch": 1.8278752411739903, "grad_norm": 1.6251180171966553, "learning_rate": 1.83147268165898e-06, "loss": 0.8898, "step": 286110 }, { "epoch": 1.827939128323729, "grad_norm": 0.7918174862861633, "learning_rate": 1.830127312523411e-06, "loss": 0.8613, "step": 286120 }, { "epoch": 1.8280030154734677, "grad_norm": 0.9489846229553223, "learning_rate": 1.8287824285001365e-06, "loss": 0.9402, "step": 286130 }, { "epoch": 1.8280669026232064, "grad_norm": 1.3466228246688843, "learning_rate": 1.8274380296026905e-06, "loss": 0.8081, "step": 286140 }, { "epoch": 1.828130789772945, "grad_norm": 0.9148550033569336, "learning_rate": 1.8260941158446288e-06, "loss": 0.7575, "step": 286150 }, { "epoch": 1.8281946769226838, "grad_norm": 0.531804621219635, "learning_rate": 1.8247506872394681e-06, "loss": 0.8309, "step": 286160 }, { "epoch": 1.8282585640724225, "grad_norm": 0.610683798789978, "learning_rate": 1.823407743800748e-06, "loss": 0.6663, "step": 286170 }, { "epoch": 1.8283224512221612, "grad_norm": 0.8566081523895264, "learning_rate": 1.822065285541985e-06, "loss": 1.0289, "step": 286180 }, { "epoch": 1.8283863383719, "grad_norm": 0.7995787858963013, "learning_rate": 1.8207233124767132e-06, "loss": 0.8781, "step": 286190 }, { "epoch": 1.8284502255216386, "grad_norm": 1.0090097188949585, "learning_rate": 1.8193818246184323e-06, "loss": 0.7488, "step": 286200 }, { "epoch": 1.8285141126713773, "grad_norm": 1.12493896484375, "learning_rate": 1.8180408219806655e-06, "loss": 0.9024, "step": 286210 }, { "epoch": 1.828577999821116, "grad_norm": 0.7762433290481567, "learning_rate": 1.8167003045769016e-06, "loss": 0.9644, "step": 286220 }, { "epoch": 1.8286418869708547, "grad_norm": 1.022645115852356, "learning_rate": 1.8153602724206576e-06, "loss": 0.9341, "step": 286230 }, { "epoch": 1.8287057741205934, "grad_norm": 1.4305933713912964, "learning_rate": 1.814020725525417e-06, "loss": 1.0233, "step": 286240 }, { "epoch": 1.8287696612703321, "grad_norm": 0.9852200746536255, "learning_rate": 1.8126816639046751e-06, "loss": 0.8914, "step": 286250 }, { "epoch": 1.8288335484200706, "grad_norm": 1.5261846780776978, "learning_rate": 1.811343087571915e-06, "loss": 0.9038, "step": 286260 }, { "epoch": 1.8288974355698095, "grad_norm": 1.0979644060134888, "learning_rate": 1.8100049965406206e-06, "loss": 0.8995, "step": 286270 }, { "epoch": 1.828961322719548, "grad_norm": 1.460039496421814, "learning_rate": 1.8086673908242702e-06, "loss": 0.8506, "step": 286280 }, { "epoch": 1.829025209869287, "grad_norm": 1.119490623474121, "learning_rate": 1.8073302704363248e-06, "loss": 0.8478, "step": 286290 }, { "epoch": 1.8290890970190254, "grad_norm": 0.6196275353431702, "learning_rate": 1.8059936353902684e-06, "loss": 0.7895, "step": 286300 }, { "epoch": 1.8291529841687644, "grad_norm": 0.7568504810333252, "learning_rate": 1.8046574856995345e-06, "loss": 0.7273, "step": 286310 }, { "epoch": 1.8292168713185029, "grad_norm": 1.0650426149368286, "learning_rate": 1.803321821377607e-06, "loss": 0.9648, "step": 286320 }, { "epoch": 1.8292807584682418, "grad_norm": 1.2894755601882935, "learning_rate": 1.801986642437914e-06, "loss": 0.8205, "step": 286330 }, { "epoch": 1.8293446456179803, "grad_norm": 0.8295124769210815, "learning_rate": 1.800651948893922e-06, "loss": 0.8621, "step": 286340 }, { "epoch": 1.8294085327677192, "grad_norm": 1.0976676940917969, "learning_rate": 1.7993177407590544e-06, "loss": 0.8081, "step": 286350 }, { "epoch": 1.8294724199174577, "grad_norm": 1.242518424987793, "learning_rate": 1.7979840180467666e-06, "loss": 0.9352, "step": 286360 }, { "epoch": 1.8295363070671966, "grad_norm": 0.7539724707603455, "learning_rate": 1.7966507807704701e-06, "loss": 0.8317, "step": 286370 }, { "epoch": 1.829600194216935, "grad_norm": 0.8778108954429626, "learning_rate": 1.7953180289436044e-06, "loss": 0.7374, "step": 286380 }, { "epoch": 1.829664081366674, "grad_norm": 1.0061482191085815, "learning_rate": 1.793985762579592e-06, "loss": 0.9777, "step": 286390 }, { "epoch": 1.8297279685164125, "grad_norm": 0.8564932346343994, "learning_rate": 1.7926539816918443e-06, "loss": 0.9104, "step": 286400 }, { "epoch": 1.8297918556661514, "grad_norm": 0.9976624250411987, "learning_rate": 1.7913226862937837e-06, "loss": 0.9519, "step": 286410 }, { "epoch": 1.82985574281589, "grad_norm": 0.8627698421478271, "learning_rate": 1.7899918763988e-06, "loss": 0.6869, "step": 286420 }, { "epoch": 1.8299196299656288, "grad_norm": 1.1749376058578491, "learning_rate": 1.7886615520203097e-06, "loss": 0.9258, "step": 286430 }, { "epoch": 1.8299835171153673, "grad_norm": 0.8614131212234497, "learning_rate": 1.7873317131717082e-06, "loss": 0.8995, "step": 286440 }, { "epoch": 1.8300474042651063, "grad_norm": 1.5317989587783813, "learning_rate": 1.7860023598663844e-06, "loss": 1.0053, "step": 286450 }, { "epoch": 1.8301112914148447, "grad_norm": 1.088987112045288, "learning_rate": 1.7846734921177276e-06, "loss": 0.8924, "step": 286460 }, { "epoch": 1.8301751785645837, "grad_norm": 1.2498538494110107, "learning_rate": 1.7833451099391274e-06, "loss": 0.879, "step": 286470 }, { "epoch": 1.8302390657143222, "grad_norm": 0.6987992525100708, "learning_rate": 1.782017213343945e-06, "loss": 0.8498, "step": 286480 }, { "epoch": 1.830302952864061, "grad_norm": 2.299447774887085, "learning_rate": 1.7806898023455698e-06, "loss": 0.8671, "step": 286490 }, { "epoch": 1.8303668400137996, "grad_norm": 0.996013879776001, "learning_rate": 1.7793628769573633e-06, "loss": 0.9277, "step": 286500 }, { "epoch": 1.8304307271635383, "grad_norm": 0.5067451596260071, "learning_rate": 1.7780364371926927e-06, "loss": 0.9661, "step": 286510 }, { "epoch": 1.830494614313277, "grad_norm": 0.6430745124816895, "learning_rate": 1.776710483064903e-06, "loss": 1.0379, "step": 286520 }, { "epoch": 1.8305585014630157, "grad_norm": 0.7153158783912659, "learning_rate": 1.7753850145873664e-06, "loss": 1.0634, "step": 286530 }, { "epoch": 1.8306223886127544, "grad_norm": 0.8899746537208557, "learning_rate": 1.774060031773417e-06, "loss": 0.898, "step": 286540 }, { "epoch": 1.830686275762493, "grad_norm": 1.5335935354232788, "learning_rate": 1.7727355346364105e-06, "loss": 0.9678, "step": 286550 }, { "epoch": 1.8307501629122318, "grad_norm": 0.967056393623352, "learning_rate": 1.7714115231896755e-06, "loss": 0.9838, "step": 286560 }, { "epoch": 1.8308140500619705, "grad_norm": 2.244809150695801, "learning_rate": 1.7700879974465568e-06, "loss": 0.9862, "step": 286570 }, { "epoch": 1.8308779372117092, "grad_norm": 1.108665943145752, "learning_rate": 1.7687649574203714e-06, "loss": 1.0522, "step": 286580 }, { "epoch": 1.830941824361448, "grad_norm": 1.2201392650604248, "learning_rate": 1.7674424031244418e-06, "loss": 0.6931, "step": 286590 }, { "epoch": 1.8310057115111866, "grad_norm": 0.8409663438796997, "learning_rate": 1.7661203345721078e-06, "loss": 0.9458, "step": 286600 }, { "epoch": 1.8310695986609253, "grad_norm": 1.0214024782180786, "learning_rate": 1.7647987517766585e-06, "loss": 0.6715, "step": 286610 }, { "epoch": 1.831133485810664, "grad_norm": 1.5210096836090088, "learning_rate": 1.7634776547514275e-06, "loss": 0.8119, "step": 286620 }, { "epoch": 1.8311973729604027, "grad_norm": 1.4261603355407715, "learning_rate": 1.7621570435096934e-06, "loss": 0.7577, "step": 286630 }, { "epoch": 1.8312612601101415, "grad_norm": 1.194935917854309, "learning_rate": 1.7608369180647788e-06, "loss": 0.8755, "step": 286640 }, { "epoch": 1.8313251472598802, "grad_norm": 2.938659191131592, "learning_rate": 1.7595172784299673e-06, "loss": 0.6624, "step": 286650 }, { "epoch": 1.8313890344096189, "grad_norm": 0.9520533680915833, "learning_rate": 1.7581981246185542e-06, "loss": 0.7869, "step": 286660 }, { "epoch": 1.8314529215593576, "grad_norm": 2.5831644535064697, "learning_rate": 1.7568794566438118e-06, "loss": 0.7728, "step": 286670 }, { "epoch": 1.8315168087090963, "grad_norm": 0.9544631242752075, "learning_rate": 1.755561274519041e-06, "loss": 0.8714, "step": 286680 }, { "epoch": 1.831580695858835, "grad_norm": 0.8048824071884155, "learning_rate": 1.7542435782574974e-06, "loss": 0.9109, "step": 286690 }, { "epoch": 1.8316445830085737, "grad_norm": 0.8290845155715942, "learning_rate": 1.752926367872465e-06, "loss": 0.7139, "step": 286700 }, { "epoch": 1.8317084701583124, "grad_norm": 1.0401479005813599, "learning_rate": 1.7516096433771944e-06, "loss": 0.8462, "step": 286710 }, { "epoch": 1.831772357308051, "grad_norm": 0.9665109515190125, "learning_rate": 1.7502934047849639e-06, "loss": 1.1502, "step": 286720 }, { "epoch": 1.8318362444577898, "grad_norm": 1.5829625129699707, "learning_rate": 1.7489776521090184e-06, "loss": 0.7414, "step": 286730 }, { "epoch": 1.8319001316075285, "grad_norm": 0.9078747034072876, "learning_rate": 1.747662385362614e-06, "loss": 0.8966, "step": 286740 }, { "epoch": 1.831964018757267, "grad_norm": 1.2583354711532593, "learning_rate": 1.7463476045589844e-06, "loss": 1.0177, "step": 286750 }, { "epoch": 1.832027905907006, "grad_norm": 0.9693160653114319, "learning_rate": 1.7450333097113913e-06, "loss": 0.7749, "step": 286760 }, { "epoch": 1.8320917930567444, "grad_norm": 1.3210150003433228, "learning_rate": 1.743719500833052e-06, "loss": 1.3723, "step": 286770 }, { "epoch": 1.8321556802064833, "grad_norm": 0.4251079559326172, "learning_rate": 1.7424061779372114e-06, "loss": 0.7269, "step": 286780 }, { "epoch": 1.8322195673562218, "grad_norm": 1.2777466773986816, "learning_rate": 1.7410933410370867e-06, "loss": 0.8654, "step": 286790 }, { "epoch": 1.8322834545059608, "grad_norm": 1.4226998090744019, "learning_rate": 1.739780990145895e-06, "loss": 0.9033, "step": 286800 }, { "epoch": 1.8323473416556992, "grad_norm": 0.9256820678710938, "learning_rate": 1.7384691252768704e-06, "loss": 1.1099, "step": 286810 }, { "epoch": 1.8324112288054382, "grad_norm": 1.0882539749145508, "learning_rate": 1.737157746443202e-06, "loss": 0.7031, "step": 286820 }, { "epoch": 1.8324751159551766, "grad_norm": 0.7053834795951843, "learning_rate": 1.7358468536581185e-06, "loss": 0.9929, "step": 286830 }, { "epoch": 1.8325390031049156, "grad_norm": 2.0581183433532715, "learning_rate": 1.7345364469348035e-06, "loss": 1.1567, "step": 286840 }, { "epoch": 1.832602890254654, "grad_norm": 0.852189302444458, "learning_rate": 1.7332265262864744e-06, "loss": 0.828, "step": 286850 }, { "epoch": 1.832666777404393, "grad_norm": 1.145141363143921, "learning_rate": 1.7319170917262983e-06, "loss": 0.8113, "step": 286860 }, { "epoch": 1.8327306645541315, "grad_norm": 1.4547284841537476, "learning_rate": 1.7306081432674814e-06, "loss": 0.8355, "step": 286870 }, { "epoch": 1.8327945517038704, "grad_norm": 0.9284454584121704, "learning_rate": 1.7292996809231965e-06, "loss": 0.8272, "step": 286880 }, { "epoch": 1.8328584388536089, "grad_norm": 1.0113308429718018, "learning_rate": 1.7279917047066275e-06, "loss": 0.9879, "step": 286890 }, { "epoch": 1.8329223260033478, "grad_norm": 2.9800875186920166, "learning_rate": 1.7266842146309358e-06, "loss": 0.5337, "step": 286900 }, { "epoch": 1.8329862131530863, "grad_norm": 0.674667239189148, "learning_rate": 1.7253772107093003e-06, "loss": 0.8118, "step": 286910 }, { "epoch": 1.8330501003028252, "grad_norm": 2.0343778133392334, "learning_rate": 1.7240706929548822e-06, "loss": 0.8474, "step": 286920 }, { "epoch": 1.8331139874525637, "grad_norm": 0.5409013628959656, "learning_rate": 1.7227646613808324e-06, "loss": 0.802, "step": 286930 }, { "epoch": 1.8331778746023026, "grad_norm": 1.0042235851287842, "learning_rate": 1.7214591160003124e-06, "loss": 0.8092, "step": 286940 }, { "epoch": 1.8332417617520411, "grad_norm": 0.6316293478012085, "learning_rate": 1.7201540568264673e-06, "loss": 0.7423, "step": 286950 }, { "epoch": 1.83330564890178, "grad_norm": 3.186025857925415, "learning_rate": 1.7188494838724367e-06, "loss": 0.8688, "step": 286960 }, { "epoch": 1.8333695360515185, "grad_norm": 1.0892722606658936, "learning_rate": 1.71754539715136e-06, "loss": 0.7347, "step": 286970 }, { "epoch": 1.8334334232012575, "grad_norm": 0.6171730160713196, "learning_rate": 1.716241796676371e-06, "loss": 0.9285, "step": 286980 }, { "epoch": 1.833497310350996, "grad_norm": 1.5199726819992065, "learning_rate": 1.7149386824605983e-06, "loss": 0.9471, "step": 286990 }, { "epoch": 1.8335611975007347, "grad_norm": 0.735580325126648, "learning_rate": 1.7136360545171703e-06, "loss": 0.8594, "step": 287000 }, { "epoch": 1.8336250846504734, "grad_norm": 1.267344355583191, "learning_rate": 1.712333912859193e-06, "loss": 0.981, "step": 287010 }, { "epoch": 1.833688971800212, "grad_norm": 2.0481607913970947, "learning_rate": 1.711032257499795e-06, "loss": 1.0415, "step": 287020 }, { "epoch": 1.8337528589499508, "grad_norm": 1.4587774276733398, "learning_rate": 1.7097310884520768e-06, "loss": 0.9411, "step": 287030 }, { "epoch": 1.8338167460996895, "grad_norm": 0.8664649724960327, "learning_rate": 1.7084304057291445e-06, "loss": 1.0081, "step": 287040 }, { "epoch": 1.8338806332494282, "grad_norm": 1.8076395988464355, "learning_rate": 1.7071302093440878e-06, "loss": 0.9189, "step": 287050 }, { "epoch": 1.8339445203991669, "grad_norm": 0.7583857178688049, "learning_rate": 1.7058304993100183e-06, "loss": 0.9083, "step": 287060 }, { "epoch": 1.8340084075489056, "grad_norm": 1.2719414234161377, "learning_rate": 1.7045312756400145e-06, "loss": 0.8046, "step": 287070 }, { "epoch": 1.8340722946986443, "grad_norm": 1.1557854413986206, "learning_rate": 1.7032325383471604e-06, "loss": 0.888, "step": 287080 }, { "epoch": 1.834136181848383, "grad_norm": 1.6682637929916382, "learning_rate": 1.7019342874445343e-06, "loss": 0.8401, "step": 287090 }, { "epoch": 1.8342000689981217, "grad_norm": 1.3640217781066895, "learning_rate": 1.7006365229452204e-06, "loss": 0.9644, "step": 287100 }, { "epoch": 1.8342639561478604, "grad_norm": 1.4971295595169067, "learning_rate": 1.6993392448622747e-06, "loss": 1.241, "step": 287110 }, { "epoch": 1.8343278432975991, "grad_norm": 0.9940297603607178, "learning_rate": 1.6980424532087702e-06, "loss": 0.9292, "step": 287120 }, { "epoch": 1.8343917304473378, "grad_norm": 1.4869452714920044, "learning_rate": 1.696746147997774e-06, "loss": 0.9535, "step": 287130 }, { "epoch": 1.8344556175970765, "grad_norm": 1.4654951095581055, "learning_rate": 1.6954503292423207e-06, "loss": 0.7897, "step": 287140 }, { "epoch": 1.8345195047468152, "grad_norm": 0.8898797631263733, "learning_rate": 1.6941549969554826e-06, "loss": 0.8698, "step": 287150 }, { "epoch": 1.834583391896554, "grad_norm": 1.1146529912948608, "learning_rate": 1.6928601511502828e-06, "loss": 0.8981, "step": 287160 }, { "epoch": 1.8346472790462927, "grad_norm": 1.2310070991516113, "learning_rate": 1.6915657918397831e-06, "loss": 1.1368, "step": 287170 }, { "epoch": 1.8347111661960314, "grad_norm": 1.4128965139389038, "learning_rate": 1.690271919037001e-06, "loss": 1.0198, "step": 287180 }, { "epoch": 1.83477505334577, "grad_norm": 1.618044376373291, "learning_rate": 1.6889785327549811e-06, "loss": 1.0275, "step": 287190 }, { "epoch": 1.8348389404955088, "grad_norm": 1.2252626419067383, "learning_rate": 1.6876856330067359e-06, "loss": 1.1003, "step": 287200 }, { "epoch": 1.8349028276452475, "grad_norm": 0.8357836008071899, "learning_rate": 1.686393219805299e-06, "loss": 0.9564, "step": 287210 }, { "epoch": 1.8349667147949862, "grad_norm": 1.0785801410675049, "learning_rate": 1.6851012931636767e-06, "loss": 0.8547, "step": 287220 }, { "epoch": 1.835030601944725, "grad_norm": 0.7203280329704285, "learning_rate": 1.683809853094881e-06, "loss": 0.9503, "step": 287230 }, { "epoch": 1.8350944890944634, "grad_norm": 0.9999285936355591, "learning_rate": 1.6825188996119178e-06, "loss": 0.7627, "step": 287240 }, { "epoch": 1.8351583762442023, "grad_norm": 0.6865089535713196, "learning_rate": 1.6812284327277994e-06, "loss": 0.8916, "step": 287250 }, { "epoch": 1.8352222633939408, "grad_norm": 1.0471559762954712, "learning_rate": 1.679938452455504e-06, "loss": 0.8243, "step": 287260 }, { "epoch": 1.8352861505436797, "grad_norm": 1.1230990886688232, "learning_rate": 1.6786489588080322e-06, "loss": 0.884, "step": 287270 }, { "epoch": 1.8353500376934182, "grad_norm": 1.3938952684402466, "learning_rate": 1.677359951798374e-06, "loss": 0.8603, "step": 287280 }, { "epoch": 1.8354139248431571, "grad_norm": 1.2799034118652344, "learning_rate": 1.676071431439502e-06, "loss": 1.0646, "step": 287290 }, { "epoch": 1.8354778119928956, "grad_norm": 1.3219281435012817, "learning_rate": 1.6747833977444006e-06, "loss": 0.9428, "step": 287300 }, { "epoch": 1.8355416991426345, "grad_norm": 0.9605557322502136, "learning_rate": 1.6734958507260313e-06, "loss": 0.6734, "step": 287310 }, { "epoch": 1.835605586292373, "grad_norm": 0.8685451745986938, "learning_rate": 1.6722087903973726e-06, "loss": 0.8308, "step": 287320 }, { "epoch": 1.835669473442112, "grad_norm": 0.6971920132637024, "learning_rate": 1.6709222167713756e-06, "loss": 0.7354, "step": 287330 }, { "epoch": 1.8357333605918504, "grad_norm": 0.8624512553215027, "learning_rate": 1.6696361298610075e-06, "loss": 0.8008, "step": 287340 }, { "epoch": 1.8357972477415894, "grad_norm": 1.2721669673919678, "learning_rate": 1.668350529679208e-06, "loss": 0.7032, "step": 287350 }, { "epoch": 1.8358611348913279, "grad_norm": 1.4571473598480225, "learning_rate": 1.6670654162389387e-06, "loss": 0.9812, "step": 287360 }, { "epoch": 1.8359250220410668, "grad_norm": 0.6332854628562927, "learning_rate": 1.6657807895531342e-06, "loss": 0.6715, "step": 287370 }, { "epoch": 1.8359889091908053, "grad_norm": 0.9317585825920105, "learning_rate": 1.6644966496347336e-06, "loss": 0.643, "step": 287380 }, { "epoch": 1.8360527963405442, "grad_norm": 0.5826150178909302, "learning_rate": 1.6632129964966602e-06, "loss": 0.9093, "step": 287390 }, { "epoch": 1.8361166834902827, "grad_norm": 1.3244376182556152, "learning_rate": 1.6619298301518594e-06, "loss": 0.8643, "step": 287400 }, { "epoch": 1.8361805706400216, "grad_norm": 1.741381049156189, "learning_rate": 1.6606471506132315e-06, "loss": 0.828, "step": 287410 }, { "epoch": 1.83624445778976, "grad_norm": 1.6920572519302368, "learning_rate": 1.6593649578937165e-06, "loss": 0.941, "step": 287420 }, { "epoch": 1.836308344939499, "grad_norm": 1.039543867111206, "learning_rate": 1.6580832520062095e-06, "loss": 1.0667, "step": 287430 }, { "epoch": 1.8363722320892375, "grad_norm": 0.8765792846679688, "learning_rate": 1.656802032963628e-06, "loss": 0.7044, "step": 287440 }, { "epoch": 1.8364361192389764, "grad_norm": 0.897909939289093, "learning_rate": 1.6555213007788784e-06, "loss": 0.9957, "step": 287450 }, { "epoch": 1.836500006388715, "grad_norm": 1.0654629468917847, "learning_rate": 1.6542410554648446e-06, "loss": 0.8229, "step": 287460 }, { "epoch": 1.8365638935384538, "grad_norm": 0.8408827781677246, "learning_rate": 1.6529612970344389e-06, "loss": 0.8093, "step": 287470 }, { "epoch": 1.8366277806881923, "grad_norm": 0.887602686882019, "learning_rate": 1.6516820255005284e-06, "loss": 0.8046, "step": 287480 }, { "epoch": 1.836691667837931, "grad_norm": 0.9669412970542908, "learning_rate": 1.650403240876014e-06, "loss": 0.8687, "step": 287490 }, { "epoch": 1.8367555549876697, "grad_norm": 1.320043921470642, "learning_rate": 1.6491249431737633e-06, "loss": 0.9349, "step": 287500 }, { "epoch": 1.8368194421374084, "grad_norm": 1.1708213090896606, "learning_rate": 1.6478471324066603e-06, "loss": 1.0136, "step": 287510 }, { "epoch": 1.8368833292871471, "grad_norm": 0.8634885549545288, "learning_rate": 1.6465698085875558e-06, "loss": 0.9404, "step": 287520 }, { "epoch": 1.8369472164368859, "grad_norm": 0.9964984059333801, "learning_rate": 1.6452929717293398e-06, "loss": 0.7876, "step": 287530 }, { "epoch": 1.8370111035866246, "grad_norm": 0.9559676647186279, "learning_rate": 1.644016621844846e-06, "loss": 1.0267, "step": 287540 }, { "epoch": 1.8370749907363633, "grad_norm": 0.8222513198852539, "learning_rate": 1.6427407589469424e-06, "loss": 0.8964, "step": 287550 }, { "epoch": 1.837138877886102, "grad_norm": 0.8146112561225891, "learning_rate": 1.6415928987229767e-06, "loss": 0.8593, "step": 287560 }, { "epoch": 1.8372027650358407, "grad_norm": 0.6883629560470581, "learning_rate": 1.6403179611349794e-06, "loss": 0.9835, "step": 287570 }, { "epoch": 1.8372666521855794, "grad_norm": 0.7211798429489136, "learning_rate": 1.6390435105708256e-06, "loss": 0.9449, "step": 287580 }, { "epoch": 1.837330539335318, "grad_norm": 1.4879876375198364, "learning_rate": 1.637769547043333e-06, "loss": 0.8503, "step": 287590 }, { "epoch": 1.8373944264850568, "grad_norm": 0.8715039491653442, "learning_rate": 1.636496070565341e-06, "loss": 1.0713, "step": 287600 }, { "epoch": 1.8374583136347955, "grad_norm": 0.9951567053794861, "learning_rate": 1.6352230811496726e-06, "loss": 0.992, "step": 287610 }, { "epoch": 1.8375222007845342, "grad_norm": 1.3647722005844116, "learning_rate": 1.6339505788091568e-06, "loss": 0.6799, "step": 287620 }, { "epoch": 1.837586087934273, "grad_norm": 1.3705017566680908, "learning_rate": 1.6326785635565944e-06, "loss": 0.8357, "step": 287630 }, { "epoch": 1.8376499750840116, "grad_norm": 0.980297327041626, "learning_rate": 1.6314070354048082e-06, "loss": 0.9319, "step": 287640 }, { "epoch": 1.8377138622337503, "grad_norm": 1.1601104736328125, "learning_rate": 1.630135994366594e-06, "loss": 0.8532, "step": 287650 }, { "epoch": 1.837777749383489, "grad_norm": 2.0433003902435303, "learning_rate": 1.6288654404547576e-06, "loss": 0.5762, "step": 287660 }, { "epoch": 1.8378416365332277, "grad_norm": 1.367573857307434, "learning_rate": 1.6275953736820893e-06, "loss": 0.692, "step": 287670 }, { "epoch": 1.8379055236829664, "grad_norm": 0.7693257927894592, "learning_rate": 1.6263257940613895e-06, "loss": 0.7481, "step": 287680 }, { "epoch": 1.8379694108327052, "grad_norm": 0.8016722202301025, "learning_rate": 1.6250567016054374e-06, "loss": 0.8896, "step": 287690 }, { "epoch": 1.8380332979824439, "grad_norm": 0.7425395846366882, "learning_rate": 1.6237880963270113e-06, "loss": 0.6763, "step": 287700 }, { "epoch": 1.8380971851321826, "grad_norm": 1.4685392379760742, "learning_rate": 1.62251997823889e-06, "loss": 0.9643, "step": 287710 }, { "epoch": 1.8381610722819213, "grad_norm": 0.5433425903320312, "learning_rate": 1.6212523473538521e-06, "loss": 1.035, "step": 287720 }, { "epoch": 1.8382249594316598, "grad_norm": 0.8438354134559631, "learning_rate": 1.6199852036846486e-06, "loss": 0.7417, "step": 287730 }, { "epoch": 1.8382888465813987, "grad_norm": 0.7638826966285706, "learning_rate": 1.6187185472440525e-06, "loss": 0.9106, "step": 287740 }, { "epoch": 1.8383527337311372, "grad_norm": 0.8857637643814087, "learning_rate": 1.6174523780448148e-06, "loss": 0.8946, "step": 287750 }, { "epoch": 1.838416620880876, "grad_norm": 0.9542201161384583, "learning_rate": 1.6161866960996864e-06, "loss": 0.6759, "step": 287760 }, { "epoch": 1.8384805080306146, "grad_norm": 2.320218324661255, "learning_rate": 1.6149215014214236e-06, "loss": 0.9944, "step": 287770 }, { "epoch": 1.8385443951803535, "grad_norm": 4.098390579223633, "learning_rate": 1.61365679402275e-06, "loss": 0.9212, "step": 287780 }, { "epoch": 1.838608282330092, "grad_norm": 2.041461944580078, "learning_rate": 1.6123925739164213e-06, "loss": 0.9272, "step": 287790 }, { "epoch": 1.838672169479831, "grad_norm": 1.1152058839797974, "learning_rate": 1.6111288411151559e-06, "loss": 0.9968, "step": 287800 }, { "epoch": 1.8387360566295694, "grad_norm": 0.7218173146247864, "learning_rate": 1.6098655956316878e-06, "loss": 1.0449, "step": 287810 }, { "epoch": 1.8387999437793083, "grad_norm": 1.862316370010376, "learning_rate": 1.6086028374787343e-06, "loss": 0.8643, "step": 287820 }, { "epoch": 1.8388638309290468, "grad_norm": 1.164618730545044, "learning_rate": 1.6073405666690188e-06, "loss": 0.9774, "step": 287830 }, { "epoch": 1.8389277180787857, "grad_norm": 1.2470999956130981, "learning_rate": 1.6060787832152425e-06, "loss": 0.9728, "step": 287840 }, { "epoch": 1.8389916052285242, "grad_norm": 0.8947863578796387, "learning_rate": 1.604817487130128e-06, "loss": 1.2326, "step": 287850 }, { "epoch": 1.8390554923782632, "grad_norm": 1.0724859237670898, "learning_rate": 1.6035566784263656e-06, "loss": 0.9335, "step": 287860 }, { "epoch": 1.8391193795280016, "grad_norm": 0.7424639463424683, "learning_rate": 1.602296357116656e-06, "loss": 1.0477, "step": 287870 }, { "epoch": 1.8391832666777406, "grad_norm": 1.7759449481964111, "learning_rate": 1.6010365232136893e-06, "loss": 0.6222, "step": 287880 }, { "epoch": 1.839247153827479, "grad_norm": 1.2649390697479248, "learning_rate": 1.5997771767301605e-06, "loss": 0.7512, "step": 287890 }, { "epoch": 1.839311040977218, "grad_norm": 0.6217682957649231, "learning_rate": 1.5985183176787433e-06, "loss": 0.8029, "step": 287900 }, { "epoch": 1.8393749281269565, "grad_norm": 0.551947832107544, "learning_rate": 1.597259946072127e-06, "loss": 0.9035, "step": 287910 }, { "epoch": 1.8394388152766954, "grad_norm": 0.8400312066078186, "learning_rate": 1.5960020619229743e-06, "loss": 0.6912, "step": 287920 }, { "epoch": 1.8395027024264339, "grad_norm": 0.7524799108505249, "learning_rate": 1.5947446652439524e-06, "loss": 0.7257, "step": 287930 }, { "epoch": 1.8395665895761728, "grad_norm": 0.602583646774292, "learning_rate": 1.5934877560477347e-06, "loss": 0.6298, "step": 287940 }, { "epoch": 1.8396304767259113, "grad_norm": 1.2925169467926025, "learning_rate": 1.5922313343469607e-06, "loss": 0.8135, "step": 287950 }, { "epoch": 1.8396943638756502, "grad_norm": 0.9474080801010132, "learning_rate": 1.5909754001543097e-06, "loss": 0.8911, "step": 287960 }, { "epoch": 1.8397582510253887, "grad_norm": 1.1987119913101196, "learning_rate": 1.5897199534824048e-06, "loss": 1.0228, "step": 287970 }, { "epoch": 1.8398221381751274, "grad_norm": 0.922034502029419, "learning_rate": 1.5884649943439134e-06, "loss": 0.7103, "step": 287980 }, { "epoch": 1.8398860253248661, "grad_norm": 1.1555603742599487, "learning_rate": 1.5872105227514477e-06, "loss": 1.0575, "step": 287990 }, { "epoch": 1.8399499124746048, "grad_norm": 1.153542160987854, "learning_rate": 1.5859565387176644e-06, "loss": 0.8817, "step": 288000 }, { "epoch": 1.8400137996243435, "grad_norm": 0.9662725925445557, "learning_rate": 1.5847030422551812e-06, "loss": 0.8067, "step": 288010 }, { "epoch": 1.8400776867740822, "grad_norm": 1.2538738250732422, "learning_rate": 1.5834500333766212e-06, "loss": 1.019, "step": 288020 }, { "epoch": 1.840141573923821, "grad_norm": 0.8852131962776184, "learning_rate": 1.5821975120946076e-06, "loss": 0.7435, "step": 288030 }, { "epoch": 1.8402054610735596, "grad_norm": 1.0147486925125122, "learning_rate": 1.5809454784217525e-06, "loss": 0.8279, "step": 288040 }, { "epoch": 1.8402693482232984, "grad_norm": 1.0600953102111816, "learning_rate": 1.5796939323706628e-06, "loss": 0.9367, "step": 288050 }, { "epoch": 1.840333235373037, "grad_norm": 0.8614944219589233, "learning_rate": 1.5784428739539503e-06, "loss": 0.7678, "step": 288060 }, { "epoch": 1.8403971225227758, "grad_norm": 0.8316081762313843, "learning_rate": 1.5771923031841994e-06, "loss": 0.9097, "step": 288070 }, { "epoch": 1.8404610096725145, "grad_norm": 0.9914312362670898, "learning_rate": 1.575942220074017e-06, "loss": 0.7828, "step": 288080 }, { "epoch": 1.8405248968222532, "grad_norm": 0.7772271037101746, "learning_rate": 1.5746926246359929e-06, "loss": 0.82, "step": 288090 }, { "epoch": 1.8405887839719919, "grad_norm": 0.935269832611084, "learning_rate": 1.5734435168827055e-06, "loss": 0.8284, "step": 288100 }, { "epoch": 1.8406526711217306, "grad_norm": 1.687303900718689, "learning_rate": 1.5721948968267398e-06, "loss": 0.9816, "step": 288110 }, { "epoch": 1.8407165582714693, "grad_norm": 1.192875862121582, "learning_rate": 1.5709467644806631e-06, "loss": 1.1521, "step": 288120 }, { "epoch": 1.840780445421208, "grad_norm": 0.7837314009666443, "learning_rate": 1.5696991198570488e-06, "loss": 0.8343, "step": 288130 }, { "epoch": 1.8408443325709467, "grad_norm": 1.3365057706832886, "learning_rate": 1.5684519629684646e-06, "loss": 0.5837, "step": 288140 }, { "epoch": 1.8409082197206854, "grad_norm": 1.0137981176376343, "learning_rate": 1.5672052938274729e-06, "loss": 0.8087, "step": 288150 }, { "epoch": 1.8409721068704241, "grad_norm": 0.8681532740592957, "learning_rate": 1.565959112446619e-06, "loss": 0.9406, "step": 288160 }, { "epoch": 1.8410359940201628, "grad_norm": 0.8951544165611267, "learning_rate": 1.5647134188384593e-06, "loss": 1.009, "step": 288170 }, { "epoch": 1.8410998811699015, "grad_norm": 0.882915198802948, "learning_rate": 1.5634682130155343e-06, "loss": 0.7041, "step": 288180 }, { "epoch": 1.8411637683196402, "grad_norm": 0.8915712833404541, "learning_rate": 1.5622234949903947e-06, "loss": 0.7062, "step": 288190 }, { "epoch": 1.841227655469379, "grad_norm": 1.0253684520721436, "learning_rate": 1.5609792647755638e-06, "loss": 0.8977, "step": 288200 }, { "epoch": 1.8412915426191176, "grad_norm": 0.9191253185272217, "learning_rate": 1.5597355223835818e-06, "loss": 0.7529, "step": 288210 }, { "epoch": 1.8413554297688561, "grad_norm": 1.1533896923065186, "learning_rate": 1.5584922678269665e-06, "loss": 0.8378, "step": 288220 }, { "epoch": 1.841419316918595, "grad_norm": 1.0386509895324707, "learning_rate": 1.5572495011182464e-06, "loss": 1.2127, "step": 288230 }, { "epoch": 1.8414832040683335, "grad_norm": 0.6427866220474243, "learning_rate": 1.5560072222699284e-06, "loss": 0.8755, "step": 288240 }, { "epoch": 1.8415470912180725, "grad_norm": 1.0928500890731812, "learning_rate": 1.5547654312945303e-06, "loss": 0.7724, "step": 288250 }, { "epoch": 1.841610978367811, "grad_norm": 0.9827240705490112, "learning_rate": 1.5535241282045533e-06, "loss": 0.8836, "step": 288260 }, { "epoch": 1.8416748655175499, "grad_norm": 0.6995918154716492, "learning_rate": 1.5522833130125036e-06, "loss": 0.7469, "step": 288270 }, { "epoch": 1.8417387526672884, "grad_norm": 1.1925073862075806, "learning_rate": 1.5510429857308717e-06, "loss": 0.9589, "step": 288280 }, { "epoch": 1.8418026398170273, "grad_norm": 1.1228277683258057, "learning_rate": 1.5498031463721475e-06, "loss": 0.8682, "step": 288290 }, { "epoch": 1.8418665269667658, "grad_norm": 0.6336076259613037, "learning_rate": 1.5485637949488262e-06, "loss": 1.0068, "step": 288300 }, { "epoch": 1.8419304141165047, "grad_norm": 0.8404168486595154, "learning_rate": 1.5473249314733818e-06, "loss": 0.9751, "step": 288310 }, { "epoch": 1.8419943012662432, "grad_norm": 0.8288664221763611, "learning_rate": 1.5460865559582983e-06, "loss": 0.8593, "step": 288320 }, { "epoch": 1.8420581884159821, "grad_norm": 0.7925769090652466, "learning_rate": 1.5448486684160325e-06, "loss": 0.9489, "step": 288330 }, { "epoch": 1.8421220755657206, "grad_norm": 1.0064537525177002, "learning_rate": 1.5436112688590688e-06, "loss": 0.9611, "step": 288340 }, { "epoch": 1.8421859627154595, "grad_norm": 0.691359281539917, "learning_rate": 1.5423743572998527e-06, "loss": 0.722, "step": 288350 }, { "epoch": 1.842249849865198, "grad_norm": 1.405712366104126, "learning_rate": 1.5411379337508524e-06, "loss": 0.9765, "step": 288360 }, { "epoch": 1.842313737014937, "grad_norm": 0.6220401525497437, "learning_rate": 1.539901998224519e-06, "loss": 0.5838, "step": 288370 }, { "epoch": 1.8423776241646754, "grad_norm": 0.6910040378570557, "learning_rate": 1.538666550733292e-06, "loss": 0.8711, "step": 288380 }, { "epoch": 1.8424415113144144, "grad_norm": 0.8294619917869568, "learning_rate": 1.537431591289623e-06, "loss": 0.9425, "step": 288390 }, { "epoch": 1.8425053984641528, "grad_norm": 0.6665295958518982, "learning_rate": 1.5361971199059412e-06, "loss": 0.8017, "step": 288400 }, { "epoch": 1.8425692856138918, "grad_norm": 1.0412260293960571, "learning_rate": 1.5349631365946805e-06, "loss": 0.9082, "step": 288410 }, { "epoch": 1.8426331727636303, "grad_norm": 3.229555368423462, "learning_rate": 1.5337296413682644e-06, "loss": 0.7304, "step": 288420 }, { "epoch": 1.8426970599133692, "grad_norm": 2.063892364501953, "learning_rate": 1.5324966342391333e-06, "loss": 0.8002, "step": 288430 }, { "epoch": 1.8427609470631077, "grad_norm": 1.6300920248031616, "learning_rate": 1.5312641152196772e-06, "loss": 1.0348, "step": 288440 }, { "epoch": 1.8428248342128464, "grad_norm": 0.600158154964447, "learning_rate": 1.5300320843223304e-06, "loss": 0.8067, "step": 288450 }, { "epoch": 1.842888721362585, "grad_norm": 0.8928928375244141, "learning_rate": 1.5288005415594886e-06, "loss": 1.1524, "step": 288460 }, { "epoch": 1.8429526085123238, "grad_norm": 0.9556632041931152, "learning_rate": 1.5275694869435698e-06, "loss": 0.9155, "step": 288470 }, { "epoch": 1.8430164956620625, "grad_norm": 0.8034154176712036, "learning_rate": 1.5263389204869472e-06, "loss": 0.8602, "step": 288480 }, { "epoch": 1.8430803828118012, "grad_norm": 0.9617866277694702, "learning_rate": 1.5251088422020389e-06, "loss": 0.7674, "step": 288490 }, { "epoch": 1.84314426996154, "grad_norm": 0.8912802934646606, "learning_rate": 1.5238792521012124e-06, "loss": 0.8824, "step": 288500 }, { "epoch": 1.8432081571112786, "grad_norm": 0.8857015371322632, "learning_rate": 1.5226501501968636e-06, "loss": 0.8232, "step": 288510 }, { "epoch": 1.8432720442610173, "grad_norm": 0.6874731779098511, "learning_rate": 1.5214215365013661e-06, "loss": 0.7386, "step": 288520 }, { "epoch": 1.843335931410756, "grad_norm": 1.0015004873275757, "learning_rate": 1.5201934110270932e-06, "loss": 0.9648, "step": 288530 }, { "epoch": 1.8433998185604947, "grad_norm": 1.2330206632614136, "learning_rate": 1.5189657737864127e-06, "loss": 0.8617, "step": 288540 }, { "epoch": 1.8434637057102334, "grad_norm": 0.8535049557685852, "learning_rate": 1.5177386247916925e-06, "loss": 0.8291, "step": 288550 }, { "epoch": 1.8435275928599721, "grad_norm": 1.0177775621414185, "learning_rate": 1.516511964055284e-06, "loss": 0.857, "step": 288560 }, { "epoch": 1.8435914800097108, "grad_norm": 1.0535317659378052, "learning_rate": 1.5152857915895436e-06, "loss": 0.9683, "step": 288570 }, { "epoch": 1.8436553671594496, "grad_norm": 0.6651054620742798, "learning_rate": 1.5140601074068228e-06, "loss": 0.9026, "step": 288580 }, { "epoch": 1.8437192543091883, "grad_norm": 0.7523433566093445, "learning_rate": 1.5128349115194619e-06, "loss": 0.6263, "step": 288590 }, { "epoch": 1.843783141458927, "grad_norm": 0.7467684149742126, "learning_rate": 1.5116102039398005e-06, "loss": 0.8923, "step": 288600 }, { "epoch": 1.8438470286086657, "grad_norm": 1.0226775407791138, "learning_rate": 1.5103859846801738e-06, "loss": 0.999, "step": 288610 }, { "epoch": 1.8439109157584044, "grad_norm": 1.6206825971603394, "learning_rate": 1.5091622537529105e-06, "loss": 0.9215, "step": 288620 }, { "epoch": 1.843974802908143, "grad_norm": 1.0661346912384033, "learning_rate": 1.5079390111703285e-06, "loss": 0.6762, "step": 288630 }, { "epoch": 1.8440386900578818, "grad_norm": 0.7579408288002014, "learning_rate": 1.5067162569447623e-06, "loss": 0.7026, "step": 288640 }, { "epoch": 1.8441025772076205, "grad_norm": 0.6882103085517883, "learning_rate": 1.5054939910885079e-06, "loss": 0.8362, "step": 288650 }, { "epoch": 1.8441664643573592, "grad_norm": 0.9294477701187134, "learning_rate": 1.5042722136138887e-06, "loss": 0.8447, "step": 288660 }, { "epoch": 1.844230351507098, "grad_norm": 0.9065951108932495, "learning_rate": 1.5030509245331947e-06, "loss": 0.9052, "step": 288670 }, { "epoch": 1.8442942386568366, "grad_norm": 1.4120420217514038, "learning_rate": 1.501830123858744e-06, "loss": 0.8919, "step": 288680 }, { "epoch": 1.844358125806575, "grad_norm": 0.5979344844818115, "learning_rate": 1.50060981160281e-06, "loss": 0.905, "step": 288690 }, { "epoch": 1.844422012956314, "grad_norm": 1.209514856338501, "learning_rate": 1.4993899877776995e-06, "loss": 0.9565, "step": 288700 }, { "epoch": 1.8444859001060525, "grad_norm": 0.7128211259841919, "learning_rate": 1.4981706523956918e-06, "loss": 0.714, "step": 288710 }, { "epoch": 1.8445497872557914, "grad_norm": 0.9613263607025146, "learning_rate": 1.4969518054690657e-06, "loss": 0.8484, "step": 288720 }, { "epoch": 1.84461367440553, "grad_norm": 0.8404272794723511, "learning_rate": 1.4957334470100892e-06, "loss": 0.6542, "step": 288730 }, { "epoch": 1.8446775615552689, "grad_norm": 1.007049560546875, "learning_rate": 1.494515577031047e-06, "loss": 0.8816, "step": 288740 }, { "epoch": 1.8447414487050073, "grad_norm": 1.3427143096923828, "learning_rate": 1.4932981955441906e-06, "loss": 1.0498, "step": 288750 }, { "epoch": 1.8448053358547463, "grad_norm": 1.0202215909957886, "learning_rate": 1.492081302561793e-06, "loss": 0.9685, "step": 288760 }, { "epoch": 1.8448692230044847, "grad_norm": 1.1484458446502686, "learning_rate": 1.490864898096095e-06, "loss": 1.004, "step": 288770 }, { "epoch": 1.8449331101542237, "grad_norm": 0.7522885203361511, "learning_rate": 1.4896489821593584e-06, "loss": 0.7973, "step": 288780 }, { "epoch": 1.8449969973039622, "grad_norm": 1.5658546686172485, "learning_rate": 1.4884335547638185e-06, "loss": 1.1108, "step": 288790 }, { "epoch": 1.845060884453701, "grad_norm": 1.085937738418579, "learning_rate": 1.487218615921726e-06, "loss": 0.9513, "step": 288800 }, { "epoch": 1.8451247716034396, "grad_norm": 1.7706001996994019, "learning_rate": 1.4860041656453106e-06, "loss": 0.9771, "step": 288810 }, { "epoch": 1.8451886587531785, "grad_norm": 0.6471644043922424, "learning_rate": 1.4847902039467953e-06, "loss": 0.9614, "step": 288820 }, { "epoch": 1.845252545902917, "grad_norm": 0.7767409682273865, "learning_rate": 1.4835767308384264e-06, "loss": 0.8322, "step": 288830 }, { "epoch": 1.845316433052656, "grad_norm": 1.066179633140564, "learning_rate": 1.4823637463324047e-06, "loss": 0.7936, "step": 288840 }, { "epoch": 1.8453803202023944, "grad_norm": 0.8228965997695923, "learning_rate": 1.4811512504409597e-06, "loss": 0.8987, "step": 288850 }, { "epoch": 1.8454442073521333, "grad_norm": 1.1014748811721802, "learning_rate": 1.4799392431762927e-06, "loss": 0.7715, "step": 288860 }, { "epoch": 1.8455080945018718, "grad_norm": 0.8230012059211731, "learning_rate": 1.478727724550616e-06, "loss": 0.7198, "step": 288870 }, { "epoch": 1.8455719816516107, "grad_norm": 0.8320961594581604, "learning_rate": 1.4775166945761309e-06, "loss": 0.9715, "step": 288880 }, { "epoch": 1.8456358688013492, "grad_norm": 0.9189849495887756, "learning_rate": 1.476306153265028e-06, "loss": 1.0131, "step": 288890 }, { "epoch": 1.8456997559510882, "grad_norm": 1.1729072332382202, "learning_rate": 1.4750961006294972e-06, "loss": 0.9605, "step": 288900 }, { "epoch": 1.8457636431008266, "grad_norm": 0.8219770193099976, "learning_rate": 1.4738865366817345e-06, "loss": 0.7105, "step": 288910 }, { "epoch": 1.8458275302505656, "grad_norm": 1.2123422622680664, "learning_rate": 1.4726774614339079e-06, "loss": 1.0589, "step": 288920 }, { "epoch": 1.845891417400304, "grad_norm": 0.8864356875419617, "learning_rate": 1.4714688748982075e-06, "loss": 1.0387, "step": 288930 }, { "epoch": 1.8459553045500428, "grad_norm": 1.4217449426651, "learning_rate": 1.470260777086796e-06, "loss": 0.9907, "step": 288940 }, { "epoch": 1.8460191916997815, "grad_norm": 0.843344509601593, "learning_rate": 1.4690531680118413e-06, "loss": 1.0661, "step": 288950 }, { "epoch": 1.8460830788495202, "grad_norm": 0.7461937069892883, "learning_rate": 1.4678460476855116e-06, "loss": 0.9238, "step": 288960 }, { "epoch": 1.8461469659992589, "grad_norm": 1.053181529045105, "learning_rate": 1.4666394161199527e-06, "loss": 0.8147, "step": 288970 }, { "epoch": 1.8462108531489976, "grad_norm": 1.020796537399292, "learning_rate": 1.4654332733273269e-06, "loss": 0.8422, "step": 288980 }, { "epoch": 1.8462747402987363, "grad_norm": 1.2979925870895386, "learning_rate": 1.4642276193197747e-06, "loss": 0.9054, "step": 288990 }, { "epoch": 1.846338627448475, "grad_norm": 0.9862684011459351, "learning_rate": 1.4630224541094417e-06, "loss": 0.8083, "step": 289000 }, { "epoch": 1.8464025145982137, "grad_norm": 0.7166469097137451, "learning_rate": 1.4618177777084574e-06, "loss": 0.6495, "step": 289010 }, { "epoch": 1.8464664017479524, "grad_norm": 0.8175358772277832, "learning_rate": 1.4606135901289618e-06, "loss": 0.881, "step": 289020 }, { "epoch": 1.846530288897691, "grad_norm": 1.0863618850708008, "learning_rate": 1.4594098913830788e-06, "loss": 0.7885, "step": 289030 }, { "epoch": 1.8465941760474298, "grad_norm": 0.9460238814353943, "learning_rate": 1.4582066814829376e-06, "loss": 0.9725, "step": 289040 }, { "epoch": 1.8466580631971685, "grad_norm": 0.8985845446586609, "learning_rate": 1.457003960440645e-06, "loss": 0.8003, "step": 289050 }, { "epoch": 1.8467219503469072, "grad_norm": 0.9575114250183105, "learning_rate": 1.4558017282683189e-06, "loss": 0.8808, "step": 289060 }, { "epoch": 1.846785837496646, "grad_norm": 0.9031855463981628, "learning_rate": 1.4545999849780668e-06, "loss": 1.0921, "step": 289070 }, { "epoch": 1.8468497246463846, "grad_norm": 0.9414730072021484, "learning_rate": 1.4533987305819953e-06, "loss": 0.8704, "step": 289080 }, { "epoch": 1.8469136117961233, "grad_norm": 1.9726576805114746, "learning_rate": 1.4521979650921891e-06, "loss": 0.7585, "step": 289090 }, { "epoch": 1.846977498945862, "grad_norm": 0.775337815284729, "learning_rate": 1.4509976885207555e-06, "loss": 0.9518, "step": 289100 }, { "epoch": 1.8470413860956008, "grad_norm": 1.590173602104187, "learning_rate": 1.4497979008797679e-06, "loss": 0.856, "step": 289110 }, { "epoch": 1.8471052732453395, "grad_norm": 0.996599555015564, "learning_rate": 1.448598602181328e-06, "loss": 0.828, "step": 289120 }, { "epoch": 1.8471691603950782, "grad_norm": 0.8098533749580383, "learning_rate": 1.4473997924374927e-06, "loss": 0.8946, "step": 289130 }, { "epoch": 1.8472330475448169, "grad_norm": 0.5876967906951904, "learning_rate": 1.4462014716603466e-06, "loss": 0.7622, "step": 289140 }, { "epoch": 1.8472969346945556, "grad_norm": 1.886121392250061, "learning_rate": 1.4450036398619637e-06, "loss": 0.7134, "step": 289150 }, { "epoch": 1.8473608218442943, "grad_norm": 1.2650415897369385, "learning_rate": 1.4438062970543953e-06, "loss": 0.8986, "step": 289160 }, { "epoch": 1.847424708994033, "grad_norm": 0.9794378280639648, "learning_rate": 1.4426094432497096e-06, "loss": 1.0858, "step": 289170 }, { "epoch": 1.8474885961437715, "grad_norm": 1.2420276403427124, "learning_rate": 1.4414130784599466e-06, "loss": 0.9369, "step": 289180 }, { "epoch": 1.8475524832935104, "grad_norm": 1.1234242916107178, "learning_rate": 1.4402172026971694e-06, "loss": 0.6944, "step": 289190 }, { "epoch": 1.847616370443249, "grad_norm": 1.2630137205123901, "learning_rate": 1.4390218159734125e-06, "loss": 0.9539, "step": 289200 }, { "epoch": 1.8476802575929878, "grad_norm": 1.18568754196167, "learning_rate": 1.437826918300722e-06, "loss": 0.8495, "step": 289210 }, { "epoch": 1.8477441447427263, "grad_norm": 1.0551838874816895, "learning_rate": 1.4366325096911215e-06, "loss": 0.8087, "step": 289220 }, { "epoch": 1.8478080318924652, "grad_norm": 0.7045217156410217, "learning_rate": 1.435438590156646e-06, "loss": 0.9491, "step": 289230 }, { "epoch": 1.8478719190422037, "grad_norm": 0.5378367304801941, "learning_rate": 1.4342451597093187e-06, "loss": 0.6279, "step": 289240 }, { "epoch": 1.8479358061919426, "grad_norm": 1.6226013898849487, "learning_rate": 1.4330522183611583e-06, "loss": 0.7129, "step": 289250 }, { "epoch": 1.8479996933416811, "grad_norm": 0.9038100242614746, "learning_rate": 1.4318597661241773e-06, "loss": 0.7994, "step": 289260 }, { "epoch": 1.84806358049142, "grad_norm": 0.9648569822311401, "learning_rate": 1.4306678030103881e-06, "loss": 0.9585, "step": 289270 }, { "epoch": 1.8481274676411585, "grad_norm": 0.7719781398773193, "learning_rate": 1.4294763290317926e-06, "loss": 0.8269, "step": 289280 }, { "epoch": 1.8481913547908975, "grad_norm": 0.5771094560623169, "learning_rate": 1.4282853442003918e-06, "loss": 0.6463, "step": 289290 }, { "epoch": 1.848255241940636, "grad_norm": 0.7412580847740173, "learning_rate": 1.427094848528171e-06, "loss": 0.9148, "step": 289300 }, { "epoch": 1.8483191290903749, "grad_norm": 1.0679619312286377, "learning_rate": 1.4259048420271315e-06, "loss": 0.7854, "step": 289310 }, { "epoch": 1.8483830162401134, "grad_norm": 0.9758639931678772, "learning_rate": 1.4247153247092527e-06, "loss": 0.9488, "step": 289320 }, { "epoch": 1.8484469033898523, "grad_norm": 1.6436443328857422, "learning_rate": 1.4235262965865137e-06, "loss": 1.0376, "step": 289330 }, { "epoch": 1.8485107905395908, "grad_norm": 0.9758222103118896, "learning_rate": 1.4223377576708884e-06, "loss": 0.6294, "step": 289340 }, { "epoch": 1.8485746776893297, "grad_norm": 1.415855050086975, "learning_rate": 1.4211497079743452e-06, "loss": 1.0529, "step": 289350 }, { "epoch": 1.8486385648390682, "grad_norm": 1.0808721780776978, "learning_rate": 1.4199621475088576e-06, "loss": 0.918, "step": 289360 }, { "epoch": 1.8487024519888071, "grad_norm": 1.55045747756958, "learning_rate": 1.4187750762863773e-06, "loss": 0.8256, "step": 289370 }, { "epoch": 1.8487663391385456, "grad_norm": 1.0587116479873657, "learning_rate": 1.417588494318861e-06, "loss": 0.8804, "step": 289380 }, { "epoch": 1.8488302262882845, "grad_norm": 0.9089915156364441, "learning_rate": 1.4164024016182553e-06, "loss": 0.9252, "step": 289390 }, { "epoch": 1.848894113438023, "grad_norm": 0.6744896173477173, "learning_rate": 1.4152167981965114e-06, "loss": 0.7266, "step": 289400 }, { "epoch": 1.848958000587762, "grad_norm": 1.4949560165405273, "learning_rate": 1.4140316840655587e-06, "loss": 0.6444, "step": 289410 }, { "epoch": 1.8490218877375004, "grad_norm": 0.991797149181366, "learning_rate": 1.4128470592373488e-06, "loss": 0.7479, "step": 289420 }, { "epoch": 1.8490857748872391, "grad_norm": 0.8915597200393677, "learning_rate": 1.4116629237237944e-06, "loss": 1.0406, "step": 289430 }, { "epoch": 1.8491496620369778, "grad_norm": 0.934266209602356, "learning_rate": 1.410479277536836e-06, "loss": 1.1014, "step": 289440 }, { "epoch": 1.8492135491867165, "grad_norm": 0.8267480731010437, "learning_rate": 1.409296120688386e-06, "loss": 0.7946, "step": 289450 }, { "epoch": 1.8492774363364552, "grad_norm": 1.9359806776046753, "learning_rate": 1.408231697919049e-06, "loss": 1.0949, "step": 289460 }, { "epoch": 1.849341323486194, "grad_norm": 1.0157192945480347, "learning_rate": 1.4070494708465886e-06, "loss": 0.7052, "step": 289470 }, { "epoch": 1.8494052106359327, "grad_norm": 2.140254020690918, "learning_rate": 1.4058677331471814e-06, "loss": 0.9596, "step": 289480 }, { "epoch": 1.8494690977856714, "grad_norm": 1.4555343389511108, "learning_rate": 1.4046864848327236e-06, "loss": 0.774, "step": 289490 }, { "epoch": 1.84953298493541, "grad_norm": 1.036714792251587, "learning_rate": 1.4035057259151108e-06, "loss": 0.7356, "step": 289500 }, { "epoch": 1.8495968720851488, "grad_norm": 0.5356025695800781, "learning_rate": 1.4023254564062505e-06, "loss": 0.6487, "step": 289510 }, { "epoch": 1.8496607592348875, "grad_norm": 1.2562586069107056, "learning_rate": 1.4011456763180053e-06, "loss": 0.9194, "step": 289520 }, { "epoch": 1.8497246463846262, "grad_norm": 1.0846186876296997, "learning_rate": 1.3999663856622714e-06, "loss": 0.7698, "step": 289530 }, { "epoch": 1.849788533534365, "grad_norm": 1.2222474813461304, "learning_rate": 1.3987875844509113e-06, "loss": 0.6781, "step": 289540 }, { "epoch": 1.8498524206841036, "grad_norm": 0.9994776844978333, "learning_rate": 1.3976092726958157e-06, "loss": 0.8111, "step": 289550 }, { "epoch": 1.8499163078338423, "grad_norm": 1.1925878524780273, "learning_rate": 1.396431450408836e-06, "loss": 0.8134, "step": 289560 }, { "epoch": 1.849980194983581, "grad_norm": 0.8951390385627747, "learning_rate": 1.3952541176018407e-06, "loss": 0.8301, "step": 289570 }, { "epoch": 1.8500440821333197, "grad_norm": 1.0971461534500122, "learning_rate": 1.3940772742866926e-06, "loss": 1.1852, "step": 289580 }, { "epoch": 1.8501079692830584, "grad_norm": 2.3525285720825195, "learning_rate": 1.3929009204752263e-06, "loss": 0.95, "step": 289590 }, { "epoch": 1.8501718564327971, "grad_norm": 1.19795823097229, "learning_rate": 1.391725056179305e-06, "loss": 1.1132, "step": 289600 }, { "epoch": 1.8502357435825358, "grad_norm": 1.1813701391220093, "learning_rate": 1.3905496814107633e-06, "loss": 0.8383, "step": 289610 }, { "epoch": 1.8502996307322745, "grad_norm": 0.970208466053009, "learning_rate": 1.389374796181442e-06, "loss": 0.9159, "step": 289620 }, { "epoch": 1.8503635178820133, "grad_norm": 1.0656824111938477, "learning_rate": 1.3882004005031645e-06, "loss": 1.0625, "step": 289630 }, { "epoch": 1.850427405031752, "grad_norm": 0.7011227011680603, "learning_rate": 1.387026494387772e-06, "loss": 0.8958, "step": 289640 }, { "epoch": 1.8504912921814907, "grad_norm": 0.8881285786628723, "learning_rate": 1.3858530778470714e-06, "loss": 0.9315, "step": 289650 }, { "epoch": 1.8505551793312294, "grad_norm": 0.6982859969139099, "learning_rate": 1.384680150892892e-06, "loss": 0.8761, "step": 289660 }, { "epoch": 1.8506190664809679, "grad_norm": 1.166447639465332, "learning_rate": 1.3835077135370355e-06, "loss": 1.014, "step": 289670 }, { "epoch": 1.8506829536307068, "grad_norm": 0.8452163338661194, "learning_rate": 1.382335765791326e-06, "loss": 0.9305, "step": 289680 }, { "epoch": 1.8507468407804453, "grad_norm": 3.4163830280303955, "learning_rate": 1.3811643076675484e-06, "loss": 0.8241, "step": 289690 }, { "epoch": 1.8508107279301842, "grad_norm": 1.1839015483856201, "learning_rate": 1.37999333917751e-06, "loss": 0.8769, "step": 289700 }, { "epoch": 1.8508746150799227, "grad_norm": 0.5793390274047852, "learning_rate": 1.3788228603329955e-06, "loss": 0.8559, "step": 289710 }, { "epoch": 1.8509385022296616, "grad_norm": 1.1785459518432617, "learning_rate": 1.377652871145807e-06, "loss": 0.9978, "step": 289720 }, { "epoch": 1.8510023893794, "grad_norm": 0.8161349892616272, "learning_rate": 1.3764833716277126e-06, "loss": 0.7114, "step": 289730 }, { "epoch": 1.851066276529139, "grad_norm": 0.8620417714118958, "learning_rate": 1.3753143617904974e-06, "loss": 0.7987, "step": 289740 }, { "epoch": 1.8511301636788775, "grad_norm": 0.9723739624023438, "learning_rate": 1.374145841645935e-06, "loss": 0.9672, "step": 289750 }, { "epoch": 1.8511940508286164, "grad_norm": 0.8605661988258362, "learning_rate": 1.3729778112057889e-06, "loss": 1.0071, "step": 289760 }, { "epoch": 1.851257937978355, "grad_norm": 0.6105464696884155, "learning_rate": 1.3718102704818215e-06, "loss": 1.0402, "step": 289770 }, { "epoch": 1.8513218251280938, "grad_norm": 0.5702158212661743, "learning_rate": 1.3706432194857954e-06, "loss": 0.7694, "step": 289780 }, { "epoch": 1.8513857122778323, "grad_norm": 0.7528900504112244, "learning_rate": 1.3694766582294682e-06, "loss": 0.9634, "step": 289790 }, { "epoch": 1.8514495994275713, "grad_norm": 0.6970102787017822, "learning_rate": 1.3683105867245748e-06, "loss": 0.8428, "step": 289800 }, { "epoch": 1.8515134865773097, "grad_norm": 0.9566949605941772, "learning_rate": 1.3671450049828782e-06, "loss": 0.9473, "step": 289810 }, { "epoch": 1.8515773737270487, "grad_norm": 0.6424906253814697, "learning_rate": 1.365979913016091e-06, "loss": 0.8256, "step": 289820 }, { "epoch": 1.8516412608767872, "grad_norm": 1.8221441507339478, "learning_rate": 1.3648153108359708e-06, "loss": 0.7576, "step": 289830 }, { "epoch": 1.851705148026526, "grad_norm": 1.3779975175857544, "learning_rate": 1.3636511984542299e-06, "loss": 0.8611, "step": 289840 }, { "epoch": 1.8517690351762646, "grad_norm": 0.9683537483215332, "learning_rate": 1.3624875758825984e-06, "loss": 0.8873, "step": 289850 }, { "epoch": 1.8518329223260035, "grad_norm": 0.961801290512085, "learning_rate": 1.3613244431327943e-06, "loss": 0.7379, "step": 289860 }, { "epoch": 1.851896809475742, "grad_norm": 1.2155733108520508, "learning_rate": 1.3601618002165362e-06, "loss": 0.9428, "step": 289870 }, { "epoch": 1.851960696625481, "grad_norm": 0.9074127674102783, "learning_rate": 1.3589996471455202e-06, "loss": 0.868, "step": 289880 }, { "epoch": 1.8520245837752194, "grad_norm": 1.2908611297607422, "learning_rate": 1.3578379839314647e-06, "loss": 0.669, "step": 289890 }, { "epoch": 1.8520884709249583, "grad_norm": 2.684487819671631, "learning_rate": 1.3566768105860606e-06, "loss": 0.8911, "step": 289900 }, { "epoch": 1.8521523580746968, "grad_norm": 1.0935617685317993, "learning_rate": 1.3555161271210037e-06, "loss": 0.7533, "step": 289910 }, { "epoch": 1.8522162452244355, "grad_norm": 0.655015766620636, "learning_rate": 1.3543559335479793e-06, "loss": 0.8738, "step": 289920 }, { "epoch": 1.8522801323741742, "grad_norm": 1.0216906070709229, "learning_rate": 1.3531962298786838e-06, "loss": 0.9277, "step": 289930 }, { "epoch": 1.852344019523913, "grad_norm": 0.6868817210197449, "learning_rate": 1.3520370161247798e-06, "loss": 0.7341, "step": 289940 }, { "epoch": 1.8524079066736516, "grad_norm": 0.8469321131706238, "learning_rate": 1.3508782922979524e-06, "loss": 0.9673, "step": 289950 }, { "epoch": 1.8524717938233903, "grad_norm": 0.8194224238395691, "learning_rate": 1.3497200584098645e-06, "loss": 0.7645, "step": 289960 }, { "epoch": 1.852535680973129, "grad_norm": 0.7928370237350464, "learning_rate": 1.3485623144721904e-06, "loss": 1.3606, "step": 289970 }, { "epoch": 1.8525995681228677, "grad_norm": 1.1735436916351318, "learning_rate": 1.347405060496576e-06, "loss": 0.9206, "step": 289980 }, { "epoch": 1.8526634552726065, "grad_norm": 0.6793167591094971, "learning_rate": 1.3462482964946844e-06, "loss": 0.6639, "step": 289990 }, { "epoch": 1.8527273424223452, "grad_norm": 0.9222388863563538, "learning_rate": 1.3450920224781727e-06, "loss": 0.7729, "step": 290000 }, { "epoch": 1.8527912295720839, "grad_norm": 1.0564346313476562, "learning_rate": 1.3439362384586706e-06, "loss": 1.0737, "step": 290010 }, { "epoch": 1.8528551167218226, "grad_norm": 1.6825110912322998, "learning_rate": 1.3427809444478246e-06, "loss": 1.3275, "step": 290020 }, { "epoch": 1.8529190038715613, "grad_norm": 0.9517335295677185, "learning_rate": 1.3416261404572694e-06, "loss": 1.2495, "step": 290030 }, { "epoch": 1.8529828910213, "grad_norm": 0.9224988222122192, "learning_rate": 1.340471826498635e-06, "loss": 1.0336, "step": 290040 }, { "epoch": 1.8530467781710387, "grad_norm": 0.8164480328559875, "learning_rate": 1.3393180025835395e-06, "loss": 0.7448, "step": 290050 }, { "epoch": 1.8531106653207774, "grad_norm": 0.8956406116485596, "learning_rate": 1.3381646687236182e-06, "loss": 0.7648, "step": 290060 }, { "epoch": 1.853174552470516, "grad_norm": 1.2125521898269653, "learning_rate": 1.337011824930473e-06, "loss": 0.81, "step": 290070 }, { "epoch": 1.8532384396202548, "grad_norm": 1.1064581871032715, "learning_rate": 1.3358594712157169e-06, "loss": 0.8545, "step": 290080 }, { "epoch": 1.8533023267699935, "grad_norm": 0.8873088955879211, "learning_rate": 1.334707607590957e-06, "loss": 1.0518, "step": 290090 }, { "epoch": 1.8533662139197322, "grad_norm": 2.381409168243408, "learning_rate": 1.3335562340677898e-06, "loss": 1.0322, "step": 290100 }, { "epoch": 1.853430101069471, "grad_norm": 0.8438361883163452, "learning_rate": 1.3324053506578226e-06, "loss": 0.7972, "step": 290110 }, { "epoch": 1.8534939882192096, "grad_norm": 0.9788957238197327, "learning_rate": 1.3312549573726295e-06, "loss": 0.8298, "step": 290120 }, { "epoch": 1.8535578753689483, "grad_norm": 0.5684459209442139, "learning_rate": 1.330105054223807e-06, "loss": 0.8578, "step": 290130 }, { "epoch": 1.853621762518687, "grad_norm": 1.5629345178604126, "learning_rate": 1.3289556412229287e-06, "loss": 0.9286, "step": 290140 }, { "epoch": 1.8536856496684258, "grad_norm": 0.9215679168701172, "learning_rate": 1.3278067183815801e-06, "loss": 0.997, "step": 290150 }, { "epoch": 1.8537495368181642, "grad_norm": 0.9726619124412537, "learning_rate": 1.3266582857113185e-06, "loss": 0.8652, "step": 290160 }, { "epoch": 1.8538134239679032, "grad_norm": 0.964089572429657, "learning_rate": 1.3255103432237181e-06, "loss": 0.7237, "step": 290170 }, { "epoch": 1.8538773111176416, "grad_norm": 0.9652983546257019, "learning_rate": 1.3243628909303363e-06, "loss": 0.837, "step": 290180 }, { "epoch": 1.8539411982673806, "grad_norm": 0.981907308101654, "learning_rate": 1.323215928842736e-06, "loss": 0.9947, "step": 290190 }, { "epoch": 1.854005085417119, "grad_norm": 0.6423254609107971, "learning_rate": 1.322069456972458e-06, "loss": 0.8177, "step": 290200 }, { "epoch": 1.854068972566858, "grad_norm": 1.2880287170410156, "learning_rate": 1.3209234753310595e-06, "loss": 0.6577, "step": 290210 }, { "epoch": 1.8541328597165965, "grad_norm": 1.2770487070083618, "learning_rate": 1.319777983930065e-06, "loss": 0.8499, "step": 290220 }, { "epoch": 1.8541967468663354, "grad_norm": 1.0457178354263306, "learning_rate": 1.3186329827810317e-06, "loss": 0.9701, "step": 290230 }, { "epoch": 1.8542606340160739, "grad_norm": 1.2421879768371582, "learning_rate": 1.3174884718954727e-06, "loss": 0.7487, "step": 290240 }, { "epoch": 1.8543245211658128, "grad_norm": 0.7995020151138306, "learning_rate": 1.3163444512849232e-06, "loss": 1.0551, "step": 290250 }, { "epoch": 1.8543884083155513, "grad_norm": 0.7688214182853699, "learning_rate": 1.3152009209608963e-06, "loss": 0.8622, "step": 290260 }, { "epoch": 1.8544522954652902, "grad_norm": 0.9706707000732422, "learning_rate": 1.3140578809349212e-06, "loss": 1.0842, "step": 290270 }, { "epoch": 1.8545161826150287, "grad_norm": 1.2777706384658813, "learning_rate": 1.3129153312185006e-06, "loss": 0.8934, "step": 290280 }, { "epoch": 1.8545800697647676, "grad_norm": 1.1299149990081787, "learning_rate": 1.3117732718231468e-06, "loss": 0.9253, "step": 290290 }, { "epoch": 1.8546439569145061, "grad_norm": 0.8032364845275879, "learning_rate": 1.310631702760351e-06, "loss": 1.0305, "step": 290300 }, { "epoch": 1.854707844064245, "grad_norm": 1.5992950201034546, "learning_rate": 1.309490624041615e-06, "loss": 0.7963, "step": 290310 }, { "epoch": 1.8547717312139835, "grad_norm": 0.9917843341827393, "learning_rate": 1.3083500356784405e-06, "loss": 0.7164, "step": 290320 }, { "epoch": 1.8548356183637225, "grad_norm": 0.9065639972686768, "learning_rate": 1.3072099376822966e-06, "loss": 0.9515, "step": 290330 }, { "epoch": 1.854899505513461, "grad_norm": 0.980902373790741, "learning_rate": 1.3060703300646848e-06, "loss": 0.7473, "step": 290340 }, { "epoch": 1.8549633926631999, "grad_norm": 1.5900095701217651, "learning_rate": 1.3049312128370629e-06, "loss": 0.8436, "step": 290350 }, { "epoch": 1.8550272798129384, "grad_norm": 1.0598942041397095, "learning_rate": 1.3037925860109101e-06, "loss": 0.9816, "step": 290360 }, { "epoch": 1.8550911669626773, "grad_norm": 1.210523247718811, "learning_rate": 1.3026544495976955e-06, "loss": 0.7237, "step": 290370 }, { "epoch": 1.8551550541124158, "grad_norm": 1.4614925384521484, "learning_rate": 1.301516803608882e-06, "loss": 1.0234, "step": 290380 }, { "epoch": 1.8552189412621545, "grad_norm": 0.6827429533004761, "learning_rate": 1.3003796480559217e-06, "loss": 0.7856, "step": 290390 }, { "epoch": 1.8552828284118932, "grad_norm": 0.9089744687080383, "learning_rate": 1.2992429829502772e-06, "loss": 0.9732, "step": 290400 }, { "epoch": 1.8553467155616319, "grad_norm": 1.2129607200622559, "learning_rate": 1.2981068083033787e-06, "loss": 0.7271, "step": 290410 }, { "epoch": 1.8554106027113706, "grad_norm": 1.3265371322631836, "learning_rate": 1.2969711241266836e-06, "loss": 0.9427, "step": 290420 }, { "epoch": 1.8554744898611093, "grad_norm": 1.1904284954071045, "learning_rate": 1.2958359304316159e-06, "loss": 0.9471, "step": 290430 }, { "epoch": 1.855538377010848, "grad_norm": 1.0264545679092407, "learning_rate": 1.2947012272296221e-06, "loss": 1.1399, "step": 290440 }, { "epoch": 1.8556022641605867, "grad_norm": 1.146247386932373, "learning_rate": 1.2935670145321211e-06, "loss": 0.952, "step": 290450 }, { "epoch": 1.8556661513103254, "grad_norm": 1.2765698432922363, "learning_rate": 1.2924332923505367e-06, "loss": 0.6927, "step": 290460 }, { "epoch": 1.8557300384600641, "grad_norm": 2.164412498474121, "learning_rate": 1.2913000606962878e-06, "loss": 1.0325, "step": 290470 }, { "epoch": 1.8557939256098028, "grad_norm": 1.5119715929031372, "learning_rate": 1.2901673195807873e-06, "loss": 0.8881, "step": 290480 }, { "epoch": 1.8558578127595415, "grad_norm": 1.2955068349838257, "learning_rate": 1.289035069015443e-06, "loss": 0.8746, "step": 290490 }, { "epoch": 1.8559216999092802, "grad_norm": 1.5162767171859741, "learning_rate": 1.2879033090116565e-06, "loss": 0.6947, "step": 290500 }, { "epoch": 1.855985587059019, "grad_norm": 0.8264356255531311, "learning_rate": 1.2867720395808247e-06, "loss": 0.8881, "step": 290510 }, { "epoch": 1.8560494742087577, "grad_norm": 0.6280115842819214, "learning_rate": 1.2856412607343382e-06, "loss": 0.9813, "step": 290520 }, { "epoch": 1.8561133613584964, "grad_norm": 1.1387577056884766, "learning_rate": 1.2845109724835935e-06, "loss": 0.9298, "step": 290530 }, { "epoch": 1.856177248508235, "grad_norm": 0.8114959597587585, "learning_rate": 1.2833811748399593e-06, "loss": 0.766, "step": 290540 }, { "epoch": 1.8562411356579738, "grad_norm": 1.303884506225586, "learning_rate": 1.2822518678148321e-06, "loss": 0.8965, "step": 290550 }, { "epoch": 1.8563050228077125, "grad_norm": 0.871311604976654, "learning_rate": 1.2811230514195693e-06, "loss": 0.6701, "step": 290560 }, { "epoch": 1.8563689099574512, "grad_norm": 1.7185081243515015, "learning_rate": 1.279994725665551e-06, "loss": 0.8616, "step": 290570 }, { "epoch": 1.85643279710719, "grad_norm": 1.096208930015564, "learning_rate": 1.2788668905641287e-06, "loss": 0.7098, "step": 290580 }, { "epoch": 1.8564966842569286, "grad_norm": 0.938231348991394, "learning_rate": 1.2777395461266716e-06, "loss": 1.1072, "step": 290590 }, { "epoch": 1.8565605714066673, "grad_norm": 1.491259217262268, "learning_rate": 1.2766126923645205e-06, "loss": 0.6843, "step": 290600 }, { "epoch": 1.856624458556406, "grad_norm": 1.6792606115341187, "learning_rate": 1.2754863292890385e-06, "loss": 0.7934, "step": 290610 }, { "epoch": 1.8566883457061447, "grad_norm": 1.2424652576446533, "learning_rate": 1.2743604569115607e-06, "loss": 1.0605, "step": 290620 }, { "epoch": 1.8567522328558834, "grad_norm": 1.131174921989441, "learning_rate": 1.273235075243423e-06, "loss": 0.8315, "step": 290630 }, { "epoch": 1.8568161200056221, "grad_norm": 1.2208259105682373, "learning_rate": 1.272110184295966e-06, "loss": 0.8992, "step": 290640 }, { "epoch": 1.8568800071553606, "grad_norm": 1.2590363025665283, "learning_rate": 1.270985784080514e-06, "loss": 1.0244, "step": 290650 }, { "epoch": 1.8569438943050995, "grad_norm": 0.8811933398246765, "learning_rate": 1.2698618746083912e-06, "loss": 0.8166, "step": 290660 }, { "epoch": 1.857007781454838, "grad_norm": 0.8141939640045166, "learning_rate": 1.2687384558909165e-06, "loss": 0.9293, "step": 290670 }, { "epoch": 1.857071668604577, "grad_norm": 1.0431616306304932, "learning_rate": 1.2676155279394087e-06, "loss": 0.918, "step": 290680 }, { "epoch": 1.8571355557543154, "grad_norm": 0.7412940263748169, "learning_rate": 1.2664930907651695e-06, "loss": 0.8712, "step": 290690 }, { "epoch": 1.8571994429040544, "grad_norm": 0.980758011341095, "learning_rate": 1.2653711443795069e-06, "loss": 0.8441, "step": 290700 }, { "epoch": 1.8572633300537928, "grad_norm": 0.8978270888328552, "learning_rate": 1.2642496887937117e-06, "loss": 0.761, "step": 290710 }, { "epoch": 1.8573272172035318, "grad_norm": 3.439507246017456, "learning_rate": 1.2631287240190915e-06, "loss": 0.9548, "step": 290720 }, { "epoch": 1.8573911043532703, "grad_norm": 0.7687893509864807, "learning_rate": 1.2620082500669205e-06, "loss": 0.7786, "step": 290730 }, { "epoch": 1.8574549915030092, "grad_norm": 0.965783417224884, "learning_rate": 1.2608882669485012e-06, "loss": 0.7568, "step": 290740 }, { "epoch": 1.8575188786527477, "grad_norm": 1.017303705215454, "learning_rate": 1.2597687746750963e-06, "loss": 1.0047, "step": 290750 }, { "epoch": 1.8575827658024866, "grad_norm": 0.7670354247093201, "learning_rate": 1.2586497732579916e-06, "loss": 0.7342, "step": 290760 }, { "epoch": 1.857646652952225, "grad_norm": 0.6807330250740051, "learning_rate": 1.257531262708439e-06, "loss": 0.948, "step": 290770 }, { "epoch": 1.857710540101964, "grad_norm": 1.206899881362915, "learning_rate": 1.2564132430377296e-06, "loss": 0.6925, "step": 290780 }, { "epoch": 1.8577744272517025, "grad_norm": 1.0641649961471558, "learning_rate": 1.2552957142570986e-06, "loss": 0.9098, "step": 290790 }, { "epoch": 1.8578383144014414, "grad_norm": 0.9408820271492004, "learning_rate": 1.2541786763778152e-06, "loss": 0.7734, "step": 290800 }, { "epoch": 1.85790220155118, "grad_norm": 1.2925841808319092, "learning_rate": 1.2530621294111145e-06, "loss": 0.8562, "step": 290810 }, { "epoch": 1.8579660887009188, "grad_norm": 1.395602822303772, "learning_rate": 1.2519460733682598e-06, "loss": 0.7759, "step": 290820 }, { "epoch": 1.8580299758506573, "grad_norm": 0.9474008083343506, "learning_rate": 1.2508305082604754e-06, "loss": 0.8571, "step": 290830 }, { "epoch": 1.8580938630003963, "grad_norm": 0.8584325313568115, "learning_rate": 1.2497154340990024e-06, "loss": 0.7673, "step": 290840 }, { "epoch": 1.8581577501501347, "grad_norm": 1.0238312482833862, "learning_rate": 1.2486008508950763e-06, "loss": 0.7351, "step": 290850 }, { "epoch": 1.8582216372998737, "grad_norm": 1.515025019645691, "learning_rate": 1.247486758659905e-06, "loss": 0.8174, "step": 290860 }, { "epoch": 1.8582855244496121, "grad_norm": 1.1440699100494385, "learning_rate": 1.2463731574047288e-06, "loss": 0.9326, "step": 290870 }, { "epoch": 1.8583494115993509, "grad_norm": 0.7021576762199402, "learning_rate": 1.2452600471407449e-06, "loss": 1.0455, "step": 290880 }, { "epoch": 1.8584132987490896, "grad_norm": 1.4968088865280151, "learning_rate": 1.2441474278791775e-06, "loss": 0.7411, "step": 290890 }, { "epoch": 1.8584771858988283, "grad_norm": 2.256848096847534, "learning_rate": 1.2430352996312234e-06, "loss": 0.9038, "step": 290900 }, { "epoch": 1.858541073048567, "grad_norm": 0.9790036678314209, "learning_rate": 1.2419236624080844e-06, "loss": 0.9509, "step": 290910 }, { "epoch": 1.8586049601983057, "grad_norm": 1.1517078876495361, "learning_rate": 1.2408125162209571e-06, "loss": 0.8685, "step": 290920 }, { "epoch": 1.8586688473480444, "grad_norm": 1.1216986179351807, "learning_rate": 1.239701861081033e-06, "loss": 0.8746, "step": 290930 }, { "epoch": 1.858732734497783, "grad_norm": 1.2162419557571411, "learning_rate": 1.238591696999486e-06, "loss": 0.8214, "step": 290940 }, { "epoch": 1.8587966216475218, "grad_norm": 0.6631404161453247, "learning_rate": 1.2374820239875129e-06, "loss": 0.7694, "step": 290950 }, { "epoch": 1.8588605087972605, "grad_norm": 0.6616999506950378, "learning_rate": 1.236372842056277e-06, "loss": 0.8894, "step": 290960 }, { "epoch": 1.8589243959469992, "grad_norm": 0.9456769824028015, "learning_rate": 1.2352641512169583e-06, "loss": 0.8276, "step": 290970 }, { "epoch": 1.858988283096738, "grad_norm": 0.9639400839805603, "learning_rate": 1.2341559514807144e-06, "loss": 0.8073, "step": 290980 }, { "epoch": 1.8590521702464766, "grad_norm": 0.9093899726867676, "learning_rate": 1.2330482428587031e-06, "loss": 0.9685, "step": 290990 }, { "epoch": 1.8591160573962153, "grad_norm": 2.9453232288360596, "learning_rate": 1.2319410253620933e-06, "loss": 0.8626, "step": 291000 }, { "epoch": 1.859179944545954, "grad_norm": 1.9953830242156982, "learning_rate": 1.230834299002026e-06, "loss": 0.9299, "step": 291010 }, { "epoch": 1.8592438316956927, "grad_norm": 0.8659750819206238, "learning_rate": 1.229728063789648e-06, "loss": 0.9576, "step": 291020 }, { "epoch": 1.8593077188454314, "grad_norm": 1.3473628759384155, "learning_rate": 1.2286223197360947e-06, "loss": 0.8491, "step": 291030 }, { "epoch": 1.8593716059951702, "grad_norm": 1.3287653923034668, "learning_rate": 1.2275170668525183e-06, "loss": 0.9237, "step": 291040 }, { "epoch": 1.8594354931449089, "grad_norm": 0.8199862837791443, "learning_rate": 1.226412305150032e-06, "loss": 1.0027, "step": 291050 }, { "epoch": 1.8594993802946476, "grad_norm": 1.043298363685608, "learning_rate": 1.2253080346397717e-06, "loss": 0.9655, "step": 291060 }, { "epoch": 1.8595632674443863, "grad_norm": 1.0660456418991089, "learning_rate": 1.224204255332856e-06, "loss": 0.8745, "step": 291070 }, { "epoch": 1.859627154594125, "grad_norm": 1.1216233968734741, "learning_rate": 1.2231009672403981e-06, "loss": 0.9918, "step": 291080 }, { "epoch": 1.8596910417438637, "grad_norm": 0.8022388219833374, "learning_rate": 1.2219981703735117e-06, "loss": 0.6397, "step": 291090 }, { "epoch": 1.8597549288936024, "grad_norm": 0.7386797666549683, "learning_rate": 1.2208958647433045e-06, "loss": 0.9875, "step": 291100 }, { "epoch": 1.859818816043341, "grad_norm": 1.1579437255859375, "learning_rate": 1.2197940503608728e-06, "loss": 0.8379, "step": 291110 }, { "epoch": 1.8598827031930796, "grad_norm": 3.6156814098358154, "learning_rate": 1.218692727237325e-06, "loss": 0.8989, "step": 291120 }, { "epoch": 1.8599465903428185, "grad_norm": 0.734523355960846, "learning_rate": 1.2175918953837296e-06, "loss": 0.6623, "step": 291130 }, { "epoch": 1.860010477492557, "grad_norm": 0.9780398607254028, "learning_rate": 1.2164915548111998e-06, "loss": 0.78, "step": 291140 }, { "epoch": 1.860074364642296, "grad_norm": 0.7017815113067627, "learning_rate": 1.215391705530794e-06, "loss": 0.7613, "step": 291150 }, { "epoch": 1.8601382517920344, "grad_norm": 0.9394316077232361, "learning_rate": 1.2142923475535973e-06, "loss": 0.8459, "step": 291160 }, { "epoch": 1.8602021389417733, "grad_norm": 1.0311943292617798, "learning_rate": 1.2131934808906898e-06, "loss": 1.0536, "step": 291170 }, { "epoch": 1.8602660260915118, "grad_norm": 0.7530616521835327, "learning_rate": 1.2120951055531294e-06, "loss": 0.987, "step": 291180 }, { "epoch": 1.8603299132412507, "grad_norm": 0.7854243516921997, "learning_rate": 1.2109972215519793e-06, "loss": 0.753, "step": 291190 }, { "epoch": 1.8603938003909892, "grad_norm": 1.3844327926635742, "learning_rate": 1.2098998288982866e-06, "loss": 0.9866, "step": 291200 }, { "epoch": 1.8604576875407282, "grad_norm": 0.8715339303016663, "learning_rate": 1.2088029276031255e-06, "loss": 0.8878, "step": 291210 }, { "epoch": 1.8605215746904666, "grad_norm": 0.9319707751274109, "learning_rate": 1.2077065176775204e-06, "loss": 0.8751, "step": 291220 }, { "epoch": 1.8605854618402056, "grad_norm": 3.1977455615997314, "learning_rate": 1.2066105991325238e-06, "loss": 1.1112, "step": 291230 }, { "epoch": 1.860649348989944, "grad_norm": 0.8579459190368652, "learning_rate": 1.2055151719791714e-06, "loss": 0.8311, "step": 291240 }, { "epoch": 1.860713236139683, "grad_norm": 0.9029688239097595, "learning_rate": 1.2044202362284984e-06, "loss": 0.9702, "step": 291250 }, { "epoch": 1.8607771232894215, "grad_norm": 1.4221550226211548, "learning_rate": 1.2033257918915185e-06, "loss": 0.651, "step": 291260 }, { "epoch": 1.8608410104391604, "grad_norm": 1.015944004058838, "learning_rate": 1.202231838979273e-06, "loss": 0.8291, "step": 291270 }, { "epoch": 1.8609048975888989, "grad_norm": 0.8289602398872375, "learning_rate": 1.2011383775027585e-06, "loss": 1.0116, "step": 291280 }, { "epoch": 1.8609687847386378, "grad_norm": 1.497787356376648, "learning_rate": 1.200045407473005e-06, "loss": 1.0533, "step": 291290 }, { "epoch": 1.8610326718883763, "grad_norm": 2.19814133644104, "learning_rate": 1.1989529289010093e-06, "loss": 1.2857, "step": 291300 }, { "epoch": 1.8610965590381152, "grad_norm": 0.7582222819328308, "learning_rate": 1.1978609417977793e-06, "loss": 0.8011, "step": 291310 }, { "epoch": 1.8611604461878537, "grad_norm": 1.9151540994644165, "learning_rate": 1.1967694461743063e-06, "loss": 0.7417, "step": 291320 }, { "epoch": 1.8612243333375926, "grad_norm": 0.6472824811935425, "learning_rate": 1.1956784420415923e-06, "loss": 0.8088, "step": 291330 }, { "epoch": 1.8612882204873311, "grad_norm": 1.377766728401184, "learning_rate": 1.1945879294106123e-06, "loss": 0.782, "step": 291340 }, { "epoch": 1.86135210763707, "grad_norm": 1.153731346130371, "learning_rate": 1.193497908292357e-06, "loss": 1.0337, "step": 291350 }, { "epoch": 1.8614159947868085, "grad_norm": 1.072165846824646, "learning_rate": 1.1924083786977958e-06, "loss": 0.6777, "step": 291360 }, { "epoch": 1.8614798819365472, "grad_norm": 0.8234143853187561, "learning_rate": 1.1913193406379086e-06, "loss": 1.0006, "step": 291370 }, { "epoch": 1.861543769086286, "grad_norm": 1.5603644847869873, "learning_rate": 1.1902307941236646e-06, "loss": 1.0589, "step": 291380 }, { "epoch": 1.8616076562360246, "grad_norm": 0.6928756237030029, "learning_rate": 1.1891427391660215e-06, "loss": 0.7702, "step": 291390 }, { "epoch": 1.8616715433857633, "grad_norm": 1.0293365716934204, "learning_rate": 1.1880551757759428e-06, "loss": 1.0253, "step": 291400 }, { "epoch": 1.861735430535502, "grad_norm": 0.9429540634155273, "learning_rate": 1.18696810396437e-06, "loss": 0.7044, "step": 291410 }, { "epoch": 1.8617993176852408, "grad_norm": 1.1565495729446411, "learning_rate": 1.1858815237422604e-06, "loss": 0.8934, "step": 291420 }, { "epoch": 1.8618632048349795, "grad_norm": 1.087480902671814, "learning_rate": 1.1847954351205503e-06, "loss": 0.8431, "step": 291430 }, { "epoch": 1.8619270919847182, "grad_norm": 0.8481289744377136, "learning_rate": 1.1837098381101919e-06, "loss": 0.8539, "step": 291440 }, { "epoch": 1.8619909791344569, "grad_norm": 0.8765703439712524, "learning_rate": 1.1826247327220986e-06, "loss": 0.8162, "step": 291450 }, { "epoch": 1.8620548662841956, "grad_norm": 0.9616901874542236, "learning_rate": 1.181540118967217e-06, "loss": 0.9633, "step": 291460 }, { "epoch": 1.8621187534339343, "grad_norm": 0.8359056711196899, "learning_rate": 1.1804559968564498e-06, "loss": 0.7899, "step": 291470 }, { "epoch": 1.862182640583673, "grad_norm": 1.9986016750335693, "learning_rate": 1.1793723664007218e-06, "loss": 0.7305, "step": 291480 }, { "epoch": 1.8622465277334117, "grad_norm": 1.48747980594635, "learning_rate": 1.1782892276109625e-06, "loss": 0.8578, "step": 291490 }, { "epoch": 1.8623104148831504, "grad_norm": 1.4891974925994873, "learning_rate": 1.177206580498058e-06, "loss": 1.1286, "step": 291500 }, { "epoch": 1.8623743020328891, "grad_norm": 0.9178832769393921, "learning_rate": 1.1761244250729275e-06, "loss": 0.6851, "step": 291510 }, { "epoch": 1.8624381891826278, "grad_norm": 0.8637306094169617, "learning_rate": 1.1750427613464566e-06, "loss": 0.8119, "step": 291520 }, { "epoch": 1.8625020763323665, "grad_norm": 0.7352100014686584, "learning_rate": 1.173961589329553e-06, "loss": 0.7242, "step": 291530 }, { "epoch": 1.8625659634821052, "grad_norm": 1.4195234775543213, "learning_rate": 1.172880909033086e-06, "loss": 0.7602, "step": 291540 }, { "epoch": 1.862629850631844, "grad_norm": 0.9579132199287415, "learning_rate": 1.1718007204679582e-06, "loss": 0.9725, "step": 291550 }, { "epoch": 1.8626937377815826, "grad_norm": 0.652401328086853, "learning_rate": 1.1707210236450382e-06, "loss": 0.8777, "step": 291560 }, { "epoch": 1.8627576249313214, "grad_norm": 1.022739052772522, "learning_rate": 1.169641818575201e-06, "loss": 0.8411, "step": 291570 }, { "epoch": 1.86282151208106, "grad_norm": 1.5225722789764404, "learning_rate": 1.1685631052693103e-06, "loss": 0.7824, "step": 291580 }, { "epoch": 1.8628853992307988, "grad_norm": 0.7213976383209229, "learning_rate": 1.1674848837382402e-06, "loss": 0.6937, "step": 291590 }, { "epoch": 1.8629492863805375, "grad_norm": 0.5849156975746155, "learning_rate": 1.1664071539928378e-06, "loss": 0.85, "step": 291600 }, { "epoch": 1.863013173530276, "grad_norm": 0.8378762602806091, "learning_rate": 1.1653299160439667e-06, "loss": 1.06, "step": 291610 }, { "epoch": 1.8630770606800149, "grad_norm": 0.7279397249221802, "learning_rate": 1.1642531699024684e-06, "loss": 0.6376, "step": 291620 }, { "epoch": 1.8631409478297534, "grad_norm": 0.9828930497169495, "learning_rate": 1.163176915579195e-06, "loss": 0.9272, "step": 291630 }, { "epoch": 1.8632048349794923, "grad_norm": 1.0489158630371094, "learning_rate": 1.1621011530849713e-06, "loss": 1.0979, "step": 291640 }, { "epoch": 1.8632687221292308, "grad_norm": 0.6363334059715271, "learning_rate": 1.1610258824306496e-06, "loss": 0.7668, "step": 291650 }, { "epoch": 1.8633326092789697, "grad_norm": 0.6473334431648254, "learning_rate": 1.1599511036270383e-06, "loss": 0.8058, "step": 291660 }, { "epoch": 1.8633964964287082, "grad_norm": 0.8759834170341492, "learning_rate": 1.158876816684984e-06, "loss": 1.0041, "step": 291670 }, { "epoch": 1.8634603835784471, "grad_norm": 0.7590951323509216, "learning_rate": 1.1578030216152835e-06, "loss": 0.7303, "step": 291680 }, { "epoch": 1.8635242707281856, "grad_norm": 0.8698746562004089, "learning_rate": 1.1567297184287618e-06, "loss": 0.6939, "step": 291690 }, { "epoch": 1.8635881578779245, "grad_norm": 0.7157119512557983, "learning_rate": 1.1556569071362322e-06, "loss": 1.029, "step": 291700 }, { "epoch": 1.863652045027663, "grad_norm": 1.00758695602417, "learning_rate": 1.1545845877484917e-06, "loss": 0.8832, "step": 291710 }, { "epoch": 1.863715932177402, "grad_norm": 4.0602288246154785, "learning_rate": 1.153512760276343e-06, "loss": 0.8208, "step": 291720 }, { "epoch": 1.8637798193271404, "grad_norm": 0.9860572218894958, "learning_rate": 1.152441424730577e-06, "loss": 0.7314, "step": 291730 }, { "epoch": 1.8638437064768794, "grad_norm": 1.118744134902954, "learning_rate": 1.151370581121991e-06, "loss": 1.145, "step": 291740 }, { "epoch": 1.8639075936266178, "grad_norm": 1.2803094387054443, "learning_rate": 1.150300229461354e-06, "loss": 0.7106, "step": 291750 }, { "epoch": 1.8639714807763568, "grad_norm": 0.9607510566711426, "learning_rate": 1.1492303697594632e-06, "loss": 0.9641, "step": 291760 }, { "epoch": 1.8640353679260953, "grad_norm": 1.4401522874832153, "learning_rate": 1.1481610020270761e-06, "loss": 1.2303, "step": 291770 }, { "epoch": 1.8640992550758342, "grad_norm": 1.5572141408920288, "learning_rate": 1.1470921262749789e-06, "loss": 0.7103, "step": 291780 }, { "epoch": 1.8641631422255727, "grad_norm": 0.9234890937805176, "learning_rate": 1.1460237425139242e-06, "loss": 0.6631, "step": 291790 }, { "epoch": 1.8642270293753116, "grad_norm": 0.7566113471984863, "learning_rate": 1.1449558507546754e-06, "loss": 1.0075, "step": 291800 }, { "epoch": 1.86429091652505, "grad_norm": 0.6989375352859497, "learning_rate": 1.143888451007985e-06, "loss": 0.8058, "step": 291810 }, { "epoch": 1.864354803674789, "grad_norm": 1.4395372867584229, "learning_rate": 1.1428215432846056e-06, "loss": 0.8574, "step": 291820 }, { "epoch": 1.8644186908245275, "grad_norm": 1.168811321258545, "learning_rate": 1.1417551275952786e-06, "loss": 0.9201, "step": 291830 }, { "epoch": 1.8644825779742664, "grad_norm": 0.9244377017021179, "learning_rate": 1.1406892039507511e-06, "loss": 0.992, "step": 291840 }, { "epoch": 1.864546465124005, "grad_norm": 0.9813092350959778, "learning_rate": 1.1396237723617476e-06, "loss": 0.891, "step": 291850 }, { "epoch": 1.8646103522737436, "grad_norm": 0.8621505498886108, "learning_rate": 1.1385588328390096e-06, "loss": 0.7602, "step": 291860 }, { "epoch": 1.8646742394234823, "grad_norm": 1.225386142730713, "learning_rate": 1.1374943853932452e-06, "loss": 0.8276, "step": 291870 }, { "epoch": 1.864738126573221, "grad_norm": 1.4588954448699951, "learning_rate": 1.1364304300351958e-06, "loss": 0.9811, "step": 291880 }, { "epoch": 1.8648020137229597, "grad_norm": 0.9999317526817322, "learning_rate": 1.1353669667755529e-06, "loss": 0.6851, "step": 291890 }, { "epoch": 1.8648659008726984, "grad_norm": 1.1840487718582153, "learning_rate": 1.1343039956250467e-06, "loss": 1.0295, "step": 291900 }, { "epoch": 1.8649297880224371, "grad_norm": 0.9932942986488342, "learning_rate": 1.1332415165943743e-06, "loss": 1.1241, "step": 291910 }, { "epoch": 1.8649936751721758, "grad_norm": 1.07947838306427, "learning_rate": 1.1321795296942273e-06, "loss": 0.9967, "step": 291920 }, { "epoch": 1.8650575623219146, "grad_norm": 0.7847141623497009, "learning_rate": 1.131118034935319e-06, "loss": 0.797, "step": 291930 }, { "epoch": 1.8651214494716533, "grad_norm": 0.7838201522827148, "learning_rate": 1.1301631104418842e-06, "loss": 0.7981, "step": 291940 }, { "epoch": 1.865185336621392, "grad_norm": 0.748388409614563, "learning_rate": 1.1291025507807584e-06, "loss": 1.0643, "step": 291950 }, { "epoch": 1.8652492237711307, "grad_norm": 0.8686913847923279, "learning_rate": 1.1280424832918413e-06, "loss": 0.9263, "step": 291960 }, { "epoch": 1.8653131109208694, "grad_norm": 0.9542390704154968, "learning_rate": 1.126982907985824e-06, "loss": 0.7934, "step": 291970 }, { "epoch": 1.865376998070608, "grad_norm": 0.9301961660385132, "learning_rate": 1.125923824873365e-06, "loss": 0.9645, "step": 291980 }, { "epoch": 1.8654408852203468, "grad_norm": 1.346179723739624, "learning_rate": 1.1248652339651388e-06, "loss": 0.7489, "step": 291990 }, { "epoch": 1.8655047723700855, "grad_norm": 0.9386990070343018, "learning_rate": 1.1238071352717984e-06, "loss": 0.7801, "step": 292000 }, { "epoch": 1.8655686595198242, "grad_norm": 1.1930619478225708, "learning_rate": 1.1227495288040013e-06, "loss": 0.9082, "step": 292010 }, { "epoch": 1.865632546669563, "grad_norm": 1.2605247497558594, "learning_rate": 1.1216924145724117e-06, "loss": 0.701, "step": 292020 }, { "epoch": 1.8656964338193016, "grad_norm": 0.5679419636726379, "learning_rate": 1.1206357925876542e-06, "loss": 0.9249, "step": 292030 }, { "epoch": 1.8657603209690403, "grad_norm": 0.7029687166213989, "learning_rate": 1.1195796628603927e-06, "loss": 0.6778, "step": 292040 }, { "epoch": 1.865824208118779, "grad_norm": 0.5767757296562195, "learning_rate": 1.1185240254012408e-06, "loss": 0.8903, "step": 292050 }, { "epoch": 1.8658880952685177, "grad_norm": 0.8139119744300842, "learning_rate": 1.1174688802208455e-06, "loss": 1.1197, "step": 292060 }, { "epoch": 1.8659519824182564, "grad_norm": 0.6697121262550354, "learning_rate": 1.1164142273298262e-06, "loss": 1.1021, "step": 292070 }, { "epoch": 1.8660158695679951, "grad_norm": 0.991718053817749, "learning_rate": 1.1153600667388132e-06, "loss": 0.8899, "step": 292080 }, { "epoch": 1.8660797567177339, "grad_norm": 1.0249316692352295, "learning_rate": 1.114306398458409e-06, "loss": 0.7283, "step": 292090 }, { "epoch": 1.8661436438674723, "grad_norm": 1.8425500392913818, "learning_rate": 1.113253222499233e-06, "loss": 0.7682, "step": 292100 }, { "epoch": 1.8662075310172113, "grad_norm": 0.7939163446426392, "learning_rate": 1.112200538871888e-06, "loss": 0.9963, "step": 292110 }, { "epoch": 1.8662714181669497, "grad_norm": 1.0355535745620728, "learning_rate": 1.1111483475869767e-06, "loss": 0.6348, "step": 292120 }, { "epoch": 1.8663353053166887, "grad_norm": 2.9539527893066406, "learning_rate": 1.1100966486551013e-06, "loss": 0.8386, "step": 292130 }, { "epoch": 1.8663991924664272, "grad_norm": 0.8861859440803528, "learning_rate": 1.1090454420868425e-06, "loss": 0.8075, "step": 292140 }, { "epoch": 1.866463079616166, "grad_norm": 0.9066374897956848, "learning_rate": 1.1079947278927971e-06, "loss": 0.7879, "step": 292150 }, { "epoch": 1.8665269667659046, "grad_norm": 0.9951545596122742, "learning_rate": 1.1069445060835403e-06, "loss": 1.1387, "step": 292160 }, { "epoch": 1.8665908539156435, "grad_norm": 1.09367036819458, "learning_rate": 1.1058947766696526e-06, "loss": 0.7966, "step": 292170 }, { "epoch": 1.866654741065382, "grad_norm": 1.531260371208191, "learning_rate": 1.104845539661703e-06, "loss": 0.727, "step": 292180 }, { "epoch": 1.866718628215121, "grad_norm": 2.0904078483581543, "learning_rate": 1.103796795070261e-06, "loss": 0.8937, "step": 292190 }, { "epoch": 1.8667825153648594, "grad_norm": 1.1105701923370361, "learning_rate": 1.1027485429058847e-06, "loss": 0.838, "step": 292200 }, { "epoch": 1.8668464025145983, "grad_norm": 1.3237465620040894, "learning_rate": 1.1017007831791326e-06, "loss": 0.6458, "step": 292210 }, { "epoch": 1.8669102896643368, "grad_norm": 0.9129582643508911, "learning_rate": 1.1006535159005571e-06, "loss": 0.9384, "step": 292220 }, { "epoch": 1.8669741768140757, "grad_norm": 1.4068692922592163, "learning_rate": 1.0996067410807053e-06, "loss": 1.1028, "step": 292230 }, { "epoch": 1.8670380639638142, "grad_norm": 0.8618285059928894, "learning_rate": 1.0985604587301135e-06, "loss": 0.9018, "step": 292240 }, { "epoch": 1.8671019511135531, "grad_norm": 1.1460120677947998, "learning_rate": 1.0975146688593341e-06, "loss": 0.9585, "step": 292250 }, { "epoch": 1.8671658382632916, "grad_norm": 0.9741784930229187, "learning_rate": 1.0964693714788753e-06, "loss": 0.8074, "step": 292260 }, { "epoch": 1.8672297254130306, "grad_norm": 0.9012332558631897, "learning_rate": 1.09542456659929e-06, "loss": 1.1284, "step": 292270 }, { "epoch": 1.867293612562769, "grad_norm": 1.258500337600708, "learning_rate": 1.094380254231081e-06, "loss": 0.7771, "step": 292280 }, { "epoch": 1.867357499712508, "grad_norm": 0.8321709036827087, "learning_rate": 1.093336434384773e-06, "loss": 0.7485, "step": 292290 }, { "epoch": 1.8674213868622465, "grad_norm": 0.9962365627288818, "learning_rate": 1.0922931070708742e-06, "loss": 0.8935, "step": 292300 }, { "epoch": 1.8674852740119854, "grad_norm": 0.6500903964042664, "learning_rate": 1.0912502722999042e-06, "loss": 0.7122, "step": 292310 }, { "epoch": 1.8675491611617239, "grad_norm": 1.1838070154190063, "learning_rate": 1.0902079300823487e-06, "loss": 0.8033, "step": 292320 }, { "epoch": 1.8676130483114628, "grad_norm": 0.6644697189331055, "learning_rate": 1.0891660804287108e-06, "loss": 0.7257, "step": 292330 }, { "epoch": 1.8676769354612013, "grad_norm": 0.8090490102767944, "learning_rate": 1.0881247233494928e-06, "loss": 0.9717, "step": 292340 }, { "epoch": 1.86774082261094, "grad_norm": 1.0317925214767456, "learning_rate": 1.0870838588551647e-06, "loss": 0.8417, "step": 292350 }, { "epoch": 1.8678047097606787, "grad_norm": 1.298757791519165, "learning_rate": 1.086043486956223e-06, "loss": 0.9991, "step": 292360 }, { "epoch": 1.8678685969104174, "grad_norm": 0.9334030151367188, "learning_rate": 1.0850036076631375e-06, "loss": 1.0006, "step": 292370 }, { "epoch": 1.867932484060156, "grad_norm": 1.458702564239502, "learning_rate": 1.0839642209863831e-06, "loss": 0.6428, "step": 292380 }, { "epoch": 1.8679963712098948, "grad_norm": 1.0316441059112549, "learning_rate": 1.0829253269364292e-06, "loss": 0.7904, "step": 292390 }, { "epoch": 1.8680602583596335, "grad_norm": 1.0853855609893799, "learning_rate": 1.0818869255237396e-06, "loss": 0.7114, "step": 292400 }, { "epoch": 1.8681241455093722, "grad_norm": 1.5720373392105103, "learning_rate": 1.0808490167587616e-06, "loss": 1.0899, "step": 292410 }, { "epoch": 1.868188032659111, "grad_norm": 0.6928960084915161, "learning_rate": 1.0798116006519587e-06, "loss": 0.6736, "step": 292420 }, { "epoch": 1.8682519198088496, "grad_norm": 0.898154079914093, "learning_rate": 1.078774677213773e-06, "loss": 0.9786, "step": 292430 }, { "epoch": 1.8683158069585883, "grad_norm": 1.0389325618743896, "learning_rate": 1.0777382464546571e-06, "loss": 0.7767, "step": 292440 }, { "epoch": 1.868379694108327, "grad_norm": 0.9077551364898682, "learning_rate": 1.0767023083850304e-06, "loss": 0.6231, "step": 292450 }, { "epoch": 1.8684435812580658, "grad_norm": 0.8413649797439575, "learning_rate": 1.0756668630153454e-06, "loss": 0.9749, "step": 292460 }, { "epoch": 1.8685074684078045, "grad_norm": 0.8598177433013916, "learning_rate": 1.0746319103560109e-06, "loss": 0.8766, "step": 292470 }, { "epoch": 1.8685713555575432, "grad_norm": 0.6066474318504333, "learning_rate": 1.0735974504174685e-06, "loss": 0.7777, "step": 292480 }, { "epoch": 1.8686352427072819, "grad_norm": 1.1965216398239136, "learning_rate": 1.0725634832101206e-06, "loss": 0.8804, "step": 292490 }, { "epoch": 1.8686991298570206, "grad_norm": 0.983538031578064, "learning_rate": 1.0715300087443925e-06, "loss": 0.8417, "step": 292500 }, { "epoch": 1.8687630170067593, "grad_norm": 0.6179125905036926, "learning_rate": 1.0704970270306813e-06, "loss": 0.9033, "step": 292510 }, { "epoch": 1.868826904156498, "grad_norm": 1.353305459022522, "learning_rate": 1.0694645380793956e-06, "loss": 1.0423, "step": 292520 }, { "epoch": 1.8688907913062367, "grad_norm": 0.8663296103477478, "learning_rate": 1.0684325419009322e-06, "loss": 0.688, "step": 292530 }, { "epoch": 1.8689546784559754, "grad_norm": 1.348539113998413, "learning_rate": 1.0674010385056887e-06, "loss": 0.9348, "step": 292540 }, { "epoch": 1.869018565605714, "grad_norm": 0.8060293793678284, "learning_rate": 1.0663700279040455e-06, "loss": 0.6422, "step": 292550 }, { "epoch": 1.8690824527554528, "grad_norm": 0.7460553050041199, "learning_rate": 1.0653395101063945e-06, "loss": 0.7276, "step": 292560 }, { "epoch": 1.8691463399051915, "grad_norm": 0.8735922574996948, "learning_rate": 1.0643094851231106e-06, "loss": 0.7634, "step": 292570 }, { "epoch": 1.8692102270549302, "grad_norm": 0.6985118389129639, "learning_rate": 1.0632799529645577e-06, "loss": 1.0026, "step": 292580 }, { "epoch": 1.8692741142046687, "grad_norm": 0.7997674942016602, "learning_rate": 1.0622509136411219e-06, "loss": 0.8164, "step": 292590 }, { "epoch": 1.8693380013544076, "grad_norm": 0.8839585185050964, "learning_rate": 1.061222367163145e-06, "loss": 1.2042, "step": 292600 }, { "epoch": 1.8694018885041461, "grad_norm": 0.857516348361969, "learning_rate": 1.0601943135410076e-06, "loss": 0.9505, "step": 292610 }, { "epoch": 1.869465775653885, "grad_norm": 0.7346429228782654, "learning_rate": 1.0591667527850456e-06, "loss": 1.3544, "step": 292620 }, { "epoch": 1.8695296628036235, "grad_norm": 1.585056185722351, "learning_rate": 1.058139684905618e-06, "loss": 0.7689, "step": 292630 }, { "epoch": 1.8695935499533625, "grad_norm": 1.9108575582504272, "learning_rate": 1.0571131099130605e-06, "loss": 1.1067, "step": 292640 }, { "epoch": 1.869657437103101, "grad_norm": 0.894305408000946, "learning_rate": 1.0560870278177148e-06, "loss": 1.0915, "step": 292650 }, { "epoch": 1.8697213242528399, "grad_norm": 0.8008806705474854, "learning_rate": 1.0550614386299174e-06, "loss": 0.8791, "step": 292660 }, { "epoch": 1.8697852114025784, "grad_norm": 0.8988972306251526, "learning_rate": 1.0540363423599931e-06, "loss": 0.9824, "step": 292670 }, { "epoch": 1.8698490985523173, "grad_norm": 1.3784595727920532, "learning_rate": 1.0530117390182724e-06, "loss": 0.9264, "step": 292680 }, { "epoch": 1.8699129857020558, "grad_norm": 0.7012110352516174, "learning_rate": 1.0519876286150644e-06, "loss": 0.7323, "step": 292690 }, { "epoch": 1.8699768728517947, "grad_norm": 0.7183138132095337, "learning_rate": 1.0509640111606878e-06, "loss": 0.8072, "step": 292700 }, { "epoch": 1.8700407600015332, "grad_norm": 1.9337592124938965, "learning_rate": 1.0499408866654515e-06, "loss": 1.141, "step": 292710 }, { "epoch": 1.8701046471512721, "grad_norm": 0.8221973180770874, "learning_rate": 1.0489182551396582e-06, "loss": 0.663, "step": 292720 }, { "epoch": 1.8701685343010106, "grad_norm": 0.6989620923995972, "learning_rate": 1.0478961165936052e-06, "loss": 0.8676, "step": 292730 }, { "epoch": 1.8702324214507495, "grad_norm": 1.8266626596450806, "learning_rate": 1.04687447103759e-06, "loss": 0.7127, "step": 292740 }, { "epoch": 1.870296308600488, "grad_norm": 0.7952519655227661, "learning_rate": 1.0458533184818985e-06, "loss": 0.7495, "step": 292750 }, { "epoch": 1.870360195750227, "grad_norm": 1.0890142917633057, "learning_rate": 1.0448326589368174e-06, "loss": 0.959, "step": 292760 }, { "epoch": 1.8704240828999654, "grad_norm": 0.8280913829803467, "learning_rate": 1.0438124924126214e-06, "loss": 0.6805, "step": 292770 }, { "epoch": 1.8704879700497044, "grad_norm": 1.139344334602356, "learning_rate": 1.0427928189195858e-06, "loss": 1.0305, "step": 292780 }, { "epoch": 1.8705518571994428, "grad_norm": 1.243733286857605, "learning_rate": 1.04177363846798e-06, "loss": 0.6495, "step": 292790 }, { "epoch": 1.8706157443491818, "grad_norm": 0.7558228373527527, "learning_rate": 1.0407549510680737e-06, "loss": 0.7663, "step": 292800 }, { "epoch": 1.8706796314989202, "grad_norm": 1.4247775077819824, "learning_rate": 1.0397367567301141e-06, "loss": 0.9372, "step": 292810 }, { "epoch": 1.870743518648659, "grad_norm": 2.343604803085327, "learning_rate": 1.0387190554643656e-06, "loss": 0.8101, "step": 292820 }, { "epoch": 1.8708074057983977, "grad_norm": 1.535949945449829, "learning_rate": 1.037701847281075e-06, "loss": 0.7573, "step": 292830 }, { "epoch": 1.8708712929481364, "grad_norm": 1.2534575462341309, "learning_rate": 1.0366851321904846e-06, "loss": 0.8837, "step": 292840 }, { "epoch": 1.870935180097875, "grad_norm": 0.8451334834098816, "learning_rate": 1.0356689102028305e-06, "loss": 0.868, "step": 292850 }, { "epoch": 1.8709990672476138, "grad_norm": 1.0970386266708374, "learning_rate": 1.0346531813283488e-06, "loss": 0.9886, "step": 292860 }, { "epoch": 1.8710629543973525, "grad_norm": 1.1927027702331543, "learning_rate": 1.0336379455772816e-06, "loss": 0.8813, "step": 292870 }, { "epoch": 1.8711268415470912, "grad_norm": 0.9931333661079407, "learning_rate": 1.0326232029598314e-06, "loss": 0.7571, "step": 292880 }, { "epoch": 1.87119072869683, "grad_norm": 1.6181813478469849, "learning_rate": 1.031608953486235e-06, "loss": 0.7072, "step": 292890 }, { "epoch": 1.8712546158465686, "grad_norm": 0.9007042050361633, "learning_rate": 1.030595197166695e-06, "loss": 0.7609, "step": 292900 }, { "epoch": 1.8713185029963073, "grad_norm": 1.2203731536865234, "learning_rate": 1.029581934011431e-06, "loss": 0.9663, "step": 292910 }, { "epoch": 1.871382390146046, "grad_norm": 1.218652606010437, "learning_rate": 1.0285691640306405e-06, "loss": 1.052, "step": 292920 }, { "epoch": 1.8714462772957847, "grad_norm": 1.5266693830490112, "learning_rate": 1.0275568872345264e-06, "loss": 0.8739, "step": 292930 }, { "epoch": 1.8715101644455234, "grad_norm": 1.157273292541504, "learning_rate": 1.0265451036332751e-06, "loss": 0.7755, "step": 292940 }, { "epoch": 1.8715740515952621, "grad_norm": 2.2461156845092773, "learning_rate": 1.0255338132370895e-06, "loss": 1.2531, "step": 292950 }, { "epoch": 1.8716379387450008, "grad_norm": 1.6639330387115479, "learning_rate": 1.0245230160561447e-06, "loss": 0.9475, "step": 292960 }, { "epoch": 1.8717018258947395, "grad_norm": 0.8339784145355225, "learning_rate": 1.023512712100627e-06, "loss": 0.6774, "step": 292970 }, { "epoch": 1.8717657130444783, "grad_norm": 0.7059604525566101, "learning_rate": 1.0225029013807009e-06, "loss": 0.8867, "step": 292980 }, { "epoch": 1.871829600194217, "grad_norm": 1.0560535192489624, "learning_rate": 1.0214935839065465e-06, "loss": 0.6824, "step": 292990 }, { "epoch": 1.8718934873439557, "grad_norm": 1.1694117784500122, "learning_rate": 1.0204847596883228e-06, "loss": 0.825, "step": 293000 }, { "epoch": 1.8719573744936944, "grad_norm": 3.1129536628723145, "learning_rate": 1.019476428736188e-06, "loss": 0.9892, "step": 293010 }, { "epoch": 1.872021261643433, "grad_norm": 1.1729931831359863, "learning_rate": 1.0184685910603009e-06, "loss": 0.7504, "step": 293020 }, { "epoch": 1.8720851487931718, "grad_norm": 1.0270169973373413, "learning_rate": 1.0174612466708143e-06, "loss": 0.8028, "step": 293030 }, { "epoch": 1.8721490359429105, "grad_norm": 0.8455641269683838, "learning_rate": 1.016454395577865e-06, "loss": 0.7136, "step": 293040 }, { "epoch": 1.8722129230926492, "grad_norm": 1.1827267408370972, "learning_rate": 1.0154480377915999e-06, "loss": 1.1107, "step": 293050 }, { "epoch": 1.872276810242388, "grad_norm": 0.7174341082572937, "learning_rate": 1.0144421733221499e-06, "loss": 0.7128, "step": 293060 }, { "epoch": 1.8723406973921266, "grad_norm": 0.9067844152450562, "learning_rate": 1.0134368021796402e-06, "loss": 0.9257, "step": 293070 }, { "epoch": 1.872404584541865, "grad_norm": 0.9582309126853943, "learning_rate": 1.0124319243742075e-06, "loss": 0.8328, "step": 293080 }, { "epoch": 1.872468471691604, "grad_norm": 0.9209734797477722, "learning_rate": 1.0114275399159656e-06, "loss": 0.6514, "step": 293090 }, { "epoch": 1.8725323588413425, "grad_norm": 2.1290807723999023, "learning_rate": 1.0104236488150288e-06, "loss": 0.9401, "step": 293100 }, { "epoch": 1.8725962459910814, "grad_norm": 0.7640361189842224, "learning_rate": 1.0094202510815054e-06, "loss": 0.8225, "step": 293110 }, { "epoch": 1.87266013314082, "grad_norm": 1.0006041526794434, "learning_rate": 1.0084173467255042e-06, "loss": 0.7502, "step": 293120 }, { "epoch": 1.8727240202905588, "grad_norm": 0.6950806379318237, "learning_rate": 1.0074149357571227e-06, "loss": 0.7883, "step": 293130 }, { "epoch": 1.8727879074402973, "grad_norm": 1.5263314247131348, "learning_rate": 1.0064130181864584e-06, "loss": 0.8787, "step": 293140 }, { "epoch": 1.8728517945900363, "grad_norm": 1.1775399446487427, "learning_rate": 1.005411594023603e-06, "loss": 1.0736, "step": 293150 }, { "epoch": 1.8729156817397747, "grad_norm": 1.1319595575332642, "learning_rate": 1.0044106632786377e-06, "loss": 0.8147, "step": 293160 }, { "epoch": 1.8729795688895137, "grad_norm": 1.4746376276016235, "learning_rate": 1.003410225961643e-06, "loss": 0.8774, "step": 293170 }, { "epoch": 1.8730434560392522, "grad_norm": 1.2940460443496704, "learning_rate": 1.002410282082694e-06, "loss": 0.7233, "step": 293180 }, { "epoch": 1.873107343188991, "grad_norm": 0.9180367588996887, "learning_rate": 1.0014108316518667e-06, "loss": 0.7021, "step": 293190 }, { "epoch": 1.8731712303387296, "grad_norm": 1.2637653350830078, "learning_rate": 1.0004118746792136e-06, "loss": 1.0029, "step": 293200 }, { "epoch": 1.8732351174884685, "grad_norm": 0.6989993453025818, "learning_rate": 9.994134111748155e-07, "loss": 0.7655, "step": 293210 }, { "epoch": 1.873299004638207, "grad_norm": 0.7211950421333313, "learning_rate": 9.984154411487035e-07, "loss": 0.9773, "step": 293220 }, { "epoch": 1.873362891787946, "grad_norm": 0.7398399710655212, "learning_rate": 9.974179646109527e-07, "loss": 0.8612, "step": 293230 }, { "epoch": 1.8734267789376844, "grad_norm": 0.6061854362487793, "learning_rate": 9.964209815715885e-07, "loss": 0.8147, "step": 293240 }, { "epoch": 1.8734906660874233, "grad_norm": 2.689434289932251, "learning_rate": 9.95424492040664e-07, "loss": 0.7556, "step": 293250 }, { "epoch": 1.8735545532371618, "grad_norm": 0.9904452562332153, "learning_rate": 9.944284960282047e-07, "loss": 0.8019, "step": 293260 }, { "epoch": 1.8736184403869007, "grad_norm": 0.8292899131774902, "learning_rate": 9.934329935442522e-07, "loss": 0.8431, "step": 293270 }, { "epoch": 1.8736823275366392, "grad_norm": 0.9797796607017517, "learning_rate": 9.924379845988207e-07, "loss": 0.7326, "step": 293280 }, { "epoch": 1.8737462146863781, "grad_norm": 1.3742291927337646, "learning_rate": 9.914434692019358e-07, "loss": 0.8503, "step": 293290 }, { "epoch": 1.8738101018361166, "grad_norm": 0.8166207671165466, "learning_rate": 9.904494473636173e-07, "loss": 1.1437, "step": 293300 }, { "epoch": 1.8738739889858553, "grad_norm": 1.2097806930541992, "learning_rate": 9.894559190938736e-07, "loss": 0.9368, "step": 293310 }, { "epoch": 1.873937876135594, "grad_norm": 0.9409804940223694, "learning_rate": 9.88462884402702e-07, "loss": 0.7663, "step": 293320 }, { "epoch": 1.8740017632853327, "grad_norm": 0.996408998966217, "learning_rate": 9.874703433001175e-07, "loss": 0.9313, "step": 293330 }, { "epoch": 1.8740656504350715, "grad_norm": 2.0611023902893066, "learning_rate": 9.86478295796106e-07, "loss": 0.7475, "step": 293340 }, { "epoch": 1.8741295375848102, "grad_norm": 1.0358458757400513, "learning_rate": 9.854867419006597e-07, "loss": 0.9663, "step": 293350 }, { "epoch": 1.8741934247345489, "grad_norm": 0.6037928462028503, "learning_rate": 9.84495681623765e-07, "loss": 0.911, "step": 293360 }, { "epoch": 1.8742573118842876, "grad_norm": 0.8664078712463379, "learning_rate": 9.83505114975408e-07, "loss": 0.9303, "step": 293370 }, { "epoch": 1.8743211990340263, "grad_norm": 1.4451388120651245, "learning_rate": 9.825150419655538e-07, "loss": 0.8275, "step": 293380 }, { "epoch": 1.874385086183765, "grad_norm": 1.456311583518982, "learning_rate": 9.81525462604177e-07, "loss": 0.8143, "step": 293390 }, { "epoch": 1.8744489733335037, "grad_norm": 0.6796972155570984, "learning_rate": 9.805363769012532e-07, "loss": 0.8208, "step": 293400 }, { "epoch": 1.8745128604832424, "grad_norm": 0.8689972162246704, "learning_rate": 9.7954778486673e-07, "loss": 0.8274, "step": 293410 }, { "epoch": 1.874576747632981, "grad_norm": 1.17522132396698, "learning_rate": 9.785596865105772e-07, "loss": 0.8881, "step": 293420 }, { "epoch": 1.8746406347827198, "grad_norm": 0.7953550219535828, "learning_rate": 9.775720818427315e-07, "loss": 0.8244, "step": 293430 }, { "epoch": 1.8747045219324585, "grad_norm": 0.6701993942260742, "learning_rate": 9.765849708731455e-07, "loss": 0.7774, "step": 293440 }, { "epoch": 1.8747684090821972, "grad_norm": 1.067297101020813, "learning_rate": 9.755983536117618e-07, "loss": 0.8687, "step": 293450 }, { "epoch": 1.874832296231936, "grad_norm": 1.085796594619751, "learning_rate": 9.746122300685168e-07, "loss": 0.8056, "step": 293460 }, { "epoch": 1.8748961833816746, "grad_norm": 0.8632622957229614, "learning_rate": 9.736266002533357e-07, "loss": 0.9159, "step": 293470 }, { "epoch": 1.8749600705314133, "grad_norm": 0.7865797877311707, "learning_rate": 9.72641464176155e-07, "loss": 0.7732, "step": 293480 }, { "epoch": 1.875023957681152, "grad_norm": 1.2301065921783447, "learning_rate": 9.71656821846878e-07, "loss": 1.1233, "step": 293490 }, { "epoch": 1.8750878448308907, "grad_norm": 0.8430875539779663, "learning_rate": 9.706726732754413e-07, "loss": 0.8162, "step": 293500 }, { "epoch": 1.8751517319806295, "grad_norm": 1.114546775817871, "learning_rate": 9.696890184717478e-07, "loss": 1.1276, "step": 293510 }, { "epoch": 1.8752156191303682, "grad_norm": 0.7824980616569519, "learning_rate": 9.687058574457008e-07, "loss": 0.7735, "step": 293520 }, { "epoch": 1.8752795062801069, "grad_norm": 0.917863667011261, "learning_rate": 9.677231902072037e-07, "loss": 0.918, "step": 293530 }, { "epoch": 1.8753433934298456, "grad_norm": 0.9983965754508972, "learning_rate": 9.66741016766154e-07, "loss": 0.6978, "step": 293540 }, { "epoch": 1.875407280579584, "grad_norm": 1.4215760231018066, "learning_rate": 9.657593371324437e-07, "loss": 0.9128, "step": 293550 }, { "epoch": 1.875471167729323, "grad_norm": 1.064919352531433, "learning_rate": 9.647781513159538e-07, "loss": 0.7519, "step": 293560 }, { "epoch": 1.8755350548790615, "grad_norm": 1.0606855154037476, "learning_rate": 9.637974593265708e-07, "loss": 0.9517, "step": 293570 }, { "epoch": 1.8755989420288004, "grad_norm": 1.0682787895202637, "learning_rate": 9.628172611741647e-07, "loss": 0.8493, "step": 293580 }, { "epoch": 1.8756628291785389, "grad_norm": 1.3362362384796143, "learning_rate": 9.618375568686222e-07, "loss": 0.7255, "step": 293590 }, { "epoch": 1.8757267163282778, "grad_norm": 1.1866968870162964, "learning_rate": 9.608583464197907e-07, "loss": 0.9001, "step": 293600 }, { "epoch": 1.8757906034780163, "grad_norm": 0.8186989426612854, "learning_rate": 9.598796298375456e-07, "loss": 0.8377, "step": 293610 }, { "epoch": 1.8758544906277552, "grad_norm": 1.4240480661392212, "learning_rate": 9.589014071317348e-07, "loss": 0.6917, "step": 293620 }, { "epoch": 1.8759183777774937, "grad_norm": 0.9501568675041199, "learning_rate": 9.579236783122169e-07, "loss": 0.754, "step": 293630 }, { "epoch": 1.8759822649272326, "grad_norm": 1.0874764919281006, "learning_rate": 9.569464433888342e-07, "loss": 0.9862, "step": 293640 }, { "epoch": 1.8760461520769711, "grad_norm": 1.2083163261413574, "learning_rate": 9.559697023714286e-07, "loss": 0.7187, "step": 293650 }, { "epoch": 1.87611003922671, "grad_norm": 0.9831110239028931, "learning_rate": 9.54993455269837e-07, "loss": 0.9622, "step": 293660 }, { "epoch": 1.8761739263764485, "grad_norm": 1.3009384870529175, "learning_rate": 9.540177020938902e-07, "loss": 0.8913, "step": 293670 }, { "epoch": 1.8762378135261875, "grad_norm": 1.0569088459014893, "learning_rate": 9.530424428534135e-07, "loss": 0.7587, "step": 293680 }, { "epoch": 1.876301700675926, "grad_norm": 1.2314594984054565, "learning_rate": 9.520676775582382e-07, "loss": 0.8637, "step": 293690 }, { "epoch": 1.8763655878256649, "grad_norm": 0.889288604259491, "learning_rate": 9.510934062181675e-07, "loss": 0.7726, "step": 293700 }, { "epoch": 1.8764294749754034, "grad_norm": 0.7331340312957764, "learning_rate": 9.501196288430215e-07, "loss": 0.6647, "step": 293710 }, { "epoch": 1.8764933621251423, "grad_norm": 2.036623239517212, "learning_rate": 9.491463454426086e-07, "loss": 0.8645, "step": 293720 }, { "epoch": 1.8765572492748808, "grad_norm": 1.0206176042556763, "learning_rate": 9.481735560267213e-07, "loss": 0.8772, "step": 293730 }, { "epoch": 1.8766211364246197, "grad_norm": 2.132143259048462, "learning_rate": 9.472012606051683e-07, "loss": 0.936, "step": 293740 }, { "epoch": 1.8766850235743582, "grad_norm": 1.135907769203186, "learning_rate": 9.462294591877307e-07, "loss": 0.8672, "step": 293750 }, { "epoch": 1.876748910724097, "grad_norm": 0.9503557682037354, "learning_rate": 9.452581517842008e-07, "loss": 0.9258, "step": 293760 }, { "epoch": 1.8768127978738356, "grad_norm": 1.0329943895339966, "learning_rate": 9.442873384043594e-07, "loss": 0.6602, "step": 293770 }, { "epoch": 1.8768766850235745, "grad_norm": 1.0028157234191895, "learning_rate": 9.433170190579876e-07, "loss": 0.9158, "step": 293780 }, { "epoch": 1.876940572173313, "grad_norm": 0.954515814781189, "learning_rate": 9.4234719375485e-07, "loss": 0.8213, "step": 293790 }, { "epoch": 1.8770044593230517, "grad_norm": 0.5199512839317322, "learning_rate": 9.413778625047165e-07, "loss": 0.871, "step": 293800 }, { "epoch": 1.8770683464727904, "grad_norm": 1.4453988075256348, "learning_rate": 9.404090253173514e-07, "loss": 0.8009, "step": 293810 }, { "epoch": 1.8771322336225291, "grad_norm": 0.8859098553657532, "learning_rate": 9.394406822025081e-07, "loss": 0.8949, "step": 293820 }, { "epoch": 1.8771961207722678, "grad_norm": 4.4526047706604, "learning_rate": 9.384728331699399e-07, "loss": 0.869, "step": 293830 }, { "epoch": 1.8772600079220065, "grad_norm": 0.8694409728050232, "learning_rate": 9.375054782294001e-07, "loss": 0.8192, "step": 293840 }, { "epoch": 1.8773238950717452, "grad_norm": 1.8180179595947266, "learning_rate": 9.365386173906199e-07, "loss": 0.9313, "step": 293850 }, { "epoch": 1.877387782221484, "grad_norm": 0.6987054347991943, "learning_rate": 9.355722506633469e-07, "loss": 0.9417, "step": 293860 }, { "epoch": 1.8774516693712227, "grad_norm": 1.0461370944976807, "learning_rate": 9.346063780573011e-07, "loss": 0.7896, "step": 293870 }, { "epoch": 1.8775155565209614, "grad_norm": 1.137550711631775, "learning_rate": 9.336409995822193e-07, "loss": 0.9223, "step": 293880 }, { "epoch": 1.8775794436707, "grad_norm": 1.02553129196167, "learning_rate": 9.326761152478214e-07, "loss": 0.7892, "step": 293890 }, { "epoch": 1.8776433308204388, "grad_norm": 1.0381640195846558, "learning_rate": 9.317117250638274e-07, "loss": 1.0834, "step": 293900 }, { "epoch": 1.8777072179701775, "grad_norm": 0.9272412061691284, "learning_rate": 9.307478290399408e-07, "loss": 0.9732, "step": 293910 }, { "epoch": 1.8777711051199162, "grad_norm": 1.2473331689834595, "learning_rate": 9.297844271858758e-07, "loss": 0.962, "step": 293920 }, { "epoch": 1.877834992269655, "grad_norm": 1.2069578170776367, "learning_rate": 9.288215195113359e-07, "loss": 1.2623, "step": 293930 }, { "epoch": 1.8778988794193936, "grad_norm": 1.2229667901992798, "learning_rate": 9.278591060260134e-07, "loss": 0.6933, "step": 293940 }, { "epoch": 1.8779627665691323, "grad_norm": 0.9840774536132812, "learning_rate": 9.268971867396114e-07, "loss": 0.6754, "step": 293950 }, { "epoch": 1.878026653718871, "grad_norm": 1.087142825126648, "learning_rate": 9.259357616618003e-07, "loss": 0.8483, "step": 293960 }, { "epoch": 1.8780905408686097, "grad_norm": 1.1667194366455078, "learning_rate": 9.249748308022721e-07, "loss": 0.8202, "step": 293970 }, { "epoch": 1.8781544280183484, "grad_norm": 0.888049304485321, "learning_rate": 9.240143941707024e-07, "loss": 1.299, "step": 293980 }, { "epoch": 1.8782183151680871, "grad_norm": 1.1439584493637085, "learning_rate": 9.230544517767726e-07, "loss": 0.5954, "step": 293990 }, { "epoch": 1.8782822023178258, "grad_norm": 0.981224000453949, "learning_rate": 9.220950036301302e-07, "loss": 0.8226, "step": 294000 }, { "epoch": 1.8783460894675645, "grad_norm": 1.2684990167617798, "learning_rate": 9.21136049740462e-07, "loss": 0.9423, "step": 294010 }, { "epoch": 1.8784099766173032, "grad_norm": 1.2848501205444336, "learning_rate": 9.201775901174048e-07, "loss": 0.7289, "step": 294020 }, { "epoch": 1.878473863767042, "grad_norm": 0.8658862709999084, "learning_rate": 9.192196247706231e-07, "loss": 0.8491, "step": 294030 }, { "epoch": 1.8785377509167804, "grad_norm": 1.240254282951355, "learning_rate": 9.182621537097591e-07, "loss": 0.9544, "step": 294040 }, { "epoch": 1.8786016380665194, "grad_norm": 1.1382461786270142, "learning_rate": 9.173051769444552e-07, "loss": 0.8901, "step": 294050 }, { "epoch": 1.8786655252162578, "grad_norm": 1.0991541147232056, "learning_rate": 9.163486944843536e-07, "loss": 0.7114, "step": 294060 }, { "epoch": 1.8787294123659968, "grad_norm": 1.1713433265686035, "learning_rate": 9.1539270633908e-07, "loss": 0.7156, "step": 294070 }, { "epoch": 1.8787932995157353, "grad_norm": 1.8868058919906616, "learning_rate": 9.14437212518271e-07, "loss": 0.9405, "step": 294080 }, { "epoch": 1.8788571866654742, "grad_norm": 1.1627702713012695, "learning_rate": 9.134822130315413e-07, "loss": 1.0747, "step": 294090 }, { "epoch": 1.8789210738152127, "grad_norm": 1.2614167928695679, "learning_rate": 9.125277078885164e-07, "loss": 0.8957, "step": 294100 }, { "epoch": 1.8789849609649516, "grad_norm": 1.0662238597869873, "learning_rate": 9.115736970987943e-07, "loss": 0.6811, "step": 294110 }, { "epoch": 1.87904884811469, "grad_norm": 1.1519864797592163, "learning_rate": 9.106201806720005e-07, "loss": 1.1223, "step": 294120 }, { "epoch": 1.879112735264429, "grad_norm": 1.1450780630111694, "learning_rate": 9.096671586177274e-07, "loss": 0.8483, "step": 294130 }, { "epoch": 1.8791766224141675, "grad_norm": 0.6603822708129883, "learning_rate": 9.087146309455786e-07, "loss": 0.6462, "step": 294140 }, { "epoch": 1.8792405095639064, "grad_norm": 0.8664895296096802, "learning_rate": 9.077625976651349e-07, "loss": 0.7966, "step": 294150 }, { "epoch": 1.879304396713645, "grad_norm": 1.2432823181152344, "learning_rate": 9.068110587860001e-07, "loss": 0.9826, "step": 294160 }, { "epoch": 1.8793682838633838, "grad_norm": 0.8814300894737244, "learning_rate": 9.058600143177498e-07, "loss": 0.9677, "step": 294170 }, { "epoch": 1.8794321710131223, "grad_norm": 1.6960625648498535, "learning_rate": 9.050044970255411e-07, "loss": 0.8404, "step": 294180 }, { "epoch": 1.8794960581628612, "grad_norm": 2.7894229888916016, "learning_rate": 9.04054391964354e-07, "loss": 0.937, "step": 294190 }, { "epoch": 1.8795599453125997, "grad_norm": 1.0094927549362183, "learning_rate": 9.031047813418125e-07, "loss": 0.7125, "step": 294200 }, { "epoch": 1.8796238324623387, "grad_norm": 1.201365351676941, "learning_rate": 9.021556651674812e-07, "loss": 0.8694, "step": 294210 }, { "epoch": 1.8796877196120771, "grad_norm": 3.078171730041504, "learning_rate": 9.012070434509134e-07, "loss": 0.7851, "step": 294220 }, { "epoch": 1.879751606761816, "grad_norm": 1.129648208618164, "learning_rate": 9.002589162016684e-07, "loss": 0.9309, "step": 294230 }, { "epoch": 1.8798154939115546, "grad_norm": 1.0142879486083984, "learning_rate": 8.993112834292938e-07, "loss": 0.7422, "step": 294240 }, { "epoch": 1.8798793810612935, "grad_norm": 0.6870994567871094, "learning_rate": 8.983641451433378e-07, "loss": 0.77, "step": 294250 }, { "epoch": 1.879943268211032, "grad_norm": 0.812430202960968, "learning_rate": 8.97417501353326e-07, "loss": 1.0045, "step": 294260 }, { "epoch": 1.880007155360771, "grad_norm": 0.6833242774009705, "learning_rate": 8.964713520688061e-07, "loss": 0.9386, "step": 294270 }, { "epoch": 1.8800710425105094, "grad_norm": 1.2980430126190186, "learning_rate": 8.955256972992931e-07, "loss": 0.9414, "step": 294280 }, { "epoch": 1.880134929660248, "grad_norm": 1.0034370422363281, "learning_rate": 8.945805370543292e-07, "loss": 0.8657, "step": 294290 }, { "epoch": 1.8801988168099868, "grad_norm": 1.363470435142517, "learning_rate": 8.936358713434124e-07, "loss": 0.8611, "step": 294300 }, { "epoch": 1.8802627039597255, "grad_norm": 0.93649822473526, "learning_rate": 8.926917001760682e-07, "loss": 0.9878, "step": 294310 }, { "epoch": 1.8803265911094642, "grad_norm": 1.797559142112732, "learning_rate": 8.917480235618003e-07, "loss": 0.8329, "step": 294320 }, { "epoch": 1.880390478259203, "grad_norm": 1.6403647661209106, "learning_rate": 8.908048415101178e-07, "loss": 0.7079, "step": 294330 }, { "epoch": 1.8804543654089416, "grad_norm": 0.9935147166252136, "learning_rate": 8.89862154030513e-07, "loss": 0.8007, "step": 294340 }, { "epoch": 1.8805182525586803, "grad_norm": 1.2044225931167603, "learning_rate": 8.889199611324783e-07, "loss": 1.0715, "step": 294350 }, { "epoch": 1.880582139708419, "grad_norm": 0.8857986330986023, "learning_rate": 8.879782628255173e-07, "loss": 0.695, "step": 294360 }, { "epoch": 1.8806460268581577, "grad_norm": 0.9867647290229797, "learning_rate": 8.870370591190946e-07, "loss": 0.8391, "step": 294370 }, { "epoch": 1.8807099140078964, "grad_norm": 0.795604407787323, "learning_rate": 8.860963500227027e-07, "loss": 0.7813, "step": 294380 }, { "epoch": 1.8807738011576352, "grad_norm": 1.4664380550384521, "learning_rate": 8.85156135545806e-07, "loss": 1.0997, "step": 294390 }, { "epoch": 1.8808376883073739, "grad_norm": 0.8081570863723755, "learning_rate": 8.842164156978861e-07, "loss": 0.8407, "step": 294400 }, { "epoch": 1.8809015754571126, "grad_norm": 0.9665111303329468, "learning_rate": 8.832771904883851e-07, "loss": 0.6138, "step": 294410 }, { "epoch": 1.8809654626068513, "grad_norm": 0.6109456419944763, "learning_rate": 8.823384599267848e-07, "loss": 0.7707, "step": 294420 }, { "epoch": 1.88102934975659, "grad_norm": 1.4124689102172852, "learning_rate": 8.814002240225272e-07, "loss": 1.1025, "step": 294430 }, { "epoch": 1.8810932369063287, "grad_norm": 0.8159995675086975, "learning_rate": 8.804624827850605e-07, "loss": 1.0161, "step": 294440 }, { "epoch": 1.8811571240560674, "grad_norm": 1.37120521068573, "learning_rate": 8.795252362238327e-07, "loss": 0.9202, "step": 294450 }, { "epoch": 1.881221011205806, "grad_norm": 0.7169210910797119, "learning_rate": 8.785884843482806e-07, "loss": 0.9267, "step": 294460 }, { "epoch": 1.8812848983555448, "grad_norm": 2.554936408996582, "learning_rate": 8.776522271678356e-07, "loss": 1.1622, "step": 294470 }, { "epoch": 1.8813487855052835, "grad_norm": 1.1686550378799438, "learning_rate": 8.767164646919346e-07, "loss": 0.7131, "step": 294480 }, { "epoch": 1.8814126726550222, "grad_norm": 0.7276962399482727, "learning_rate": 8.757811969299923e-07, "loss": 0.8749, "step": 294490 }, { "epoch": 1.881476559804761, "grad_norm": 1.421937108039856, "learning_rate": 8.748464238914344e-07, "loss": 1.047, "step": 294500 }, { "epoch": 1.8815404469544996, "grad_norm": 0.8149546980857849, "learning_rate": 8.739121455856703e-07, "loss": 0.7673, "step": 294510 }, { "epoch": 1.8816043341042383, "grad_norm": 1.083941102027893, "learning_rate": 8.729783620221143e-07, "loss": 0.5612, "step": 294520 }, { "epoch": 1.8816682212539768, "grad_norm": 1.2710938453674316, "learning_rate": 8.720450732101649e-07, "loss": 0.8329, "step": 294530 }, { "epoch": 1.8817321084037157, "grad_norm": 0.7315378189086914, "learning_rate": 8.711122791592252e-07, "loss": 0.6684, "step": 294540 }, { "epoch": 1.8817959955534542, "grad_norm": 1.0665138959884644, "learning_rate": 8.701799798786825e-07, "loss": 0.9872, "step": 294550 }, { "epoch": 1.8818598827031932, "grad_norm": 0.810028076171875, "learning_rate": 8.692481753779347e-07, "loss": 0.8548, "step": 294560 }, { "epoch": 1.8819237698529316, "grad_norm": 0.8060693740844727, "learning_rate": 8.683168656663631e-07, "loss": 0.8849, "step": 294570 }, { "epoch": 1.8819876570026706, "grad_norm": 1.5116766691207886, "learning_rate": 8.673860507533437e-07, "loss": 0.9512, "step": 294580 }, { "epoch": 1.882051544152409, "grad_norm": 1.0401395559310913, "learning_rate": 8.664557306482523e-07, "loss": 0.7837, "step": 294590 }, { "epoch": 1.882115431302148, "grad_norm": 0.8519911170005798, "learning_rate": 8.655259053604592e-07, "loss": 0.8609, "step": 294600 }, { "epoch": 1.8821793184518865, "grad_norm": 1.1604868173599243, "learning_rate": 8.64596574899329e-07, "loss": 0.717, "step": 294610 }, { "epoch": 1.8822432056016254, "grad_norm": 1.0016506910324097, "learning_rate": 8.636677392742154e-07, "loss": 1.0582, "step": 294620 }, { "epoch": 1.8823070927513639, "grad_norm": 0.9217401742935181, "learning_rate": 8.627393984944776e-07, "loss": 0.6711, "step": 294630 }, { "epoch": 1.8823709799011028, "grad_norm": 0.6310465931892395, "learning_rate": 8.618115525694637e-07, "loss": 1.0225, "step": 294640 }, { "epoch": 1.8824348670508413, "grad_norm": 1.1562144756317139, "learning_rate": 8.608842015085217e-07, "loss": 0.8545, "step": 294650 }, { "epoch": 1.8824987542005802, "grad_norm": 2.5376696586608887, "learning_rate": 8.599573453209886e-07, "loss": 1.0066, "step": 294660 }, { "epoch": 1.8825626413503187, "grad_norm": 0.8767745494842529, "learning_rate": 8.590309840161903e-07, "loss": 1.0221, "step": 294670 }, { "epoch": 1.8826265285000576, "grad_norm": 0.9438298940658569, "learning_rate": 8.581051176034694e-07, "loss": 0.7347, "step": 294680 }, { "epoch": 1.8826904156497961, "grad_norm": 0.8803073763847351, "learning_rate": 8.571797460921349e-07, "loss": 1.154, "step": 294690 }, { "epoch": 1.882754302799535, "grad_norm": 1.1712591648101807, "learning_rate": 8.56254869491524e-07, "loss": 1.035, "step": 294700 }, { "epoch": 1.8828181899492735, "grad_norm": 0.6817288994789124, "learning_rate": 8.553304878109347e-07, "loss": 0.8175, "step": 294710 }, { "epoch": 1.8828820770990125, "grad_norm": 0.8393722176551819, "learning_rate": 8.54406601059693e-07, "loss": 0.8887, "step": 294720 }, { "epoch": 1.882945964248751, "grad_norm": 0.9503065347671509, "learning_rate": 8.534832092470857e-07, "loss": 0.9214, "step": 294730 }, { "epoch": 1.8830098513984899, "grad_norm": 0.6335180997848511, "learning_rate": 8.525603123824222e-07, "loss": 0.7857, "step": 294740 }, { "epoch": 1.8830737385482283, "grad_norm": 0.7557439804077148, "learning_rate": 8.51637910474995e-07, "loss": 0.8171, "step": 294750 }, { "epoch": 1.8831376256979673, "grad_norm": 0.79386967420578, "learning_rate": 8.507160035340966e-07, "loss": 0.7434, "step": 294760 }, { "epoch": 1.8832015128477058, "grad_norm": 0.8553667664527893, "learning_rate": 8.497945915690031e-07, "loss": 0.9031, "step": 294770 }, { "epoch": 1.8832653999974445, "grad_norm": 1.2695742845535278, "learning_rate": 8.488736745890013e-07, "loss": 1.095, "step": 294780 }, { "epoch": 1.8833292871471832, "grad_norm": 1.114892601966858, "learning_rate": 8.479532526033618e-07, "loss": 0.8603, "step": 294790 }, { "epoch": 1.8833931742969219, "grad_norm": 1.5279147624969482, "learning_rate": 8.470333256213603e-07, "loss": 0.808, "step": 294800 }, { "epoch": 1.8834570614466606, "grad_norm": 0.6107496023178101, "learning_rate": 8.461138936522506e-07, "loss": 0.8576, "step": 294810 }, { "epoch": 1.8835209485963993, "grad_norm": 0.7857877016067505, "learning_rate": 8.45194956705303e-07, "loss": 0.9546, "step": 294820 }, { "epoch": 1.883584835746138, "grad_norm": 1.1471401453018188, "learning_rate": 8.442765147897657e-07, "loss": 0.7312, "step": 294830 }, { "epoch": 1.8836487228958767, "grad_norm": 0.9037813544273376, "learning_rate": 8.433585679148926e-07, "loss": 0.8613, "step": 294840 }, { "epoch": 1.8837126100456154, "grad_norm": 0.8556420207023621, "learning_rate": 8.424411160899204e-07, "loss": 0.8962, "step": 294850 }, { "epoch": 1.8837764971953541, "grad_norm": 0.6740580201148987, "learning_rate": 8.415241593240974e-07, "loss": 1.1488, "step": 294860 }, { "epoch": 1.8838403843450928, "grad_norm": 0.9400308728218079, "learning_rate": 8.406076976266497e-07, "loss": 0.6987, "step": 294870 }, { "epoch": 1.8839042714948315, "grad_norm": 1.5534549951553345, "learning_rate": 8.396917310068086e-07, "loss": 0.8772, "step": 294880 }, { "epoch": 1.8839681586445702, "grad_norm": 1.0593148469924927, "learning_rate": 8.387762594738114e-07, "loss": 0.6944, "step": 294890 }, { "epoch": 1.884032045794309, "grad_norm": 0.9874648451805115, "learning_rate": 8.378612830368615e-07, "loss": 0.7625, "step": 294900 }, { "epoch": 1.8840959329440476, "grad_norm": 0.998270571231842, "learning_rate": 8.369468017051796e-07, "loss": 0.8599, "step": 294910 }, { "epoch": 1.8841598200937864, "grad_norm": 1.26828134059906, "learning_rate": 8.360328154879749e-07, "loss": 0.9925, "step": 294920 }, { "epoch": 1.884223707243525, "grad_norm": 0.8565728068351746, "learning_rate": 8.351193243944566e-07, "loss": 0.8852, "step": 294930 }, { "epoch": 1.8842875943932638, "grad_norm": 0.6440544724464417, "learning_rate": 8.342063284338175e-07, "loss": 0.7831, "step": 294940 }, { "epoch": 1.8843514815430025, "grad_norm": 0.646582841873169, "learning_rate": 8.332938276152613e-07, "loss": 0.9184, "step": 294950 }, { "epoch": 1.8844153686927412, "grad_norm": 0.6677904725074768, "learning_rate": 8.32381821947964e-07, "loss": 0.73, "step": 294960 }, { "epoch": 1.8844792558424799, "grad_norm": 0.7851777076721191, "learning_rate": 8.314703114411182e-07, "loss": 1.1174, "step": 294970 }, { "epoch": 1.8845431429922186, "grad_norm": 1.0177836418151855, "learning_rate": 8.305592961039055e-07, "loss": 0.7214, "step": 294980 }, { "epoch": 1.8846070301419573, "grad_norm": 1.17615807056427, "learning_rate": 8.296487759455019e-07, "loss": 0.8174, "step": 294990 }, { "epoch": 1.884670917291696, "grad_norm": 2.1096794605255127, "learning_rate": 8.287387509750666e-07, "loss": 1.018, "step": 295000 }, { "epoch": 1.8847348044414347, "grad_norm": 1.9853860139846802, "learning_rate": 8.278292212017758e-07, "loss": 0.6983, "step": 295010 }, { "epoch": 1.8847986915911732, "grad_norm": 0.6836848855018616, "learning_rate": 8.269201866347831e-07, "loss": 0.8722, "step": 295020 }, { "epoch": 1.8848625787409121, "grad_norm": 0.900377631187439, "learning_rate": 8.260116472832479e-07, "loss": 0.7465, "step": 295030 }, { "epoch": 1.8849264658906506, "grad_norm": 0.8947538733482361, "learning_rate": 8.251036031563075e-07, "loss": 1.0633, "step": 295040 }, { "epoch": 1.8849903530403895, "grad_norm": 0.8567830324172974, "learning_rate": 8.241960542631266e-07, "loss": 0.966, "step": 295050 }, { "epoch": 1.885054240190128, "grad_norm": 0.9220252633094788, "learning_rate": 8.232890006128313e-07, "loss": 0.9313, "step": 295060 }, { "epoch": 1.885118127339867, "grad_norm": 3.402864456176758, "learning_rate": 8.223824422145587e-07, "loss": 0.8671, "step": 295070 }, { "epoch": 1.8851820144896054, "grad_norm": 0.9252503514289856, "learning_rate": 8.214763790774405e-07, "loss": 0.7996, "step": 295080 }, { "epoch": 1.8852459016393444, "grad_norm": 1.2209160327911377, "learning_rate": 8.20570811210597e-07, "loss": 0.6249, "step": 295090 }, { "epoch": 1.8853097887890828, "grad_norm": 1.0366135835647583, "learning_rate": 8.196657386231543e-07, "loss": 0.8923, "step": 295100 }, { "epoch": 1.8853736759388218, "grad_norm": 1.104676365852356, "learning_rate": 8.187611613242274e-07, "loss": 0.748, "step": 295110 }, { "epoch": 1.8854375630885603, "grad_norm": 1.3151907920837402, "learning_rate": 8.178570793229201e-07, "loss": 0.7379, "step": 295120 }, { "epoch": 1.8855014502382992, "grad_norm": 0.8398836851119995, "learning_rate": 8.169534926283418e-07, "loss": 1.088, "step": 295130 }, { "epoch": 1.8855653373880377, "grad_norm": 3.185319423675537, "learning_rate": 8.160504012495906e-07, "loss": 0.9349, "step": 295140 }, { "epoch": 1.8856292245377766, "grad_norm": 0.7827329039573669, "learning_rate": 8.15147805195765e-07, "loss": 0.7621, "step": 295150 }, { "epoch": 1.885693111687515, "grad_norm": 0.9202792644500732, "learning_rate": 8.142457044759522e-07, "loss": 0.7232, "step": 295160 }, { "epoch": 1.885756998837254, "grad_norm": 1.3906402587890625, "learning_rate": 8.133440990992336e-07, "loss": 0.9122, "step": 295170 }, { "epoch": 1.8858208859869925, "grad_norm": 1.051938533782959, "learning_rate": 8.124429890746965e-07, "loss": 0.8986, "step": 295180 }, { "epoch": 1.8858847731367314, "grad_norm": 1.882930040359497, "learning_rate": 8.115423744114059e-07, "loss": 1.1432, "step": 295190 }, { "epoch": 1.88594866028647, "grad_norm": 1.0550377368927002, "learning_rate": 8.106422551184378e-07, "loss": 0.9013, "step": 295200 }, { "epoch": 1.8860125474362088, "grad_norm": 0.7018422484397888, "learning_rate": 8.097426312048573e-07, "loss": 0.8337, "step": 295210 }, { "epoch": 1.8860764345859473, "grad_norm": 1.2057894468307495, "learning_rate": 8.088435026797292e-07, "loss": 1.0478, "step": 295220 }, { "epoch": 1.8861403217356862, "grad_norm": 0.8332082629203796, "learning_rate": 8.07944869552102e-07, "loss": 0.7315, "step": 295230 }, { "epoch": 1.8862042088854247, "grad_norm": 0.5676252245903015, "learning_rate": 8.070467318310238e-07, "loss": 0.7383, "step": 295240 }, { "epoch": 1.8862680960351634, "grad_norm": 1.2264729738235474, "learning_rate": 8.061490895255431e-07, "loss": 1.0706, "step": 295250 }, { "epoch": 1.8863319831849021, "grad_norm": 0.9976449608802795, "learning_rate": 8.052519426447025e-07, "loss": 0.8886, "step": 295260 }, { "epoch": 1.8863958703346408, "grad_norm": 0.8264930248260498, "learning_rate": 8.043552911975338e-07, "loss": 0.7411, "step": 295270 }, { "epoch": 1.8864597574843796, "grad_norm": 1.149370789527893, "learning_rate": 8.034591351930632e-07, "loss": 0.7386, "step": 295280 }, { "epoch": 1.8865236446341183, "grad_norm": 1.0822536945343018, "learning_rate": 8.025634746403277e-07, "loss": 0.7569, "step": 295290 }, { "epoch": 1.886587531783857, "grad_norm": 0.8716220855712891, "learning_rate": 8.016683095483368e-07, "loss": 0.7104, "step": 295300 }, { "epoch": 1.8866514189335957, "grad_norm": 0.6852596402168274, "learning_rate": 8.007736399261057e-07, "loss": 0.8472, "step": 295310 }, { "epoch": 1.8867153060833344, "grad_norm": 0.975752592086792, "learning_rate": 7.998794657826491e-07, "loss": 0.7972, "step": 295320 }, { "epoch": 1.886779193233073, "grad_norm": 1.0218257904052734, "learning_rate": 7.989857871269768e-07, "loss": 0.8167, "step": 295330 }, { "epoch": 1.8868430803828118, "grad_norm": 1.0164343118667603, "learning_rate": 7.980926039680702e-07, "loss": 0.9823, "step": 295340 }, { "epoch": 1.8869069675325505, "grad_norm": 0.8673834204673767, "learning_rate": 7.971999163149501e-07, "loss": 1.0353, "step": 295350 }, { "epoch": 1.8869708546822892, "grad_norm": 1.0239756107330322, "learning_rate": 7.963077241765815e-07, "loss": 0.81, "step": 295360 }, { "epoch": 1.887034741832028, "grad_norm": 0.7221771478652954, "learning_rate": 7.954160275619682e-07, "loss": 0.6973, "step": 295370 }, { "epoch": 1.8870986289817666, "grad_norm": 0.7318668365478516, "learning_rate": 7.945248264800808e-07, "loss": 0.9796, "step": 295380 }, { "epoch": 1.8871625161315053, "grad_norm": 0.6194974184036255, "learning_rate": 7.936341209399012e-07, "loss": 0.6397, "step": 295390 }, { "epoch": 1.887226403281244, "grad_norm": 0.9081866145133972, "learning_rate": 7.927439109503887e-07, "loss": 0.804, "step": 295400 }, { "epoch": 1.8872902904309827, "grad_norm": 1.4008513689041138, "learning_rate": 7.918541965205195e-07, "loss": 0.9405, "step": 295410 }, { "epoch": 1.8873541775807214, "grad_norm": 1.3504021167755127, "learning_rate": 7.909649776592532e-07, "loss": 0.8039, "step": 295420 }, { "epoch": 1.8874180647304601, "grad_norm": 0.5890411138534546, "learning_rate": 7.900762543755325e-07, "loss": 0.8612, "step": 295430 }, { "epoch": 1.8874819518801988, "grad_norm": 0.855315089225769, "learning_rate": 7.891880266783225e-07, "loss": 0.6389, "step": 295440 }, { "epoch": 1.8875458390299376, "grad_norm": 0.9654882550239563, "learning_rate": 7.883002945765605e-07, "loss": 0.9406, "step": 295450 }, { "epoch": 1.8876097261796763, "grad_norm": 1.4022914171218872, "learning_rate": 7.874130580791949e-07, "loss": 1.3269, "step": 295460 }, { "epoch": 1.887673613329415, "grad_norm": 0.707993745803833, "learning_rate": 7.865263171951465e-07, "loss": 0.9352, "step": 295470 }, { "epoch": 1.8877375004791537, "grad_norm": 2.06457257270813, "learning_rate": 7.856400719333579e-07, "loss": 0.8777, "step": 295480 }, { "epoch": 1.8878013876288924, "grad_norm": 2.3128252029418945, "learning_rate": 7.847543223027498e-07, "loss": 0.8605, "step": 295490 }, { "epoch": 1.887865274778631, "grad_norm": 1.0920239686965942, "learning_rate": 7.83869068312243e-07, "loss": 0.9512, "step": 295500 }, { "epoch": 1.8879291619283696, "grad_norm": 1.3171707391738892, "learning_rate": 7.829843099707524e-07, "loss": 0.8446, "step": 295510 }, { "epoch": 1.8879930490781085, "grad_norm": 0.6091967821121216, "learning_rate": 7.821000472871875e-07, "loss": 0.8654, "step": 295520 }, { "epoch": 1.888056936227847, "grad_norm": 0.8139270544052124, "learning_rate": 7.812162802704582e-07, "loss": 0.698, "step": 295530 }, { "epoch": 1.888120823377586, "grad_norm": 1.0595269203186035, "learning_rate": 7.803330089294569e-07, "loss": 1.1004, "step": 295540 }, { "epoch": 1.8881847105273244, "grad_norm": 0.7380056381225586, "learning_rate": 7.794502332730824e-07, "loss": 1.1522, "step": 295550 }, { "epoch": 1.8882485976770633, "grad_norm": 0.7570953965187073, "learning_rate": 7.785679533102331e-07, "loss": 0.9224, "step": 295560 }, { "epoch": 1.8883124848268018, "grad_norm": 0.9701615571975708, "learning_rate": 7.776861690497794e-07, "loss": 0.5713, "step": 295570 }, { "epoch": 1.8883763719765407, "grad_norm": 1.3707141876220703, "learning_rate": 7.768048805006145e-07, "loss": 1.0388, "step": 295580 }, { "epoch": 1.8884402591262792, "grad_norm": 1.3032087087631226, "learning_rate": 7.759240876716034e-07, "loss": 0.8332, "step": 295590 }, { "epoch": 1.8885041462760181, "grad_norm": 1.7075655460357666, "learning_rate": 7.750437905716279e-07, "loss": 0.9384, "step": 295600 }, { "epoch": 1.8885680334257566, "grad_norm": 0.8511855602264404, "learning_rate": 7.74163989209542e-07, "loss": 0.9488, "step": 295610 }, { "epoch": 1.8886319205754956, "grad_norm": 1.4510815143585205, "learning_rate": 7.732846835942109e-07, "loss": 0.7513, "step": 295620 }, { "epoch": 1.888695807725234, "grad_norm": 0.7361019849777222, "learning_rate": 7.724058737344942e-07, "loss": 0.8163, "step": 295630 }, { "epoch": 1.888759694874973, "grad_norm": 1.0821374654769897, "learning_rate": 7.715275596392402e-07, "loss": 0.8498, "step": 295640 }, { "epoch": 1.8888235820247115, "grad_norm": 0.9581074118614197, "learning_rate": 7.70649741317292e-07, "loss": 0.6997, "step": 295650 }, { "epoch": 1.8888874691744504, "grad_norm": 1.1700917482376099, "learning_rate": 7.697724187774868e-07, "loss": 0.7939, "step": 295660 }, { "epoch": 1.8889513563241889, "grad_norm": 0.6985589265823364, "learning_rate": 7.688955920286623e-07, "loss": 0.9785, "step": 295670 }, { "epoch": 1.8890152434739278, "grad_norm": 1.1156351566314697, "learning_rate": 7.6801926107965e-07, "loss": 0.8195, "step": 295680 }, { "epoch": 1.8890791306236663, "grad_norm": 0.6667960286140442, "learning_rate": 7.671434259392818e-07, "loss": 0.9264, "step": 295690 }, { "epoch": 1.8891430177734052, "grad_norm": 3.8637044429779053, "learning_rate": 7.662680866163619e-07, "loss": 1.2968, "step": 295700 }, { "epoch": 1.8892069049231437, "grad_norm": 0.7520202398300171, "learning_rate": 7.653932431197219e-07, "loss": 0.8864, "step": 295710 }, { "epoch": 1.8892707920728826, "grad_norm": 0.8065469264984131, "learning_rate": 7.645188954581661e-07, "loss": 0.865, "step": 295720 }, { "epoch": 1.889334679222621, "grad_norm": 0.6768770217895508, "learning_rate": 7.636450436404985e-07, "loss": 0.8843, "step": 295730 }, { "epoch": 1.8893985663723598, "grad_norm": 1.8908792734146118, "learning_rate": 7.627716876755176e-07, "loss": 0.9908, "step": 295740 }, { "epoch": 1.8894624535220985, "grad_norm": 1.037677526473999, "learning_rate": 7.618988275720273e-07, "loss": 1.0596, "step": 295750 }, { "epoch": 1.8895263406718372, "grad_norm": 0.7945002317428589, "learning_rate": 7.610264633388098e-07, "loss": 0.9728, "step": 295760 }, { "epoch": 1.889590227821576, "grad_norm": 1.0304378271102905, "learning_rate": 7.601545949846523e-07, "loss": 0.9641, "step": 295770 }, { "epoch": 1.8896541149713146, "grad_norm": 0.7987378239631653, "learning_rate": 7.592832225183421e-07, "loss": 0.8351, "step": 295780 }, { "epoch": 1.8897180021210533, "grad_norm": 1.3686131238937378, "learning_rate": 7.584123459486447e-07, "loss": 0.7295, "step": 295790 }, { "epoch": 1.889781889270792, "grad_norm": 1.25900137424469, "learning_rate": 7.575419652843363e-07, "loss": 0.6627, "step": 295800 }, { "epoch": 1.8898457764205308, "grad_norm": 0.8558017015457153, "learning_rate": 7.56672080534182e-07, "loss": 0.9229, "step": 295810 }, { "epoch": 1.8899096635702695, "grad_norm": 0.9014974236488342, "learning_rate": 7.558026917069416e-07, "loss": 0.944, "step": 295820 }, { "epoch": 1.8899735507200082, "grad_norm": 0.7495697140693665, "learning_rate": 7.549337988113691e-07, "loss": 0.9312, "step": 295830 }, { "epoch": 1.8900374378697469, "grad_norm": 2.5676043033599854, "learning_rate": 7.540654018562188e-07, "loss": 1.0228, "step": 295840 }, { "epoch": 1.8901013250194856, "grad_norm": 1.3730984926223755, "learning_rate": 7.531975008502279e-07, "loss": 0.7373, "step": 295850 }, { "epoch": 1.8901652121692243, "grad_norm": 0.9089881181716919, "learning_rate": 7.523300958021451e-07, "loss": 0.812, "step": 295860 }, { "epoch": 1.890229099318963, "grad_norm": 1.2313027381896973, "learning_rate": 7.514631867207078e-07, "loss": 0.6691, "step": 295870 }, { "epoch": 1.8902929864687017, "grad_norm": 0.9636072516441345, "learning_rate": 7.505967736146369e-07, "loss": 1.005, "step": 295880 }, { "epoch": 1.8903568736184404, "grad_norm": 0.7294650077819824, "learning_rate": 7.497308564926641e-07, "loss": 0.8859, "step": 295890 }, { "epoch": 1.890420760768179, "grad_norm": 0.9986069798469543, "learning_rate": 7.488654353635105e-07, "loss": 0.5529, "step": 295900 }, { "epoch": 1.8904846479179178, "grad_norm": 0.9179570078849792, "learning_rate": 7.480005102358911e-07, "loss": 0.7446, "step": 295910 }, { "epoch": 1.8905485350676565, "grad_norm": 0.981963038444519, "learning_rate": 7.471360811185157e-07, "loss": 0.8282, "step": 295920 }, { "epoch": 1.8906124222173952, "grad_norm": 0.8185356259346008, "learning_rate": 7.462721480200885e-07, "loss": 1.0243, "step": 295930 }, { "epoch": 1.890676309367134, "grad_norm": 0.9393899440765381, "learning_rate": 7.45408710949308e-07, "loss": 0.9815, "step": 295940 }, { "epoch": 1.8907401965168726, "grad_norm": 0.8965507745742798, "learning_rate": 7.445457699148783e-07, "loss": 0.7835, "step": 295950 }, { "epoch": 1.8908040836666113, "grad_norm": 0.8095226287841797, "learning_rate": 7.436833249254816e-07, "loss": 0.8778, "step": 295960 }, { "epoch": 1.89086797081635, "grad_norm": 1.0888183116912842, "learning_rate": 7.428213759898106e-07, "loss": 0.9133, "step": 295970 }, { "epoch": 1.8909318579660885, "grad_norm": 1.2279393672943115, "learning_rate": 7.419599231165364e-07, "loss": 0.7502, "step": 295980 }, { "epoch": 1.8909957451158275, "grad_norm": 0.6059055924415588, "learning_rate": 7.410989663143464e-07, "loss": 1.0614, "step": 295990 }, { "epoch": 1.891059632265566, "grad_norm": 0.8721667528152466, "learning_rate": 7.402385055919003e-07, "loss": 0.4989, "step": 296000 }, { "epoch": 1.8911235194153049, "grad_norm": 0.5935079455375671, "learning_rate": 7.393785409578691e-07, "loss": 0.9195, "step": 296010 }, { "epoch": 1.8911874065650434, "grad_norm": 0.7822202444076538, "learning_rate": 7.385190724209123e-07, "loss": 0.9554, "step": 296020 }, { "epoch": 1.8912512937147823, "grad_norm": 2.311864137649536, "learning_rate": 7.376600999896899e-07, "loss": 0.8155, "step": 296030 }, { "epoch": 1.8913151808645208, "grad_norm": 0.9179143309593201, "learning_rate": 7.368016236728392e-07, "loss": 0.8417, "step": 296040 }, { "epoch": 1.8913790680142597, "grad_norm": 1.1837308406829834, "learning_rate": 7.359436434790257e-07, "loss": 0.8029, "step": 296050 }, { "epoch": 1.8914429551639982, "grad_norm": 1.0275431871414185, "learning_rate": 7.350861594168701e-07, "loss": 0.7866, "step": 296060 }, { "epoch": 1.8915068423137371, "grad_norm": 0.9733939170837402, "learning_rate": 7.342291714950211e-07, "loss": 0.9155, "step": 296070 }, { "epoch": 1.8915707294634756, "grad_norm": 0.6989685893058777, "learning_rate": 7.333726797221053e-07, "loss": 0.7643, "step": 296080 }, { "epoch": 1.8916346166132145, "grad_norm": 1.013899564743042, "learning_rate": 7.325166841067487e-07, "loss": 0.9916, "step": 296090 }, { "epoch": 1.891698503762953, "grad_norm": 0.8235689401626587, "learning_rate": 7.316611846575672e-07, "loss": 0.9801, "step": 296100 }, { "epoch": 1.891762390912692, "grad_norm": 1.1039879322052002, "learning_rate": 7.308061813831868e-07, "loss": 0.8541, "step": 296110 }, { "epoch": 1.8918262780624304, "grad_norm": 0.9536022543907166, "learning_rate": 7.299516742922119e-07, "loss": 0.6973, "step": 296120 }, { "epoch": 1.8918901652121694, "grad_norm": 0.7637679576873779, "learning_rate": 7.290976633932411e-07, "loss": 0.8889, "step": 296130 }, { "epoch": 1.8919540523619078, "grad_norm": 0.8953757286071777, "learning_rate": 7.282441486948899e-07, "loss": 1.065, "step": 296140 }, { "epoch": 1.8920179395116468, "grad_norm": 1.9607559442520142, "learning_rate": 7.273911302057457e-07, "loss": 0.8208, "step": 296150 }, { "epoch": 1.8920818266613852, "grad_norm": 0.8156129717826843, "learning_rate": 7.265386079343961e-07, "loss": 1.0972, "step": 296160 }, { "epoch": 1.8921457138111242, "grad_norm": 1.9369901418685913, "learning_rate": 7.256865818894288e-07, "loss": 0.8655, "step": 296170 }, { "epoch": 1.8922096009608627, "grad_norm": 0.618569552898407, "learning_rate": 7.248350520794312e-07, "loss": 0.7582, "step": 296180 }, { "epoch": 1.8922734881106016, "grad_norm": 0.8822636604309082, "learning_rate": 7.239840185129687e-07, "loss": 0.8655, "step": 296190 }, { "epoch": 1.89233737526034, "grad_norm": 1.2168229818344116, "learning_rate": 7.231334811986234e-07, "loss": 0.7847, "step": 296200 }, { "epoch": 1.892401262410079, "grad_norm": 0.5056108236312866, "learning_rate": 7.222834401449496e-07, "loss": 0.8557, "step": 296210 }, { "epoch": 1.8924651495598175, "grad_norm": 0.9576126337051392, "learning_rate": 7.214338953605127e-07, "loss": 0.7553, "step": 296220 }, { "epoch": 1.8925290367095562, "grad_norm": 0.8779643177986145, "learning_rate": 7.205848468538723e-07, "loss": 0.9253, "step": 296230 }, { "epoch": 1.892592923859295, "grad_norm": 1.0522737503051758, "learning_rate": 7.197362946335718e-07, "loss": 0.7269, "step": 296240 }, { "epoch": 1.8926568110090336, "grad_norm": 1.0928726196289062, "learning_rate": 7.188882387081597e-07, "loss": 0.8349, "step": 296250 }, { "epoch": 1.8927206981587723, "grad_norm": 0.9813924431800842, "learning_rate": 7.180406790861794e-07, "loss": 1.1308, "step": 296260 }, { "epoch": 1.892784585308511, "grad_norm": 0.6593855023384094, "learning_rate": 7.171936157761628e-07, "loss": 0.9651, "step": 296270 }, { "epoch": 1.8928484724582497, "grad_norm": 0.9737311601638794, "learning_rate": 7.16347048786642e-07, "loss": 0.9277, "step": 296280 }, { "epoch": 1.8929123596079884, "grad_norm": 1.488356351852417, "learning_rate": 7.155009781261435e-07, "loss": 1.0415, "step": 296290 }, { "epoch": 1.8929762467577271, "grad_norm": 0.7735053300857544, "learning_rate": 7.146554038031883e-07, "loss": 0.8097, "step": 296300 }, { "epoch": 1.8930401339074658, "grad_norm": 1.3010677099227905, "learning_rate": 7.138948112881683e-07, "loss": 0.9612, "step": 296310 }, { "epoch": 1.8931040210572045, "grad_norm": 0.6956140995025635, "learning_rate": 7.130501800300004e-07, "loss": 1.1993, "step": 296320 }, { "epoch": 1.8931679082069433, "grad_norm": 0.8052523732185364, "learning_rate": 7.122060451340562e-07, "loss": 0.726, "step": 296330 }, { "epoch": 1.893231795356682, "grad_norm": 0.8581894636154175, "learning_rate": 7.113624066088342e-07, "loss": 0.8463, "step": 296340 }, { "epoch": 1.8932956825064207, "grad_norm": 1.2147918939590454, "learning_rate": 7.105192644628389e-07, "loss": 0.9161, "step": 296350 }, { "epoch": 1.8933595696561594, "grad_norm": 1.007676362991333, "learning_rate": 7.096766187045467e-07, "loss": 1.032, "step": 296360 }, { "epoch": 1.893423456805898, "grad_norm": 1.0709525346755981, "learning_rate": 7.08834469342462e-07, "loss": 0.9742, "step": 296370 }, { "epoch": 1.8934873439556368, "grad_norm": 0.7088238596916199, "learning_rate": 7.079928163850558e-07, "loss": 1.0639, "step": 296380 }, { "epoch": 1.8935512311053755, "grad_norm": 1.4133071899414062, "learning_rate": 7.07151659840799e-07, "loss": 0.6822, "step": 296390 }, { "epoch": 1.8936151182551142, "grad_norm": 1.2081222534179688, "learning_rate": 7.063109997181793e-07, "loss": 0.9155, "step": 296400 }, { "epoch": 1.893679005404853, "grad_norm": 1.5343451499938965, "learning_rate": 7.054708360256457e-07, "loss": 0.9854, "step": 296410 }, { "epoch": 1.8937428925545916, "grad_norm": 0.7808114886283875, "learning_rate": 7.046311687716689e-07, "loss": 0.9595, "step": 296420 }, { "epoch": 1.8938067797043303, "grad_norm": 0.7206653356552124, "learning_rate": 7.037919979647034e-07, "loss": 0.5652, "step": 296430 }, { "epoch": 1.893870666854069, "grad_norm": 0.6024944186210632, "learning_rate": 7.029533236132035e-07, "loss": 0.8725, "step": 296440 }, { "epoch": 1.8939345540038077, "grad_norm": 3.378950357437134, "learning_rate": 7.02115145725607e-07, "loss": 0.8943, "step": 296450 }, { "epoch": 1.8939984411535464, "grad_norm": 0.7265892624855042, "learning_rate": 7.012774643103571e-07, "loss": 0.9218, "step": 296460 }, { "epoch": 1.894062328303285, "grad_norm": 1.143747091293335, "learning_rate": 7.004402793758968e-07, "loss": 1.0715, "step": 296470 }, { "epoch": 1.8941262154530238, "grad_norm": 0.8175809979438782, "learning_rate": 6.99603590930653e-07, "loss": 0.7293, "step": 296480 }, { "epoch": 1.8941901026027623, "grad_norm": 0.9248138666152954, "learning_rate": 6.987673989830523e-07, "loss": 0.8777, "step": 296490 }, { "epoch": 1.8942539897525013, "grad_norm": 0.9757458567619324, "learning_rate": 6.979317035415156e-07, "loss": 0.7873, "step": 296500 }, { "epoch": 1.8943178769022397, "grad_norm": 2.3840856552124023, "learning_rate": 6.970965046144528e-07, "loss": 0.7192, "step": 296510 }, { "epoch": 1.8943817640519787, "grad_norm": 1.2808120250701904, "learning_rate": 6.962618022102907e-07, "loss": 0.8615, "step": 296520 }, { "epoch": 1.8944456512017172, "grad_norm": 0.7400955557823181, "learning_rate": 6.954275963374168e-07, "loss": 0.9027, "step": 296530 }, { "epoch": 1.894509538351456, "grad_norm": 0.9748068451881409, "learning_rate": 6.945938870042524e-07, "loss": 0.6593, "step": 296540 }, { "epoch": 1.8945734255011946, "grad_norm": 0.9538581371307373, "learning_rate": 6.937606742191738e-07, "loss": 0.8944, "step": 296550 }, { "epoch": 1.8946373126509335, "grad_norm": 0.9795793890953064, "learning_rate": 6.929279579905856e-07, "loss": 1.0333, "step": 296560 }, { "epoch": 1.894701199800672, "grad_norm": 0.6251556277275085, "learning_rate": 6.920957383268645e-07, "loss": 0.8936, "step": 296570 }, { "epoch": 1.894765086950411, "grad_norm": 0.9500271677970886, "learning_rate": 6.912640152363981e-07, "loss": 1.1232, "step": 296580 }, { "epoch": 1.8948289741001494, "grad_norm": 0.9892424941062927, "learning_rate": 6.904327887275686e-07, "loss": 0.8446, "step": 296590 }, { "epoch": 1.8948928612498883, "grad_norm": 1.1366146802902222, "learning_rate": 6.896020588087304e-07, "loss": 0.8872, "step": 296600 }, { "epoch": 1.8949567483996268, "grad_norm": 0.8495091199874878, "learning_rate": 6.887718254882658e-07, "loss": 0.9404, "step": 296610 }, { "epoch": 1.8950206355493657, "grad_norm": 1.9020293951034546, "learning_rate": 6.879420887745235e-07, "loss": 1.0483, "step": 296620 }, { "epoch": 1.8950845226991042, "grad_norm": 2.187201738357544, "learning_rate": 6.871128486758694e-07, "loss": 0.8387, "step": 296630 }, { "epoch": 1.8951484098488431, "grad_norm": 1.4816763401031494, "learning_rate": 6.862841052006519e-07, "loss": 0.7488, "step": 296640 }, { "epoch": 1.8952122969985816, "grad_norm": 1.045585036277771, "learning_rate": 6.854558583572146e-07, "loss": 0.6764, "step": 296650 }, { "epoch": 1.8952761841483206, "grad_norm": 1.1862424612045288, "learning_rate": 6.846281081538952e-07, "loss": 0.7465, "step": 296660 }, { "epoch": 1.895340071298059, "grad_norm": 0.6618155837059021, "learning_rate": 6.838008545990426e-07, "loss": 1.0474, "step": 296670 }, { "epoch": 1.895403958447798, "grad_norm": 0.9749765396118164, "learning_rate": 6.829740977009724e-07, "loss": 0.9448, "step": 296680 }, { "epoch": 1.8954678455975364, "grad_norm": 1.0106642246246338, "learning_rate": 6.821478374680223e-07, "loss": 0.771, "step": 296690 }, { "epoch": 1.8955317327472754, "grad_norm": 0.9189677834510803, "learning_rate": 6.81322073908508e-07, "loss": 0.9741, "step": 296700 }, { "epoch": 1.8955956198970139, "grad_norm": 1.4712313413619995, "learning_rate": 6.804968070307505e-07, "loss": 0.8101, "step": 296710 }, { "epoch": 1.8956595070467526, "grad_norm": 1.327321171760559, "learning_rate": 6.796720368430542e-07, "loss": 0.7703, "step": 296720 }, { "epoch": 1.8957233941964913, "grad_norm": 0.8706001043319702, "learning_rate": 6.788477633537293e-07, "loss": 0.9648, "step": 296730 }, { "epoch": 1.89578728134623, "grad_norm": 0.8188433051109314, "learning_rate": 6.780239865710747e-07, "loss": 0.8005, "step": 296740 }, { "epoch": 1.8958511684959687, "grad_norm": 1.1353031396865845, "learning_rate": 6.772007065033947e-07, "loss": 0.8646, "step": 296750 }, { "epoch": 1.8959150556457074, "grad_norm": 1.0272502899169922, "learning_rate": 6.763779231589717e-07, "loss": 1.0035, "step": 296760 }, { "epoch": 1.895978942795446, "grad_norm": 0.8530550599098206, "learning_rate": 6.75555636546088e-07, "loss": 0.7951, "step": 296770 }, { "epoch": 1.8960428299451848, "grad_norm": 0.9556049108505249, "learning_rate": 6.747338466730369e-07, "loss": 0.9092, "step": 296780 }, { "epoch": 1.8961067170949235, "grad_norm": 1.1247327327728271, "learning_rate": 6.739125535480839e-07, "loss": 0.9113, "step": 296790 }, { "epoch": 1.8961706042446622, "grad_norm": 1.8721996545791626, "learning_rate": 6.730917571795059e-07, "loss": 1.0448, "step": 296800 }, { "epoch": 1.896234491394401, "grad_norm": 0.783287763595581, "learning_rate": 6.722714575755684e-07, "loss": 0.7878, "step": 296810 }, { "epoch": 1.8962983785441396, "grad_norm": 0.8555763959884644, "learning_rate": 6.714516547445315e-07, "loss": 0.7399, "step": 296820 }, { "epoch": 1.8963622656938783, "grad_norm": 0.8863575458526611, "learning_rate": 6.706323486946553e-07, "loss": 0.743, "step": 296830 }, { "epoch": 1.896426152843617, "grad_norm": 0.8079519867897034, "learning_rate": 6.698135394341832e-07, "loss": 0.7099, "step": 296840 }, { "epoch": 1.8964900399933557, "grad_norm": 1.180772304534912, "learning_rate": 6.68995226971364e-07, "loss": 0.7238, "step": 296850 }, { "epoch": 1.8965539271430945, "grad_norm": 0.9137830138206482, "learning_rate": 6.681774113144468e-07, "loss": 0.8114, "step": 296860 }, { "epoch": 1.8966178142928332, "grad_norm": 1.2317211627960205, "learning_rate": 6.673600924716528e-07, "loss": 1.0328, "step": 296870 }, { "epoch": 1.8966817014425719, "grad_norm": 1.8075276613235474, "learning_rate": 6.665432704512309e-07, "loss": 0.7587, "step": 296880 }, { "epoch": 1.8967455885923106, "grad_norm": 0.7599256634712219, "learning_rate": 6.657269452613856e-07, "loss": 1.0978, "step": 296890 }, { "epoch": 1.8968094757420493, "grad_norm": 1.4767416715621948, "learning_rate": 6.649111169103606e-07, "loss": 0.6445, "step": 296900 }, { "epoch": 1.896873362891788, "grad_norm": 0.8817050457000732, "learning_rate": 6.640957854063601e-07, "loss": 0.6194, "step": 296910 }, { "epoch": 1.8969372500415267, "grad_norm": 0.8108386993408203, "learning_rate": 6.632809507575888e-07, "loss": 0.8546, "step": 296920 }, { "epoch": 1.8970011371912654, "grad_norm": 0.8253944516181946, "learning_rate": 6.624666129722678e-07, "loss": 0.7681, "step": 296930 }, { "epoch": 1.897065024341004, "grad_norm": 0.8359985947608948, "learning_rate": 6.616527720585908e-07, "loss": 0.8461, "step": 296940 }, { "epoch": 1.8971289114907428, "grad_norm": 0.5892947316169739, "learning_rate": 6.608394280247565e-07, "loss": 0.6485, "step": 296950 }, { "epoch": 1.8971927986404813, "grad_norm": 0.7966788411140442, "learning_rate": 6.600265808789475e-07, "loss": 0.8482, "step": 296960 }, { "epoch": 1.8972566857902202, "grad_norm": 1.2839607000350952, "learning_rate": 6.592142306293569e-07, "loss": 0.7302, "step": 296970 }, { "epoch": 1.8973205729399587, "grad_norm": 0.5650898814201355, "learning_rate": 6.584023772841674e-07, "loss": 0.7182, "step": 296980 }, { "epoch": 1.8973844600896976, "grad_norm": 3.5387778282165527, "learning_rate": 6.575910208515557e-07, "loss": 0.8163, "step": 296990 }, { "epoch": 1.8974483472394361, "grad_norm": 1.5479587316513062, "learning_rate": 6.567801613396817e-07, "loss": 0.9965, "step": 297000 }, { "epoch": 1.897512234389175, "grad_norm": 0.8099126815795898, "learning_rate": 6.559697987567226e-07, "loss": 0.9669, "step": 297010 }, { "epoch": 1.8975761215389135, "grad_norm": 1.0780574083328247, "learning_rate": 6.551599331108382e-07, "loss": 0.9857, "step": 297020 }, { "epoch": 1.8976400086886525, "grad_norm": 0.7173043489456177, "learning_rate": 6.543505644101833e-07, "loss": 0.8413, "step": 297030 }, { "epoch": 1.897703895838391, "grad_norm": 0.9796221852302551, "learning_rate": 6.535416926629067e-07, "loss": 0.6674, "step": 297040 }, { "epoch": 1.8977677829881299, "grad_norm": 1.4020726680755615, "learning_rate": 6.527333178771577e-07, "loss": 0.8466, "step": 297050 }, { "epoch": 1.8978316701378684, "grad_norm": 1.3557363748550415, "learning_rate": 6.519254400610686e-07, "loss": 1.1703, "step": 297060 }, { "epoch": 1.8978955572876073, "grad_norm": 1.3132637739181519, "learning_rate": 6.511180592227939e-07, "loss": 0.8213, "step": 297070 }, { "epoch": 1.8979594444373458, "grad_norm": 1.5618770122528076, "learning_rate": 6.503111753704439e-07, "loss": 0.7792, "step": 297080 }, { "epoch": 1.8980233315870847, "grad_norm": 0.9405543804168701, "learning_rate": 6.495047885121563e-07, "loss": 0.8413, "step": 297090 }, { "epoch": 1.8980872187368232, "grad_norm": 1.0824949741363525, "learning_rate": 6.486988986560527e-07, "loss": 0.8309, "step": 297100 }, { "epoch": 1.898151105886562, "grad_norm": 1.8957220315933228, "learning_rate": 6.478935058102375e-07, "loss": 0.8497, "step": 297110 }, { "epoch": 1.8982149930363006, "grad_norm": 3.8873727321624756, "learning_rate": 6.470886099828432e-07, "loss": 0.8598, "step": 297120 }, { "epoch": 1.8982788801860395, "grad_norm": 1.3968037366867065, "learning_rate": 6.462842111819523e-07, "loss": 0.9953, "step": 297130 }, { "epoch": 1.898342767335778, "grad_norm": 0.8467806577682495, "learning_rate": 6.454803094156803e-07, "loss": 0.8903, "step": 297140 }, { "epoch": 1.898406654485517, "grad_norm": 0.9526151418685913, "learning_rate": 6.446769046921208e-07, "loss": 0.7577, "step": 297150 }, { "epoch": 1.8984705416352554, "grad_norm": 0.5876900553703308, "learning_rate": 6.438739970193619e-07, "loss": 0.7978, "step": 297160 }, { "epoch": 1.8985344287849943, "grad_norm": 0.9410789608955383, "learning_rate": 6.430715864054915e-07, "loss": 0.8442, "step": 297170 }, { "epoch": 1.8985983159347328, "grad_norm": 0.5718557834625244, "learning_rate": 6.42269672858592e-07, "loss": 0.7736, "step": 297180 }, { "epoch": 1.8986622030844718, "grad_norm": 1.0744683742523193, "learning_rate": 6.414682563867347e-07, "loss": 0.8258, "step": 297190 }, { "epoch": 1.8987260902342102, "grad_norm": 0.9959836602210999, "learning_rate": 6.40667336997991e-07, "loss": 0.8789, "step": 297200 }, { "epoch": 1.898789977383949, "grad_norm": 0.6707397103309631, "learning_rate": 6.398669147004321e-07, "loss": 0.8412, "step": 297210 }, { "epoch": 1.8988538645336877, "grad_norm": 1.135922908782959, "learning_rate": 6.390669895021184e-07, "loss": 0.7758, "step": 297220 }, { "epoch": 1.8989177516834264, "grad_norm": 1.4174561500549316, "learning_rate": 6.382675614110989e-07, "loss": 0.8635, "step": 297230 }, { "epoch": 1.898981638833165, "grad_norm": 2.0364394187927246, "learning_rate": 6.374686304354338e-07, "loss": 0.644, "step": 297240 }, { "epoch": 1.8990455259829038, "grad_norm": 1.104007363319397, "learning_rate": 6.366701965831612e-07, "loss": 0.8382, "step": 297250 }, { "epoch": 1.8991094131326425, "grad_norm": 1.1201772689819336, "learning_rate": 6.358722598623246e-07, "loss": 0.9077, "step": 297260 }, { "epoch": 1.8991733002823812, "grad_norm": 0.5292360782623291, "learning_rate": 6.35074820280962e-07, "loss": 0.7113, "step": 297270 }, { "epoch": 1.8992371874321199, "grad_norm": 1.2881067991256714, "learning_rate": 6.342778778471004e-07, "loss": 0.8994, "step": 297280 }, { "epoch": 1.8993010745818586, "grad_norm": 0.8692072033882141, "learning_rate": 6.334814325687721e-07, "loss": 0.9001, "step": 297290 }, { "epoch": 1.8993649617315973, "grad_norm": 0.9872758984565735, "learning_rate": 6.326854844539876e-07, "loss": 0.8542, "step": 297300 }, { "epoch": 1.899428848881336, "grad_norm": 1.0740875005722046, "learning_rate": 6.318900335107736e-07, "loss": 0.8472, "step": 297310 }, { "epoch": 1.8994927360310747, "grad_norm": 0.7466246485710144, "learning_rate": 6.310950797471349e-07, "loss": 0.7936, "step": 297320 }, { "epoch": 1.8995566231808134, "grad_norm": 0.9600816965103149, "learning_rate": 6.303006231710818e-07, "loss": 0.7065, "step": 297330 }, { "epoch": 1.8996205103305521, "grad_norm": 0.7528518438339233, "learning_rate": 6.295066637906077e-07, "loss": 0.788, "step": 297340 }, { "epoch": 1.8996843974802908, "grad_norm": 0.9469917416572571, "learning_rate": 6.287132016137177e-07, "loss": 0.83, "step": 297350 }, { "epoch": 1.8997482846300295, "grad_norm": 0.7395723462104797, "learning_rate": 6.279202366483939e-07, "loss": 0.8339, "step": 297360 }, { "epoch": 1.8998121717797682, "grad_norm": 0.881351113319397, "learning_rate": 6.2712776890263e-07, "loss": 0.7541, "step": 297370 }, { "epoch": 1.899876058929507, "grad_norm": 0.8257976770401001, "learning_rate": 6.263357983843976e-07, "loss": 1.041, "step": 297380 }, { "epoch": 1.8999399460792457, "grad_norm": 0.9281383156776428, "learning_rate": 6.255443251016846e-07, "loss": 0.7552, "step": 297390 }, { "epoch": 1.9000038332289844, "grad_norm": 1.1832506656646729, "learning_rate": 6.247533490624513e-07, "loss": 0.679, "step": 297400 }, { "epoch": 1.900067720378723, "grad_norm": 0.9818503856658936, "learning_rate": 6.239628702746691e-07, "loss": 0.6673, "step": 297410 }, { "epoch": 1.9001316075284618, "grad_norm": 0.8966130018234253, "learning_rate": 6.231728887462929e-07, "loss": 0.7171, "step": 297420 }, { "epoch": 1.9001954946782005, "grad_norm": 0.9654017090797424, "learning_rate": 6.223834044852883e-07, "loss": 0.7999, "step": 297430 }, { "epoch": 1.9002593818279392, "grad_norm": 1.1915733814239502, "learning_rate": 6.215944174995992e-07, "loss": 0.7432, "step": 297440 }, { "epoch": 1.9003232689776777, "grad_norm": 0.6636626124382019, "learning_rate": 6.208059277971689e-07, "loss": 0.9614, "step": 297450 }, { "epoch": 1.9003871561274166, "grad_norm": 0.7218161821365356, "learning_rate": 6.20017935385947e-07, "loss": 0.6887, "step": 297460 }, { "epoch": 1.900451043277155, "grad_norm": 2.2991743087768555, "learning_rate": 6.192304402738603e-07, "loss": 0.97, "step": 297470 }, { "epoch": 1.900514930426894, "grad_norm": 0.8845507502555847, "learning_rate": 6.184434424688467e-07, "loss": 0.8686, "step": 297480 }, { "epoch": 1.9005788175766325, "grad_norm": 0.7170679569244385, "learning_rate": 6.176569419788281e-07, "loss": 0.9057, "step": 297490 }, { "epoch": 1.9006427047263714, "grad_norm": 0.8707745671272278, "learning_rate": 6.168709388117255e-07, "loss": 0.8525, "step": 297500 }, { "epoch": 1.90070659187611, "grad_norm": 1.2940828800201416, "learning_rate": 6.160854329754551e-07, "loss": 0.6402, "step": 297510 }, { "epoch": 1.9007704790258488, "grad_norm": 1.2797635793685913, "learning_rate": 6.15300424477927e-07, "loss": 0.8512, "step": 297520 }, { "epoch": 1.9008343661755873, "grad_norm": 0.953954815864563, "learning_rate": 6.145159133270461e-07, "loss": 0.9747, "step": 297530 }, { "epoch": 1.9008982533253262, "grad_norm": 1.0358965396881104, "learning_rate": 6.137318995307173e-07, "loss": 0.8922, "step": 297540 }, { "epoch": 1.9009621404750647, "grad_norm": 1.3977952003479004, "learning_rate": 6.129483830968285e-07, "loss": 0.8189, "step": 297550 }, { "epoch": 1.9010260276248037, "grad_norm": 0.7680107951164246, "learning_rate": 6.121653640332848e-07, "loss": 0.9159, "step": 297560 }, { "epoch": 1.9010899147745421, "grad_norm": 1.2032699584960938, "learning_rate": 6.113828423479517e-07, "loss": 1.0925, "step": 297570 }, { "epoch": 1.901153801924281, "grad_norm": 1.2679545879364014, "learning_rate": 6.106008180487288e-07, "loss": 0.9924, "step": 297580 }, { "epoch": 1.9012176890740196, "grad_norm": 0.7097789645195007, "learning_rate": 6.098192911434763e-07, "loss": 1.1285, "step": 297590 }, { "epoch": 1.9012815762237585, "grad_norm": 1.2766450643539429, "learning_rate": 6.090382616400825e-07, "loss": 1.1143, "step": 297600 }, { "epoch": 1.901345463373497, "grad_norm": 0.8973786234855652, "learning_rate": 6.082577295463909e-07, "loss": 0.549, "step": 297610 }, { "epoch": 1.901409350523236, "grad_norm": 1.1079696416854858, "learning_rate": 6.074776948702843e-07, "loss": 0.7188, "step": 297620 }, { "epoch": 1.9014732376729744, "grad_norm": 1.3107043504714966, "learning_rate": 6.066981576196007e-07, "loss": 0.9424, "step": 297630 }, { "epoch": 1.9015371248227133, "grad_norm": 1.010285496711731, "learning_rate": 6.059191178022005e-07, "loss": 1.2047, "step": 297640 }, { "epoch": 1.9016010119724518, "grad_norm": 1.1612452268600464, "learning_rate": 6.051405754259276e-07, "loss": 1.06, "step": 297650 }, { "epoch": 1.9016648991221907, "grad_norm": 1.245816946029663, "learning_rate": 6.0436253049862e-07, "loss": 0.7431, "step": 297660 }, { "epoch": 1.9017287862719292, "grad_norm": 0.9382659792900085, "learning_rate": 6.03584983028116e-07, "loss": 0.7355, "step": 297670 }, { "epoch": 1.901792673421668, "grad_norm": 1.9679533243179321, "learning_rate": 6.028079330222425e-07, "loss": 0.7824, "step": 297680 }, { "epoch": 1.9018565605714066, "grad_norm": 1.0482052564620972, "learning_rate": 6.020313804888323e-07, "loss": 0.8567, "step": 297690 }, { "epoch": 1.9019204477211453, "grad_norm": 0.8708074688911438, "learning_rate": 6.012553254356957e-07, "loss": 0.7518, "step": 297700 }, { "epoch": 1.901984334870884, "grad_norm": 0.7268311381340027, "learning_rate": 6.004797678706598e-07, "loss": 0.7066, "step": 297710 }, { "epoch": 1.9020482220206227, "grad_norm": 0.9656032919883728, "learning_rate": 5.997047078015295e-07, "loss": 0.7627, "step": 297720 }, { "epoch": 1.9021121091703614, "grad_norm": 0.854823648929596, "learning_rate": 5.989301452361096e-07, "loss": 0.9002, "step": 297730 }, { "epoch": 1.9021759963201001, "grad_norm": 1.0799676179885864, "learning_rate": 5.981560801821995e-07, "loss": 1.0072, "step": 297740 }, { "epoch": 1.9022398834698389, "grad_norm": 4.408614158630371, "learning_rate": 5.97382512647593e-07, "loss": 0.9735, "step": 297750 }, { "epoch": 1.9023037706195776, "grad_norm": 0.6965596675872803, "learning_rate": 5.966094426400892e-07, "loss": 0.935, "step": 297760 }, { "epoch": 1.9023676577693163, "grad_norm": 1.042432188987732, "learning_rate": 5.958368701674655e-07, "loss": 0.7964, "step": 297770 }, { "epoch": 1.902431544919055, "grad_norm": 0.9587807655334473, "learning_rate": 5.950647952375043e-07, "loss": 1.0627, "step": 297780 }, { "epoch": 1.9024954320687937, "grad_norm": 1.1214818954467773, "learning_rate": 5.94293217857983e-07, "loss": 0.9112, "step": 297790 }, { "epoch": 1.9025593192185324, "grad_norm": 0.829887866973877, "learning_rate": 5.935221380366729e-07, "loss": 0.8339, "step": 297800 }, { "epoch": 1.902623206368271, "grad_norm": 0.9119672179222107, "learning_rate": 5.927515557813345e-07, "loss": 1.0159, "step": 297810 }, { "epoch": 1.9026870935180098, "grad_norm": 0.8453476428985596, "learning_rate": 5.91981471099734e-07, "loss": 0.8833, "step": 297820 }, { "epoch": 1.9027509806677485, "grad_norm": 1.4447675943374634, "learning_rate": 5.912118839996261e-07, "loss": 0.6944, "step": 297830 }, { "epoch": 1.9028148678174872, "grad_norm": 0.6993200182914734, "learning_rate": 5.904427944887547e-07, "loss": 0.8901, "step": 297840 }, { "epoch": 1.902878754967226, "grad_norm": 0.7036521434783936, "learning_rate": 5.896742025748691e-07, "loss": 0.6993, "step": 297850 }, { "epoch": 1.9029426421169646, "grad_norm": 1.3977524042129517, "learning_rate": 5.889061082657188e-07, "loss": 0.7307, "step": 297860 }, { "epoch": 1.9030065292667033, "grad_norm": 0.8624377846717834, "learning_rate": 5.881385115690197e-07, "loss": 0.9632, "step": 297870 }, { "epoch": 1.903070416416442, "grad_norm": 0.7405509352684021, "learning_rate": 5.873714124925211e-07, "loss": 0.8716, "step": 297880 }, { "epoch": 1.9031343035661807, "grad_norm": 0.808953046798706, "learning_rate": 5.866048110439337e-07, "loss": 0.7768, "step": 297890 }, { "epoch": 1.9031981907159194, "grad_norm": 1.1939847469329834, "learning_rate": 5.858387072309901e-07, "loss": 0.6911, "step": 297900 }, { "epoch": 1.9032620778656582, "grad_norm": 1.3884003162384033, "learning_rate": 5.850731010613952e-07, "loss": 0.7971, "step": 297910 }, { "epoch": 1.9033259650153966, "grad_norm": 1.2230150699615479, "learning_rate": 5.843079925428708e-07, "loss": 0.8024, "step": 297920 }, { "epoch": 1.9033898521651356, "grad_norm": 1.0419037342071533, "learning_rate": 5.835433816831104e-07, "loss": 0.9005, "step": 297930 }, { "epoch": 1.903453739314874, "grad_norm": 0.9676340818405151, "learning_rate": 5.827792684898193e-07, "loss": 0.9439, "step": 297940 }, { "epoch": 1.903517626464613, "grad_norm": 0.9829468727111816, "learning_rate": 5.820156529706911e-07, "loss": 0.6664, "step": 297950 }, { "epoch": 1.9035815136143515, "grad_norm": 1.0047937631607056, "learning_rate": 5.812525351334197e-07, "loss": 1.0327, "step": 297960 }, { "epoch": 1.9036454007640904, "grad_norm": 0.8629552721977234, "learning_rate": 5.804899149856934e-07, "loss": 0.9404, "step": 297970 }, { "epoch": 1.9037092879138289, "grad_norm": 1.8847241401672363, "learning_rate": 5.797277925351841e-07, "loss": 0.8702, "step": 297980 }, { "epoch": 1.9037731750635678, "grad_norm": 0.6932811141014099, "learning_rate": 5.789661677895741e-07, "loss": 0.6871, "step": 297990 }, { "epoch": 1.9038370622133063, "grad_norm": 1.0819748640060425, "learning_rate": 5.782050407565243e-07, "loss": 0.7768, "step": 298000 }, { "epoch": 1.9039009493630452, "grad_norm": 0.9804828763008118, "learning_rate": 5.774444114437061e-07, "loss": 0.8023, "step": 298010 }, { "epoch": 1.9039648365127837, "grad_norm": 1.4732115268707275, "learning_rate": 5.766842798587802e-07, "loss": 0.9734, "step": 298020 }, { "epoch": 1.9040287236625226, "grad_norm": 1.7599880695343018, "learning_rate": 5.759246460094069e-07, "loss": 0.7371, "step": 298030 }, { "epoch": 1.904092610812261, "grad_norm": 1.3185185194015503, "learning_rate": 5.751655099032193e-07, "loss": 0.7012, "step": 298040 }, { "epoch": 1.904156497962, "grad_norm": 0.753343939781189, "learning_rate": 5.744068715478835e-07, "loss": 0.9058, "step": 298050 }, { "epoch": 1.9042203851117385, "grad_norm": 2.9530723094940186, "learning_rate": 5.736487309510263e-07, "loss": 0.8327, "step": 298060 }, { "epoch": 1.9042842722614775, "grad_norm": 0.975061297416687, "learning_rate": 5.728910881202864e-07, "loss": 0.9793, "step": 298070 }, { "epoch": 1.904348159411216, "grad_norm": 0.7971157431602478, "learning_rate": 5.72133943063291e-07, "loss": 0.6126, "step": 298080 }, { "epoch": 1.9044120465609549, "grad_norm": 0.778538167476654, "learning_rate": 5.713772957876728e-07, "loss": 0.784, "step": 298090 }, { "epoch": 1.9044759337106933, "grad_norm": 1.185715913772583, "learning_rate": 5.706211463010424e-07, "loss": 0.7173, "step": 298100 }, { "epoch": 1.9045398208604323, "grad_norm": 1.3961964845657349, "learning_rate": 5.698654946110215e-07, "loss": 1.0598, "step": 298110 }, { "epoch": 1.9046037080101708, "grad_norm": 0.8438785076141357, "learning_rate": 5.691103407252152e-07, "loss": 0.9722, "step": 298120 }, { "epoch": 1.9046675951599097, "grad_norm": 1.0225751399993896, "learning_rate": 5.683556846512395e-07, "loss": 0.9682, "step": 298130 }, { "epoch": 1.9047314823096482, "grad_norm": 1.0111029148101807, "learning_rate": 5.676015263966772e-07, "loss": 1.0396, "step": 298140 }, { "epoch": 1.904795369459387, "grad_norm": 1.1241531372070312, "learning_rate": 5.66847865969139e-07, "loss": 0.8262, "step": 298150 }, { "epoch": 1.9048592566091256, "grad_norm": 1.169219732284546, "learning_rate": 5.660947033762076e-07, "loss": 0.7759, "step": 298160 }, { "epoch": 1.9049231437588643, "grad_norm": 1.2458572387695312, "learning_rate": 5.65342038625466e-07, "loss": 0.8906, "step": 298170 }, { "epoch": 1.904987030908603, "grad_norm": 0.9076129198074341, "learning_rate": 5.645898717244969e-07, "loss": 0.9836, "step": 298180 }, { "epoch": 1.9050509180583417, "grad_norm": 0.7402836084365845, "learning_rate": 5.638382026808775e-07, "loss": 0.6931, "step": 298190 }, { "epoch": 1.9051148052080804, "grad_norm": 2.3426685333251953, "learning_rate": 5.630870315021797e-07, "loss": 0.6219, "step": 298200 }, { "epoch": 1.9051786923578191, "grad_norm": 1.006109356880188, "learning_rate": 5.62336358195964e-07, "loss": 0.7118, "step": 298210 }, { "epoch": 1.9052425795075578, "grad_norm": 1.1263142824172974, "learning_rate": 5.615861827697855e-07, "loss": 1.0447, "step": 298220 }, { "epoch": 1.9053064666572965, "grad_norm": 0.8541145324707031, "learning_rate": 5.608365052312048e-07, "loss": 0.9683, "step": 298230 }, { "epoch": 1.9053703538070352, "grad_norm": 0.7063738703727722, "learning_rate": 5.60087325587777e-07, "loss": 0.7833, "step": 298240 }, { "epoch": 1.905434240956774, "grad_norm": 0.9849272966384888, "learning_rate": 5.593386438470349e-07, "loss": 1.0038, "step": 298250 }, { "epoch": 1.9054981281065126, "grad_norm": 0.8187015652656555, "learning_rate": 5.585904600165281e-07, "loss": 0.843, "step": 298260 }, { "epoch": 1.9055620152562514, "grad_norm": 1.0451970100402832, "learning_rate": 5.578427741037895e-07, "loss": 0.958, "step": 298270 }, { "epoch": 1.90562590240599, "grad_norm": 1.0295426845550537, "learning_rate": 5.570955861163407e-07, "loss": 0.9706, "step": 298280 }, { "epoch": 1.9056897895557288, "grad_norm": 0.785459041595459, "learning_rate": 5.563488960617202e-07, "loss": 0.8274, "step": 298290 }, { "epoch": 1.9057536767054675, "grad_norm": 0.8145094513893127, "learning_rate": 5.556027039474387e-07, "loss": 1.0412, "step": 298300 }, { "epoch": 1.9058175638552062, "grad_norm": 0.7018794417381287, "learning_rate": 5.548570097810179e-07, "loss": 0.9255, "step": 298310 }, { "epoch": 1.9058814510049449, "grad_norm": 1.8968229293823242, "learning_rate": 5.541118135699574e-07, "loss": 1.2584, "step": 298320 }, { "epoch": 1.9059453381546836, "grad_norm": 1.589530348777771, "learning_rate": 5.533671153217734e-07, "loss": 1.0387, "step": 298330 }, { "epoch": 1.9060092253044223, "grad_norm": 1.1622456312179565, "learning_rate": 5.526229150439544e-07, "loss": 0.894, "step": 298340 }, { "epoch": 1.906073112454161, "grad_norm": 1.0762825012207031, "learning_rate": 5.518792127440053e-07, "loss": 0.8478, "step": 298350 }, { "epoch": 1.9061369996038997, "grad_norm": 1.124621033668518, "learning_rate": 5.511360084294093e-07, "loss": 1.104, "step": 298360 }, { "epoch": 1.9062008867536384, "grad_norm": 1.6566370725631714, "learning_rate": 5.503933021076546e-07, "loss": 0.7928, "step": 298370 }, { "epoch": 1.9062647739033771, "grad_norm": 0.7469080090522766, "learning_rate": 5.496510937862132e-07, "loss": 1.1642, "step": 298380 }, { "epoch": 1.9063286610531158, "grad_norm": 0.85306715965271, "learning_rate": 5.489093834725733e-07, "loss": 0.5735, "step": 298390 }, { "epoch": 1.9063925482028545, "grad_norm": 0.8713909387588501, "learning_rate": 5.481681711741904e-07, "loss": 0.903, "step": 298400 }, { "epoch": 1.906456435352593, "grad_norm": 0.6891903877258301, "learning_rate": 5.474274568985416e-07, "loss": 0.7379, "step": 298410 }, { "epoch": 1.906520322502332, "grad_norm": 0.9910528063774109, "learning_rate": 5.466872406530766e-07, "loss": 0.611, "step": 298420 }, { "epoch": 1.9065842096520704, "grad_norm": 0.9393962621688843, "learning_rate": 5.459475224452614e-07, "loss": 0.7234, "step": 298430 }, { "epoch": 1.9066480968018094, "grad_norm": 0.7950835824012756, "learning_rate": 5.452083022825294e-07, "loss": 0.9627, "step": 298440 }, { "epoch": 1.9067119839515478, "grad_norm": 1.1390228271484375, "learning_rate": 5.444695801723409e-07, "loss": 0.6353, "step": 298450 }, { "epoch": 1.9067758711012868, "grad_norm": 0.8612496256828308, "learning_rate": 5.437313561221291e-07, "loss": 0.8579, "step": 298460 }, { "epoch": 1.9068397582510253, "grad_norm": 1.0620536804199219, "learning_rate": 5.429936301393268e-07, "loss": 0.9749, "step": 298470 }, { "epoch": 1.9069036454007642, "grad_norm": 0.9960135221481323, "learning_rate": 5.422564022313614e-07, "loss": 0.837, "step": 298480 }, { "epoch": 1.9069675325505027, "grad_norm": 0.6055924892425537, "learning_rate": 5.415196724056604e-07, "loss": 0.7759, "step": 298490 }, { "epoch": 1.9070314197002416, "grad_norm": 1.1424163579940796, "learning_rate": 5.407834406696511e-07, "loss": 0.8854, "step": 298500 }, { "epoch": 1.90709530684998, "grad_norm": 0.9661005735397339, "learning_rate": 5.400477070307331e-07, "loss": 1.0237, "step": 298510 }, { "epoch": 1.907159193999719, "grad_norm": 0.5954277515411377, "learning_rate": 5.393124714963283e-07, "loss": 0.9063, "step": 298520 }, { "epoch": 1.9072230811494575, "grad_norm": 0.6582477688789368, "learning_rate": 5.385777340738363e-07, "loss": 1.1954, "step": 298530 }, { "epoch": 1.9072869682991964, "grad_norm": 0.908450186252594, "learning_rate": 5.378434947706568e-07, "loss": 0.8158, "step": 298540 }, { "epoch": 1.907350855448935, "grad_norm": 1.31619393825531, "learning_rate": 5.371097535941838e-07, "loss": 0.7553, "step": 298550 }, { "epoch": 1.9074147425986738, "grad_norm": 0.9759384989738464, "learning_rate": 5.364498124398043e-07, "loss": 0.9226, "step": 298560 }, { "epoch": 1.9074786297484123, "grad_norm": 1.0978734493255615, "learning_rate": 5.357170177244242e-07, "loss": 0.7859, "step": 298570 }, { "epoch": 1.9075425168981512, "grad_norm": 1.4422558546066284, "learning_rate": 5.349847211571724e-07, "loss": 0.9151, "step": 298580 }, { "epoch": 1.9076064040478897, "grad_norm": 1.002185344696045, "learning_rate": 5.342529227454152e-07, "loss": 0.8842, "step": 298590 }, { "epoch": 1.9076702911976287, "grad_norm": 1.7048262357711792, "learning_rate": 5.335216224965189e-07, "loss": 0.6968, "step": 298600 }, { "epoch": 1.9077341783473671, "grad_norm": 0.5697035193443298, "learning_rate": 5.327908204178666e-07, "loss": 0.7287, "step": 298610 }, { "epoch": 1.907798065497106, "grad_norm": 0.9011799097061157, "learning_rate": 5.320605165167969e-07, "loss": 1.1011, "step": 298620 }, { "epoch": 1.9078619526468445, "grad_norm": 0.9827040433883667, "learning_rate": 5.31330710800676e-07, "loss": 0.6246, "step": 298630 }, { "epoch": 1.9079258397965835, "grad_norm": 0.7323964834213257, "learning_rate": 5.306014032768536e-07, "loss": 0.8952, "step": 298640 }, { "epoch": 1.907989726946322, "grad_norm": 0.8654759526252747, "learning_rate": 5.298725939526738e-07, "loss": 0.8405, "step": 298650 }, { "epoch": 1.9080536140960607, "grad_norm": 0.9938244223594666, "learning_rate": 5.291442828354698e-07, "loss": 0.9068, "step": 298660 }, { "epoch": 1.9081175012457994, "grad_norm": 0.8327669501304626, "learning_rate": 5.284164699325855e-07, "loss": 0.9399, "step": 298670 }, { "epoch": 1.908181388395538, "grad_norm": 0.8316702842712402, "learning_rate": 5.27689155251343e-07, "loss": 0.842, "step": 298680 }, { "epoch": 1.9082452755452768, "grad_norm": 0.7041569948196411, "learning_rate": 5.269623387990697e-07, "loss": 0.9606, "step": 298690 }, { "epoch": 1.9083091626950155, "grad_norm": 1.169980764389038, "learning_rate": 5.262360205830874e-07, "loss": 0.8678, "step": 298700 }, { "epoch": 1.9083730498447542, "grad_norm": 1.0179364681243896, "learning_rate": 5.255102006107127e-07, "loss": 0.8915, "step": 298710 }, { "epoch": 1.908436936994493, "grad_norm": 1.13489830493927, "learning_rate": 5.247848788892451e-07, "loss": 0.9829, "step": 298720 }, { "epoch": 1.9085008241442316, "grad_norm": 0.8179365396499634, "learning_rate": 5.240600554260011e-07, "loss": 0.7415, "step": 298730 }, { "epoch": 1.9085647112939703, "grad_norm": 0.911197304725647, "learning_rate": 5.233357302282749e-07, "loss": 0.6342, "step": 298740 }, { "epoch": 1.908628598443709, "grad_norm": 0.8303636312484741, "learning_rate": 5.226119033033605e-07, "loss": 0.8932, "step": 298750 }, { "epoch": 1.9086924855934477, "grad_norm": 1.0183674097061157, "learning_rate": 5.218885746585467e-07, "loss": 0.9725, "step": 298760 }, { "epoch": 1.9087563727431864, "grad_norm": 0.7333863973617554, "learning_rate": 5.211657443011219e-07, "loss": 0.9961, "step": 298770 }, { "epoch": 1.9088202598929251, "grad_norm": 0.9927520155906677, "learning_rate": 5.204434122383583e-07, "loss": 0.6939, "step": 298780 }, { "epoch": 1.9088841470426638, "grad_norm": 0.9672486186027527, "learning_rate": 5.197215784775389e-07, "loss": 0.675, "step": 298790 }, { "epoch": 1.9089480341924026, "grad_norm": 0.8010286092758179, "learning_rate": 5.1900024302593e-07, "loss": 1.0252, "step": 298800 }, { "epoch": 1.9090119213421413, "grad_norm": 0.6621715426445007, "learning_rate": 5.182794058907925e-07, "loss": 0.8564, "step": 298810 }, { "epoch": 1.90907580849188, "grad_norm": 1.5702663660049438, "learning_rate": 5.175590670793984e-07, "loss": 0.751, "step": 298820 }, { "epoch": 1.9091396956416187, "grad_norm": 1.3269661664962769, "learning_rate": 5.168392265989808e-07, "loss": 0.9925, "step": 298830 }, { "epoch": 1.9092035827913574, "grad_norm": 0.9862305521965027, "learning_rate": 5.161198844568171e-07, "loss": 0.8104, "step": 298840 }, { "epoch": 1.909267469941096, "grad_norm": 1.0738799571990967, "learning_rate": 5.154010406601239e-07, "loss": 0.9463, "step": 298850 }, { "epoch": 1.9093313570908348, "grad_norm": 1.043198585510254, "learning_rate": 5.146826952161565e-07, "loss": 0.9851, "step": 298860 }, { "epoch": 1.9093952442405735, "grad_norm": 1.2452592849731445, "learning_rate": 5.139648481321424e-07, "loss": 0.8567, "step": 298870 }, { "epoch": 1.9094591313903122, "grad_norm": 0.4939374029636383, "learning_rate": 5.132474994153147e-07, "loss": 0.8316, "step": 298880 }, { "epoch": 1.909523018540051, "grad_norm": 0.9886005520820618, "learning_rate": 5.125306490728954e-07, "loss": 0.7403, "step": 298890 }, { "epoch": 1.9095869056897894, "grad_norm": 1.311178207397461, "learning_rate": 5.118142971121065e-07, "loss": 0.8995, "step": 298900 }, { "epoch": 1.9096507928395283, "grad_norm": 1.4442360401153564, "learning_rate": 5.110984435401589e-07, "loss": 0.682, "step": 298910 }, { "epoch": 1.9097146799892668, "grad_norm": 1.067458987236023, "learning_rate": 5.10383088364269e-07, "loss": 0.8009, "step": 298920 }, { "epoch": 1.9097785671390057, "grad_norm": 0.740051805973053, "learning_rate": 5.096682315916313e-07, "loss": 1.2432, "step": 298930 }, { "epoch": 1.9098424542887442, "grad_norm": 0.8193008899688721, "learning_rate": 5.089538732294507e-07, "loss": 1.0796, "step": 298940 }, { "epoch": 1.9099063414384831, "grad_norm": 0.8132315874099731, "learning_rate": 5.082400132849219e-07, "loss": 0.7697, "step": 298950 }, { "epoch": 1.9099702285882216, "grad_norm": 0.9826362729072571, "learning_rate": 5.075266517652333e-07, "loss": 0.8249, "step": 298960 }, { "epoch": 1.9100341157379606, "grad_norm": 1.4442051649093628, "learning_rate": 5.068137886775681e-07, "loss": 0.8598, "step": 298970 }, { "epoch": 1.910098002887699, "grad_norm": 1.0820285081863403, "learning_rate": 5.061014240291039e-07, "loss": 0.7014, "step": 298980 }, { "epoch": 1.910161890037438, "grad_norm": 0.83199542760849, "learning_rate": 5.053895578270185e-07, "loss": 0.7628, "step": 298990 }, { "epoch": 1.9102257771871765, "grad_norm": 0.9732728004455566, "learning_rate": 5.04678190078478e-07, "loss": 0.9333, "step": 299000 }, { "epoch": 1.9102896643369154, "grad_norm": 0.919940173625946, "learning_rate": 5.039673207906492e-07, "loss": 0.9274, "step": 299010 }, { "epoch": 1.9103535514866539, "grad_norm": 0.6937656998634338, "learning_rate": 5.032569499706874e-07, "loss": 0.693, "step": 299020 }, { "epoch": 1.9104174386363928, "grad_norm": 1.305191159248352, "learning_rate": 5.025470776257535e-07, "loss": 0.731, "step": 299030 }, { "epoch": 1.9104813257861313, "grad_norm": 1.3394445180892944, "learning_rate": 5.018377037629862e-07, "loss": 0.8673, "step": 299040 }, { "epoch": 1.9105452129358702, "grad_norm": 0.7385538220405579, "learning_rate": 5.011288283895354e-07, "loss": 0.8233, "step": 299050 }, { "epoch": 1.9106091000856087, "grad_norm": 1.0281059741973877, "learning_rate": 5.004204515125454e-07, "loss": 0.7892, "step": 299060 }, { "epoch": 1.9106729872353476, "grad_norm": 1.4805169105529785, "learning_rate": 4.997125731391383e-07, "loss": 0.9471, "step": 299070 }, { "epoch": 1.910736874385086, "grad_norm": 2.7536349296569824, "learning_rate": 4.990051932764528e-07, "loss": 1.0098, "step": 299080 }, { "epoch": 1.910800761534825, "grad_norm": 0.9810320138931274, "learning_rate": 4.98298311931611e-07, "loss": 0.9173, "step": 299090 }, { "epoch": 1.9108646486845635, "grad_norm": 0.8710843920707703, "learning_rate": 4.975919291117292e-07, "loss": 0.878, "step": 299100 }, { "epoch": 1.9109285358343024, "grad_norm": 1.1887816190719604, "learning_rate": 4.968860448239187e-07, "loss": 0.95, "step": 299110 }, { "epoch": 1.910992422984041, "grad_norm": 0.7549880146980286, "learning_rate": 4.96180659075296e-07, "loss": 1.0875, "step": 299120 }, { "epoch": 1.9110563101337799, "grad_norm": 0.7689534425735474, "learning_rate": 4.954757718729553e-07, "loss": 0.6713, "step": 299130 }, { "epoch": 1.9111201972835183, "grad_norm": 0.8089300990104675, "learning_rate": 4.947713832240075e-07, "loss": 0.6852, "step": 299140 }, { "epoch": 1.911184084433257, "grad_norm": 0.9446051120758057, "learning_rate": 4.940674931355361e-07, "loss": 0.8864, "step": 299150 }, { "epoch": 1.9112479715829958, "grad_norm": 1.5336053371429443, "learning_rate": 4.933641016146407e-07, "loss": 0.8913, "step": 299160 }, { "epoch": 1.9113118587327345, "grad_norm": 0.8503767848014832, "learning_rate": 4.926612086683879e-07, "loss": 0.7915, "step": 299170 }, { "epoch": 1.9113757458824732, "grad_norm": 1.0193933248519897, "learning_rate": 4.919588143038778e-07, "loss": 0.7958, "step": 299180 }, { "epoch": 1.9114396330322119, "grad_norm": 2.003002166748047, "learning_rate": 4.912569185281657e-07, "loss": 0.9709, "step": 299190 }, { "epoch": 1.9115035201819506, "grad_norm": 0.6801881790161133, "learning_rate": 4.905555213483293e-07, "loss": 0.8338, "step": 299200 }, { "epoch": 1.9115674073316893, "grad_norm": 2.296818733215332, "learning_rate": 4.898546227714295e-07, "loss": 0.9052, "step": 299210 }, { "epoch": 1.911631294481428, "grad_norm": 1.7277367115020752, "learning_rate": 4.891542228045276e-07, "loss": 0.8738, "step": 299220 }, { "epoch": 1.9116951816311667, "grad_norm": 0.8613941669464111, "learning_rate": 4.884543214546733e-07, "loss": 0.94, "step": 299230 }, { "epoch": 1.9117590687809054, "grad_norm": 1.1765998601913452, "learning_rate": 4.877549187289221e-07, "loss": 0.9561, "step": 299240 }, { "epoch": 1.911822955930644, "grad_norm": 0.697990357875824, "learning_rate": 4.870560146343073e-07, "loss": 0.8642, "step": 299250 }, { "epoch": 1.9118868430803828, "grad_norm": 0.831953763961792, "learning_rate": 4.863576091778788e-07, "loss": 0.836, "step": 299260 }, { "epoch": 1.9119507302301215, "grad_norm": 0.548539400100708, "learning_rate": 4.856597023666586e-07, "loss": 0.6535, "step": 299270 }, { "epoch": 1.9120146173798602, "grad_norm": 0.8374563455581665, "learning_rate": 4.849622942076859e-07, "loss": 0.8024, "step": 299280 }, { "epoch": 1.912078504529599, "grad_norm": 1.9350553750991821, "learning_rate": 4.84265384707977e-07, "loss": 0.8086, "step": 299290 }, { "epoch": 1.9121423916793376, "grad_norm": 1.3241461515426636, "learning_rate": 4.835689738745541e-07, "loss": 1.0187, "step": 299300 }, { "epoch": 1.9122062788290763, "grad_norm": 0.6663087606430054, "learning_rate": 4.828730617144283e-07, "loss": 0.9202, "step": 299310 }, { "epoch": 1.912270165978815, "grad_norm": 0.7999659180641174, "learning_rate": 4.821776482346108e-07, "loss": 0.8876, "step": 299320 }, { "epoch": 1.9123340531285538, "grad_norm": 1.4280097484588623, "learning_rate": 4.814827334421068e-07, "loss": 1.0624, "step": 299330 }, { "epoch": 1.9123979402782925, "grad_norm": 4.343797206878662, "learning_rate": 4.807883173439054e-07, "loss": 1.1265, "step": 299340 }, { "epoch": 1.9124618274280312, "grad_norm": 0.9057490229606628, "learning_rate": 4.800943999470064e-07, "loss": 0.8631, "step": 299350 }, { "epoch": 1.9125257145777699, "grad_norm": 0.8484719395637512, "learning_rate": 4.794009812583988e-07, "loss": 0.9666, "step": 299360 }, { "epoch": 1.9125896017275086, "grad_norm": 1.200207233428955, "learning_rate": 4.787080612850659e-07, "loss": 0.8848, "step": 299370 }, { "epoch": 1.9126534888772473, "grad_norm": 1.1934592723846436, "learning_rate": 4.780156400339853e-07, "loss": 0.6613, "step": 299380 }, { "epoch": 1.9127173760269858, "grad_norm": 1.0981838703155518, "learning_rate": 4.773237175121293e-07, "loss": 1.0469, "step": 299390 }, { "epoch": 1.9127812631767247, "grad_norm": 1.2460277080535889, "learning_rate": 4.7663229372646443e-07, "loss": 0.9713, "step": 299400 }, { "epoch": 1.9128451503264632, "grad_norm": 1.1386065483093262, "learning_rate": 4.7594136868395756e-07, "loss": 1.0489, "step": 299410 }, { "epoch": 1.9129090374762021, "grad_norm": 0.6835050582885742, "learning_rate": 4.752509423915641e-07, "loss": 0.9037, "step": 299420 }, { "epoch": 1.9129729246259406, "grad_norm": 0.7506453394889832, "learning_rate": 4.745610148562396e-07, "loss": 0.7473, "step": 299430 }, { "epoch": 1.9130368117756795, "grad_norm": 1.6977883577346802, "learning_rate": 4.7387158608492853e-07, "loss": 0.9052, "step": 299440 }, { "epoch": 1.913100698925418, "grad_norm": 1.0963678359985352, "learning_rate": 4.731826560845809e-07, "loss": 1.1013, "step": 299450 }, { "epoch": 1.913164586075157, "grad_norm": 0.818276584148407, "learning_rate": 4.7249422486213003e-07, "loss": 0.7577, "step": 299460 }, { "epoch": 1.9132284732248954, "grad_norm": 1.0423306226730347, "learning_rate": 4.7180629242450923e-07, "loss": 0.9088, "step": 299470 }, { "epoch": 1.9132923603746343, "grad_norm": 0.8312962055206299, "learning_rate": 4.7111885877864635e-07, "loss": 0.8614, "step": 299480 }, { "epoch": 1.9133562475243728, "grad_norm": 1.4227992296218872, "learning_rate": 4.704319239314636e-07, "loss": 0.8673, "step": 299490 }, { "epoch": 1.9134201346741118, "grad_norm": 1.1332935094833374, "learning_rate": 4.697454878898888e-07, "loss": 1.0211, "step": 299500 }, { "epoch": 1.9134840218238502, "grad_norm": 0.896660566329956, "learning_rate": 4.690595506608164e-07, "loss": 0.7276, "step": 299510 }, { "epoch": 1.9135479089735892, "grad_norm": 0.8751005530357361, "learning_rate": 4.683741122511742e-07, "loss": 0.7033, "step": 299520 }, { "epoch": 1.9136117961233277, "grad_norm": 0.6766482591629028, "learning_rate": 4.676891726678456e-07, "loss": 1.0435, "step": 299530 }, { "epoch": 1.9136756832730666, "grad_norm": 1.7327042818069458, "learning_rate": 4.670047319177473e-07, "loss": 0.6154, "step": 299540 }, { "epoch": 1.913739570422805, "grad_norm": 1.3828660249710083, "learning_rate": 4.663207900077571e-07, "loss": 0.7493, "step": 299550 }, { "epoch": 1.913803457572544, "grad_norm": 1.1648141145706177, "learning_rate": 4.6563734694477503e-07, "loss": 1.0343, "step": 299560 }, { "epoch": 1.9138673447222825, "grad_norm": 0.800993800163269, "learning_rate": 4.649544027356734e-07, "loss": 0.8081, "step": 299570 }, { "epoch": 1.9139312318720214, "grad_norm": 0.8474355936050415, "learning_rate": 4.6427195738733553e-07, "loss": 0.943, "step": 299580 }, { "epoch": 1.91399511902176, "grad_norm": 0.9152101874351501, "learning_rate": 4.635900109066338e-07, "loss": 1.0417, "step": 299590 }, { "epoch": 1.9140590061714988, "grad_norm": 1.0631303787231445, "learning_rate": 4.629085633004404e-07, "loss": 0.9787, "step": 299600 }, { "epoch": 1.9141228933212373, "grad_norm": 1.1691399812698364, "learning_rate": 4.622276145756055e-07, "loss": 0.9034, "step": 299610 }, { "epoch": 1.914186780470976, "grad_norm": 0.645896315574646, "learning_rate": 4.615471647390013e-07, "loss": 0.8945, "step": 299620 }, { "epoch": 1.9142506676207147, "grad_norm": 1.9830036163330078, "learning_rate": 4.608672137974668e-07, "loss": 0.6084, "step": 299630 }, { "epoch": 1.9143145547704534, "grad_norm": 0.9347323775291443, "learning_rate": 4.6018776175786317e-07, "loss": 0.8824, "step": 299640 }, { "epoch": 1.9143784419201921, "grad_norm": 0.5798866748809814, "learning_rate": 4.5950880862702385e-07, "loss": 0.9729, "step": 299650 }, { "epoch": 1.9144423290699308, "grad_norm": 2.9061269760131836, "learning_rate": 4.588303544117933e-07, "loss": 0.8328, "step": 299660 }, { "epoch": 1.9145062162196695, "grad_norm": 0.9483212232589722, "learning_rate": 4.5815239911899953e-07, "loss": 0.9326, "step": 299670 }, { "epoch": 1.9145701033694082, "grad_norm": 1.4846980571746826, "learning_rate": 4.574749427554648e-07, "loss": 0.8387, "step": 299680 }, { "epoch": 1.914633990519147, "grad_norm": 0.7667824625968933, "learning_rate": 4.567979853280224e-07, "loss": 0.8384, "step": 299690 }, { "epoch": 1.9146978776688857, "grad_norm": 0.8439723253250122, "learning_rate": 4.5612152684348373e-07, "loss": 0.8051, "step": 299700 }, { "epoch": 1.9147617648186244, "grad_norm": 0.8428217172622681, "learning_rate": 4.554455673086655e-07, "loss": 0.9557, "step": 299710 }, { "epoch": 1.914825651968363, "grad_norm": 0.8997921943664551, "learning_rate": 4.547701067303733e-07, "loss": 1.1864, "step": 299720 }, { "epoch": 1.9148895391181018, "grad_norm": 0.9388466477394104, "learning_rate": 4.540951451154074e-07, "loss": 0.8625, "step": 299730 }, { "epoch": 1.9149534262678405, "grad_norm": 0.8117624521255493, "learning_rate": 4.5342068247056225e-07, "loss": 0.9298, "step": 299740 }, { "epoch": 1.9150173134175792, "grad_norm": 1.0533971786499023, "learning_rate": 4.5274671880264353e-07, "loss": 0.7363, "step": 299750 }, { "epoch": 1.915081200567318, "grad_norm": 1.016930341720581, "learning_rate": 4.520732541184236e-07, "loss": 0.8498, "step": 299760 }, { "epoch": 1.9151450877170566, "grad_norm": 0.7840309739112854, "learning_rate": 4.514002884246915e-07, "loss": 0.564, "step": 299770 }, { "epoch": 1.9152089748667953, "grad_norm": 0.9618764519691467, "learning_rate": 4.5072782172822514e-07, "loss": 0.8378, "step": 299780 }, { "epoch": 1.915272862016534, "grad_norm": 0.8250643610954285, "learning_rate": 4.5005585403579687e-07, "loss": 0.8388, "step": 299790 }, { "epoch": 1.9153367491662727, "grad_norm": 0.9536775350570679, "learning_rate": 4.493843853541679e-07, "loss": 0.9822, "step": 299800 }, { "epoch": 1.9154006363160114, "grad_norm": 0.9676626920700073, "learning_rate": 4.487134156901107e-07, "loss": 0.941, "step": 299810 }, { "epoch": 1.9154645234657501, "grad_norm": 1.5679287910461426, "learning_rate": 4.480429450503809e-07, "loss": 0.993, "step": 299820 }, { "epoch": 1.9155284106154888, "grad_norm": 1.0796259641647339, "learning_rate": 4.4737297344171757e-07, "loss": 0.7983, "step": 299830 }, { "epoch": 1.9155922977652275, "grad_norm": 1.0324289798736572, "learning_rate": 4.467035008708875e-07, "loss": 0.7589, "step": 299840 }, { "epoch": 1.9156561849149663, "grad_norm": 1.006419062614441, "learning_rate": 4.4603452734461317e-07, "loss": 1.0923, "step": 299850 }, { "epoch": 1.915720072064705, "grad_norm": 0.981799840927124, "learning_rate": 4.4536605286965015e-07, "loss": 0.8727, "step": 299860 }, { "epoch": 1.9157839592144437, "grad_norm": 1.746458888053894, "learning_rate": 4.446980774527154e-07, "loss": 1.0021, "step": 299870 }, { "epoch": 1.9158478463641821, "grad_norm": 0.8302098512649536, "learning_rate": 4.440306011005424e-07, "loss": 0.7477, "step": 299880 }, { "epoch": 1.915911733513921, "grad_norm": 1.0629510879516602, "learning_rate": 4.4336362381985905e-07, "loss": 0.8635, "step": 299890 }, { "epoch": 1.9159756206636596, "grad_norm": 1.2623989582061768, "learning_rate": 4.4269714561737117e-07, "loss": 0.9379, "step": 299900 }, { "epoch": 1.9160395078133985, "grad_norm": 1.230000376701355, "learning_rate": 4.420311664997956e-07, "loss": 0.8526, "step": 299910 }, { "epoch": 1.916103394963137, "grad_norm": 1.3445703983306885, "learning_rate": 4.413656864738436e-07, "loss": 0.8425, "step": 299920 }, { "epoch": 1.916167282112876, "grad_norm": 0.8907622694969177, "learning_rate": 4.407007055462153e-07, "loss": 0.7709, "step": 299930 }, { "epoch": 1.9162311692626144, "grad_norm": 2.3243703842163086, "learning_rate": 4.400362237236e-07, "loss": 0.8596, "step": 299940 }, { "epoch": 1.9162950564123533, "grad_norm": 3.5086824893951416, "learning_rate": 4.393722410126977e-07, "loss": 0.8725, "step": 299950 }, { "epoch": 1.9163589435620918, "grad_norm": 0.6275745630264282, "learning_rate": 4.3870875742019757e-07, "loss": 0.8443, "step": 299960 }, { "epoch": 1.9164228307118307, "grad_norm": 0.9403732419013977, "learning_rate": 4.3804577295277204e-07, "loss": 0.8953, "step": 299970 }, { "epoch": 1.9164867178615692, "grad_norm": 0.9695448875427246, "learning_rate": 4.373832876170991e-07, "loss": 0.5875, "step": 299980 }, { "epoch": 1.9165506050113081, "grad_norm": 0.9237552881240845, "learning_rate": 4.3672130141986234e-07, "loss": 0.9285, "step": 299990 }, { "epoch": 1.9166144921610466, "grad_norm": 1.1848057508468628, "learning_rate": 4.3605981436771195e-07, "loss": 1.0084, "step": 300000 }, { "epoch": 1.9166783793107856, "grad_norm": 0.8543856739997864, "learning_rate": 4.353988264673259e-07, "loss": 0.8232, "step": 300010 }, { "epoch": 1.916742266460524, "grad_norm": 1.3468594551086426, "learning_rate": 4.3473833772534887e-07, "loss": 1.0356, "step": 300020 }, { "epoch": 1.916806153610263, "grad_norm": 1.0246607065200806, "learning_rate": 4.340783481484367e-07, "loss": 0.945, "step": 300030 }, { "epoch": 1.9168700407600014, "grad_norm": 0.7356663346290588, "learning_rate": 4.3341885774323966e-07, "loss": 0.8515, "step": 300040 }, { "epoch": 1.9169339279097404, "grad_norm": 0.770076334476471, "learning_rate": 4.3275986651639677e-07, "loss": 0.9127, "step": 300050 }, { "epoch": 1.9169978150594789, "grad_norm": 1.0495332479476929, "learning_rate": 4.3210137447453615e-07, "loss": 0.664, "step": 300060 }, { "epoch": 1.9170617022092178, "grad_norm": 0.8794049620628357, "learning_rate": 4.3144338162430244e-07, "loss": 0.6817, "step": 300070 }, { "epoch": 1.9171255893589563, "grad_norm": 0.8359986543655396, "learning_rate": 4.3078588797231814e-07, "loss": 0.937, "step": 300080 }, { "epoch": 1.9171894765086952, "grad_norm": 1.266853928565979, "learning_rate": 4.3012889352520014e-07, "loss": 0.797, "step": 300090 }, { "epoch": 1.9172533636584337, "grad_norm": 2.2502448558807373, "learning_rate": 4.29472398289571e-07, "loss": 1.0825, "step": 300100 }, { "epoch": 1.9173172508081724, "grad_norm": 3.827913761138916, "learning_rate": 4.2881640227203646e-07, "loss": 0.9192, "step": 300110 }, { "epoch": 1.917381137957911, "grad_norm": 0.9653197526931763, "learning_rate": 4.2816090547920793e-07, "loss": 0.7286, "step": 300120 }, { "epoch": 1.9174450251076498, "grad_norm": 1.0422720909118652, "learning_rate": 4.2750590791768574e-07, "loss": 0.8416, "step": 300130 }, { "epoch": 1.9175089122573885, "grad_norm": 1.3192023038864136, "learning_rate": 4.26851409594059e-07, "loss": 0.9013, "step": 300140 }, { "epoch": 1.9175727994071272, "grad_norm": 0.9541807770729065, "learning_rate": 4.261974105149336e-07, "loss": 1.204, "step": 300150 }, { "epoch": 1.917636686556866, "grad_norm": 0.9419488906860352, "learning_rate": 4.255439106868819e-07, "loss": 0.6745, "step": 300160 }, { "epoch": 1.9177005737066046, "grad_norm": 1.0936365127563477, "learning_rate": 4.248909101164933e-07, "loss": 0.972, "step": 300170 }, { "epoch": 1.9177644608563433, "grad_norm": 1.177007794380188, "learning_rate": 4.242384088103457e-07, "loss": 0.9293, "step": 300180 }, { "epoch": 1.917828348006082, "grad_norm": 0.6732769012451172, "learning_rate": 4.235864067750006e-07, "loss": 1.2595, "step": 300190 }, { "epoch": 1.9178922351558207, "grad_norm": 0.7928746938705444, "learning_rate": 4.229349040170305e-07, "loss": 0.8386, "step": 300200 }, { "epoch": 1.9179561223055595, "grad_norm": 1.158821702003479, "learning_rate": 4.2228390054299683e-07, "loss": 0.7762, "step": 300210 }, { "epoch": 1.9180200094552982, "grad_norm": 0.8231037855148315, "learning_rate": 4.2163339635946097e-07, "loss": 0.8962, "step": 300220 }, { "epoch": 1.9180838966050369, "grad_norm": 0.989878237247467, "learning_rate": 4.2098339147296215e-07, "loss": 0.9074, "step": 300230 }, { "epoch": 1.9181477837547756, "grad_norm": 0.7574949264526367, "learning_rate": 4.2033388589005075e-07, "loss": 0.8568, "step": 300240 }, { "epoch": 1.9182116709045143, "grad_norm": 1.9735184907913208, "learning_rate": 4.196848796172714e-07, "loss": 0.8463, "step": 300250 }, { "epoch": 1.918275558054253, "grad_norm": 1.1065763235092163, "learning_rate": 4.1903637266116347e-07, "loss": 0.989, "step": 300260 }, { "epoch": 1.9183394452039917, "grad_norm": 0.918474555015564, "learning_rate": 4.183883650282494e-07, "loss": 0.6855, "step": 300270 }, { "epoch": 1.9184033323537304, "grad_norm": 0.5903297662734985, "learning_rate": 4.1774085672505736e-07, "loss": 0.9428, "step": 300280 }, { "epoch": 1.918467219503469, "grad_norm": 0.7054703831672668, "learning_rate": 4.170938477581099e-07, "loss": 0.9902, "step": 300290 }, { "epoch": 1.9185311066532078, "grad_norm": 0.6120290160179138, "learning_rate": 4.16447338133924e-07, "loss": 0.9751, "step": 300300 }, { "epoch": 1.9185949938029465, "grad_norm": 0.9264044165611267, "learning_rate": 4.1580132785901116e-07, "loss": 1.0271, "step": 300310 }, { "epoch": 1.9186588809526852, "grad_norm": 0.8623595237731934, "learning_rate": 4.151558169398717e-07, "loss": 1.0952, "step": 300320 }, { "epoch": 1.918722768102424, "grad_norm": 3.1767020225524902, "learning_rate": 4.1451080538301155e-07, "loss": 0.9888, "step": 300330 }, { "epoch": 1.9187866552521626, "grad_norm": 1.0149223804473877, "learning_rate": 4.1386629319492556e-07, "loss": 0.9297, "step": 300340 }, { "epoch": 1.9188505424019011, "grad_norm": 1.0157650709152222, "learning_rate": 4.1322228038210286e-07, "loss": 0.9221, "step": 300350 }, { "epoch": 1.91891442955164, "grad_norm": 2.1870782375335693, "learning_rate": 4.125787669510328e-07, "loss": 0.8463, "step": 300360 }, { "epoch": 1.9189783167013785, "grad_norm": 1.3804250955581665, "learning_rate": 4.119357529081935e-07, "loss": 0.8814, "step": 300370 }, { "epoch": 1.9190422038511175, "grad_norm": 0.8124695420265198, "learning_rate": 4.112932382600576e-07, "loss": 0.8788, "step": 300380 }, { "epoch": 1.919106091000856, "grad_norm": 0.8044643402099609, "learning_rate": 4.106512230131032e-07, "loss": 0.7449, "step": 300390 }, { "epoch": 1.9191699781505949, "grad_norm": 0.8989754915237427, "learning_rate": 4.100097071737863e-07, "loss": 0.6831, "step": 300400 }, { "epoch": 1.9192338653003334, "grad_norm": 0.6823838949203491, "learning_rate": 4.0936869074857943e-07, "loss": 0.7529, "step": 300410 }, { "epoch": 1.9192977524500723, "grad_norm": 1.3462483882904053, "learning_rate": 4.0872817374392746e-07, "loss": 1.0186, "step": 300420 }, { "epoch": 1.9193616395998108, "grad_norm": 0.971286952495575, "learning_rate": 4.0808815616628636e-07, "loss": 0.8376, "step": 300430 }, { "epoch": 1.9194255267495497, "grad_norm": 1.3372255563735962, "learning_rate": 4.07448638022101e-07, "loss": 1.1777, "step": 300440 }, { "epoch": 1.9194894138992882, "grad_norm": 1.064820647239685, "learning_rate": 4.0680961931781615e-07, "loss": 0.8509, "step": 300450 }, { "epoch": 1.919553301049027, "grad_norm": 1.0935826301574707, "learning_rate": 4.061711000598545e-07, "loss": 0.8222, "step": 300460 }, { "epoch": 1.9196171881987656, "grad_norm": 1.4883924722671509, "learning_rate": 4.0553308025466083e-07, "loss": 0.979, "step": 300470 }, { "epoch": 1.9196810753485045, "grad_norm": 0.8018064498901367, "learning_rate": 4.0489555990865233e-07, "loss": 0.7354, "step": 300480 }, { "epoch": 1.919744962498243, "grad_norm": 1.1229437589645386, "learning_rate": 4.042585390282516e-07, "loss": 0.9783, "step": 300490 }, { "epoch": 1.919808849647982, "grad_norm": 0.7482228875160217, "learning_rate": 4.0362201761987017e-07, "loss": 0.8791, "step": 300500 }, { "epoch": 1.9198727367977204, "grad_norm": 0.5550474524497986, "learning_rate": 4.0298599568992513e-07, "loss": 0.7997, "step": 300510 }, { "epoch": 1.9199366239474593, "grad_norm": 0.8357752561569214, "learning_rate": 4.023504732448169e-07, "loss": 1.0888, "step": 300520 }, { "epoch": 1.9200005110971978, "grad_norm": 1.0730481147766113, "learning_rate": 4.0171545029095146e-07, "loss": 0.994, "step": 300530 }, { "epoch": 1.9200643982469368, "grad_norm": 0.8998647928237915, "learning_rate": 4.010809268347182e-07, "loss": 1.0256, "step": 300540 }, { "epoch": 1.9201282853966752, "grad_norm": 0.7829686999320984, "learning_rate": 4.004469028825064e-07, "loss": 0.8715, "step": 300550 }, { "epoch": 1.9201921725464142, "grad_norm": 0.7973158955574036, "learning_rate": 3.99813378440711e-07, "loss": 0.7151, "step": 300560 }, { "epoch": 1.9202560596961527, "grad_norm": 0.7357059121131897, "learning_rate": 3.9918035351569903e-07, "loss": 0.8495, "step": 300570 }, { "epoch": 1.9203199468458916, "grad_norm": 0.7509437799453735, "learning_rate": 3.985478281138544e-07, "loss": 0.8773, "step": 300580 }, { "epoch": 1.92038383399563, "grad_norm": 0.8090037107467651, "learning_rate": 3.9791580224153856e-07, "loss": 0.9297, "step": 300590 }, { "epoch": 1.9204477211453688, "grad_norm": 0.6789357662200928, "learning_rate": 3.9728427590512984e-07, "loss": 0.9613, "step": 300600 }, { "epoch": 1.9205116082951075, "grad_norm": 0.990573525428772, "learning_rate": 3.9665324911097866e-07, "loss": 0.7093, "step": 300610 }, { "epoch": 1.9205754954448462, "grad_norm": 0.97950679063797, "learning_rate": 3.960227218654411e-07, "loss": 0.6383, "step": 300620 }, { "epoch": 1.9206393825945849, "grad_norm": 0.8172192573547363, "learning_rate": 3.953926941748676e-07, "loss": 0.8158, "step": 300630 }, { "epoch": 1.9207032697443236, "grad_norm": 1.0370420217514038, "learning_rate": 3.947631660456086e-07, "loss": 0.9763, "step": 300640 }, { "epoch": 1.9207671568940623, "grad_norm": 0.6944184899330139, "learning_rate": 3.941341374839924e-07, "loss": 0.7228, "step": 300650 }, { "epoch": 1.920831044043801, "grad_norm": 1.1591705083847046, "learning_rate": 3.93505608496364e-07, "loss": 0.8656, "step": 300660 }, { "epoch": 1.9208949311935397, "grad_norm": 1.8749659061431885, "learning_rate": 3.9287757908905155e-07, "loss": 1.0307, "step": 300670 }, { "epoch": 1.9209588183432784, "grad_norm": 1.6521705389022827, "learning_rate": 3.9225004926837784e-07, "loss": 0.8042, "step": 300680 }, { "epoch": 1.9210227054930171, "grad_norm": 1.2027360200881958, "learning_rate": 3.9162301904066e-07, "loss": 0.7991, "step": 300690 }, { "epoch": 1.9210865926427558, "grad_norm": 0.7796891927719116, "learning_rate": 3.9099648841221527e-07, "loss": 0.8677, "step": 300700 }, { "epoch": 1.9211504797924945, "grad_norm": 1.0626076459884644, "learning_rate": 3.903704573893552e-07, "loss": 1.1278, "step": 300710 }, { "epoch": 1.9212143669422332, "grad_norm": 0.7469705939292908, "learning_rate": 3.8974492597838586e-07, "loss": 0.7027, "step": 300720 }, { "epoch": 1.921278254091972, "grad_norm": 1.140273928642273, "learning_rate": 3.8911989418560225e-07, "loss": 0.9368, "step": 300730 }, { "epoch": 1.9213421412417107, "grad_norm": 1.0759254693984985, "learning_rate": 3.884953620172993e-07, "loss": 1.0817, "step": 300740 }, { "epoch": 1.9214060283914494, "grad_norm": 0.7694743871688843, "learning_rate": 3.87871329479772e-07, "loss": 0.8649, "step": 300750 }, { "epoch": 1.921469915541188, "grad_norm": 0.9597426652908325, "learning_rate": 3.872477965792931e-07, "loss": 0.9085, "step": 300760 }, { "epoch": 1.9215338026909268, "grad_norm": 1.4340089559555054, "learning_rate": 3.8662476332215757e-07, "loss": 0.9228, "step": 300770 }, { "epoch": 1.9215976898406655, "grad_norm": 0.9579765200614929, "learning_rate": 3.8600222971462706e-07, "loss": 0.6958, "step": 300780 }, { "epoch": 1.9216615769904042, "grad_norm": 1.5325475931167603, "learning_rate": 3.8538019576298546e-07, "loss": 0.8199, "step": 300790 }, { "epoch": 1.921725464140143, "grad_norm": 1.028903603553772, "learning_rate": 3.847586614734833e-07, "loss": 0.8857, "step": 300800 }, { "epoch": 1.9217893512898816, "grad_norm": 0.8717125058174133, "learning_rate": 3.841376268523822e-07, "loss": 0.6886, "step": 300810 }, { "epoch": 1.9218532384396203, "grad_norm": 0.8490310311317444, "learning_rate": 3.8351709190593834e-07, "loss": 0.8223, "step": 300820 }, { "epoch": 1.921917125589359, "grad_norm": 1.1729625463485718, "learning_rate": 3.828970566404022e-07, "loss": 0.7418, "step": 300830 }, { "epoch": 1.9219810127390975, "grad_norm": 0.8890892863273621, "learning_rate": 3.8227752106201887e-07, "loss": 1.0118, "step": 300840 }, { "epoch": 1.9220448998888364, "grad_norm": 2.766529083251953, "learning_rate": 3.816584851770277e-07, "loss": 0.9139, "step": 300850 }, { "epoch": 1.922108787038575, "grad_norm": 0.8185132741928101, "learning_rate": 3.810399489916627e-07, "loss": 0.7931, "step": 300860 }, { "epoch": 1.9221726741883138, "grad_norm": 0.8807799220085144, "learning_rate": 3.8042191251214663e-07, "loss": 0.8877, "step": 300870 }, { "epoch": 1.9222365613380523, "grad_norm": 1.0990222692489624, "learning_rate": 3.7980437574471337e-07, "loss": 0.841, "step": 300880 }, { "epoch": 1.9223004484877912, "grad_norm": 0.7515807747840881, "learning_rate": 3.7918733869557464e-07, "loss": 0.8471, "step": 300890 }, { "epoch": 1.9223643356375297, "grad_norm": 1.0475271940231323, "learning_rate": 3.785708013709477e-07, "loss": 0.8562, "step": 300900 }, { "epoch": 1.9224282227872687, "grad_norm": 0.9003559947013855, "learning_rate": 3.779547637770442e-07, "loss": 0.9027, "step": 300910 }, { "epoch": 1.9224921099370071, "grad_norm": 0.8411440253257751, "learning_rate": 3.773392259200648e-07, "loss": 0.8666, "step": 300920 }, { "epoch": 1.922555997086746, "grad_norm": 1.0167624950408936, "learning_rate": 3.767241878062044e-07, "loss": 0.6216, "step": 300930 }, { "epoch": 1.9226198842364846, "grad_norm": 0.7058795094490051, "learning_rate": 3.761096494416694e-07, "loss": 0.9369, "step": 300940 }, { "epoch": 1.9226837713862235, "grad_norm": 0.8156857490539551, "learning_rate": 3.754956108326324e-07, "loss": 1.1769, "step": 300950 }, { "epoch": 1.922747658535962, "grad_norm": 1.0297104120254517, "learning_rate": 3.748820719852941e-07, "loss": 0.9375, "step": 300960 }, { "epoch": 1.922811545685701, "grad_norm": 1.000810146331787, "learning_rate": 3.742690329058218e-07, "loss": 0.9047, "step": 300970 }, { "epoch": 1.9228754328354394, "grad_norm": 0.8792943954467773, "learning_rate": 3.736564936003939e-07, "loss": 0.7601, "step": 300980 }, { "epoch": 1.9229393199851783, "grad_norm": 0.8752835988998413, "learning_rate": 3.730444540751721e-07, "loss": 0.9696, "step": 300990 }, { "epoch": 1.9230032071349168, "grad_norm": 0.7234314680099487, "learning_rate": 3.7243291433633475e-07, "loss": 0.7312, "step": 301000 }, { "epoch": 1.9230670942846557, "grad_norm": 0.7834398150444031, "learning_rate": 3.7182187439002704e-07, "loss": 0.9055, "step": 301010 }, { "epoch": 1.9231309814343942, "grad_norm": 0.5239536762237549, "learning_rate": 3.712113342424051e-07, "loss": 0.7016, "step": 301020 }, { "epoch": 1.9231948685841331, "grad_norm": 1.0821951627731323, "learning_rate": 3.7060129389962504e-07, "loss": 0.8404, "step": 301030 }, { "epoch": 1.9232587557338716, "grad_norm": 0.8091372847557068, "learning_rate": 3.69991753367821e-07, "loss": 1.1122, "step": 301040 }, { "epoch": 1.9233226428836105, "grad_norm": 0.5722392797470093, "learning_rate": 3.693827126531435e-07, "loss": 1.0596, "step": 301050 }, { "epoch": 1.923386530033349, "grad_norm": 1.005053997039795, "learning_rate": 3.687741717617099e-07, "loss": 0.6983, "step": 301060 }, { "epoch": 1.923450417183088, "grad_norm": 1.0301357507705688, "learning_rate": 3.6816613069966535e-07, "loss": 1.074, "step": 301070 }, { "epoch": 1.9235143043328264, "grad_norm": 0.8463405966758728, "learning_rate": 3.675585894731159e-07, "loss": 0.7987, "step": 301080 }, { "epoch": 1.9235781914825651, "grad_norm": 1.8486971855163574, "learning_rate": 3.669515480882013e-07, "loss": 1.0181, "step": 301090 }, { "epoch": 1.9236420786323039, "grad_norm": 0.6684698462486267, "learning_rate": 3.66345006551011e-07, "loss": 0.7149, "step": 301100 }, { "epoch": 1.9237059657820426, "grad_norm": 0.8282666802406311, "learning_rate": 3.6573896486767344e-07, "loss": 0.7379, "step": 301110 }, { "epoch": 1.9237698529317813, "grad_norm": 0.6827027797698975, "learning_rate": 3.651334230442838e-07, "loss": 0.7027, "step": 301120 }, { "epoch": 1.92383374008152, "grad_norm": 1.1820034980773926, "learning_rate": 3.6452838108694264e-07, "loss": 0.9152, "step": 301130 }, { "epoch": 1.9238976272312587, "grad_norm": 0.8771212697029114, "learning_rate": 3.639238390017341e-07, "loss": 1.0107, "step": 301140 }, { "epoch": 1.9239615143809974, "grad_norm": 1.2758761644363403, "learning_rate": 3.6331979679476435e-07, "loss": 0.8095, "step": 301150 }, { "epoch": 1.924025401530736, "grad_norm": 1.9822547435760498, "learning_rate": 3.627162544720952e-07, "loss": 1.2276, "step": 301160 }, { "epoch": 1.9240892886804748, "grad_norm": 0.8313500285148621, "learning_rate": 3.6211321203982183e-07, "loss": 0.879, "step": 301170 }, { "epoch": 1.9241531758302135, "grad_norm": 1.3029310703277588, "learning_rate": 3.6151066950401155e-07, "loss": 0.7567, "step": 301180 }, { "epoch": 1.9242170629799522, "grad_norm": 1.1256672143936157, "learning_rate": 3.609086268707318e-07, "loss": 0.8885, "step": 301190 }, { "epoch": 1.924280950129691, "grad_norm": 0.8242726922035217, "learning_rate": 3.603070841460443e-07, "loss": 0.8574, "step": 301200 }, { "epoch": 1.9243448372794296, "grad_norm": 0.9224770665168762, "learning_rate": 3.5970604133601095e-07, "loss": 0.7343, "step": 301210 }, { "epoch": 1.9244087244291683, "grad_norm": 1.151964783668518, "learning_rate": 3.591054984466824e-07, "loss": 1.0734, "step": 301220 }, { "epoch": 1.924472611578907, "grad_norm": 1.1502186059951782, "learning_rate": 3.5850545548410387e-07, "loss": 1.0551, "step": 301230 }, { "epoch": 1.9245364987286457, "grad_norm": 0.8960140347480774, "learning_rate": 3.5790591245432603e-07, "loss": 0.8229, "step": 301240 }, { "epoch": 1.9246003858783844, "grad_norm": 0.9007967114448547, "learning_rate": 3.5730686936337744e-07, "loss": 0.7307, "step": 301250 }, { "epoch": 1.9246642730281232, "grad_norm": 1.0921900272369385, "learning_rate": 3.5670832621729766e-07, "loss": 0.8592, "step": 301260 }, { "epoch": 1.9247281601778619, "grad_norm": 1.049098014831543, "learning_rate": 3.5611028302211523e-07, "loss": 0.802, "step": 301270 }, { "epoch": 1.9247920473276006, "grad_norm": 1.1155762672424316, "learning_rate": 3.555127397838476e-07, "loss": 0.7605, "step": 301280 }, { "epoch": 1.9248559344773393, "grad_norm": 0.8949849605560303, "learning_rate": 3.549156965085176e-07, "loss": 0.7584, "step": 301290 }, { "epoch": 1.924919821627078, "grad_norm": 1.0263363122940063, "learning_rate": 3.543191532021317e-07, "loss": 0.8152, "step": 301300 }, { "epoch": 1.9249837087768167, "grad_norm": 1.013625979423523, "learning_rate": 3.537231098707072e-07, "loss": 0.8231, "step": 301310 }, { "epoch": 1.9250475959265554, "grad_norm": 1.2236038446426392, "learning_rate": 3.531275665202338e-07, "loss": 1.0038, "step": 301320 }, { "epoch": 1.9251114830762939, "grad_norm": 1.1396390199661255, "learning_rate": 3.5253252315672337e-07, "loss": 1.084, "step": 301330 }, { "epoch": 1.9251753702260328, "grad_norm": 0.8289651870727539, "learning_rate": 3.5193797978615996e-07, "loss": 0.8872, "step": 301340 }, { "epoch": 1.9252392573757713, "grad_norm": 1.6858197450637817, "learning_rate": 3.5134393641452766e-07, "loss": 0.8278, "step": 301350 }, { "epoch": 1.9253031445255102, "grad_norm": 1.236825942993164, "learning_rate": 3.507503930478162e-07, "loss": 0.8035, "step": 301360 }, { "epoch": 1.9253670316752487, "grad_norm": 1.3187953233718872, "learning_rate": 3.501573496920041e-07, "loss": 0.759, "step": 301370 }, { "epoch": 1.9254309188249876, "grad_norm": 1.0832971334457397, "learning_rate": 3.4956480635305877e-07, "loss": 0.6859, "step": 301380 }, { "epoch": 1.925494805974726, "grad_norm": 0.6571474671363831, "learning_rate": 3.4897276303695324e-07, "loss": 0.9639, "step": 301390 }, { "epoch": 1.925558693124465, "grad_norm": 0.7257376313209534, "learning_rate": 3.483812197496383e-07, "loss": 0.7832, "step": 301400 }, { "epoch": 1.9256225802742035, "grad_norm": 1.053246259689331, "learning_rate": 3.47790176497087e-07, "loss": 0.6743, "step": 301410 }, { "epoch": 1.9256864674239424, "grad_norm": 0.8816425204277039, "learning_rate": 3.4719963328523896e-07, "loss": 1.0447, "step": 301420 }, { "epoch": 1.925750354573681, "grad_norm": 1.0703054666519165, "learning_rate": 3.4660959012005056e-07, "loss": 0.8536, "step": 301430 }, { "epoch": 1.9258142417234199, "grad_norm": 1.726242184638977, "learning_rate": 3.4602004700745594e-07, "loss": 1.0394, "step": 301440 }, { "epoch": 1.9258781288731583, "grad_norm": 1.054643988609314, "learning_rate": 3.4543100395340036e-07, "loss": 0.7895, "step": 301450 }, { "epoch": 1.9259420160228973, "grad_norm": 1.1033471822738647, "learning_rate": 3.448424609638068e-07, "loss": 0.7948, "step": 301460 }, { "epoch": 1.9260059031726358, "grad_norm": 0.784725546836853, "learning_rate": 3.44254418044615e-07, "loss": 0.8194, "step": 301470 }, { "epoch": 1.9260697903223747, "grad_norm": 0.9209342002868652, "learning_rate": 3.436668752017314e-07, "loss": 0.7762, "step": 301480 }, { "epoch": 1.9261336774721132, "grad_norm": 0.9801011085510254, "learning_rate": 3.430798324410844e-07, "loss": 0.903, "step": 301490 }, { "epoch": 1.926197564621852, "grad_norm": 1.1155471801757812, "learning_rate": 3.4249328976858066e-07, "loss": 1.0466, "step": 301500 }, { "epoch": 1.9262614517715906, "grad_norm": 0.9278727769851685, "learning_rate": 3.4190724719013744e-07, "loss": 0.9425, "step": 301510 }, { "epoch": 1.9263253389213295, "grad_norm": 1.0809649229049683, "learning_rate": 3.4132170471163905e-07, "loss": 0.6635, "step": 301520 }, { "epoch": 1.926389226071068, "grad_norm": 2.041011095046997, "learning_rate": 3.407366623389974e-07, "loss": 1.1511, "step": 301530 }, { "epoch": 1.926453113220807, "grad_norm": 1.0981637239456177, "learning_rate": 3.401521200780966e-07, "loss": 0.8078, "step": 301540 }, { "epoch": 1.9265170003705454, "grad_norm": 1.3349926471710205, "learning_rate": 3.3956807793482646e-07, "loss": 1.0045, "step": 301550 }, { "epoch": 1.9265808875202843, "grad_norm": 0.5713462233543396, "learning_rate": 3.3898453591506565e-07, "loss": 0.7818, "step": 301560 }, { "epoch": 1.9266447746700228, "grad_norm": 1.0720760822296143, "learning_rate": 3.3840149402469824e-07, "loss": 0.7939, "step": 301570 }, { "epoch": 1.9267086618197615, "grad_norm": 1.5037866830825806, "learning_rate": 3.378189522695863e-07, "loss": 0.6691, "step": 301580 }, { "epoch": 1.9267725489695002, "grad_norm": 1.3440207242965698, "learning_rate": 3.372369106556028e-07, "loss": 0.7193, "step": 301590 }, { "epoch": 1.926836436119239, "grad_norm": 0.6172181367874146, "learning_rate": 3.366553691886154e-07, "loss": 0.9351, "step": 301600 }, { "epoch": 1.9269003232689776, "grad_norm": 0.747612476348877, "learning_rate": 3.360743278744638e-07, "loss": 0.6419, "step": 301610 }, { "epoch": 1.9269642104187164, "grad_norm": 1.2818886041641235, "learning_rate": 3.3549378671901553e-07, "loss": 0.9875, "step": 301620 }, { "epoch": 1.927028097568455, "grad_norm": 0.7893770337104797, "learning_rate": 3.3491374572810487e-07, "loss": 1.0888, "step": 301630 }, { "epoch": 1.9270919847181938, "grad_norm": 1.241309404373169, "learning_rate": 3.3433420490758263e-07, "loss": 0.7368, "step": 301640 }, { "epoch": 1.9271558718679325, "grad_norm": 0.5212295055389404, "learning_rate": 3.33755164263283e-07, "loss": 0.6227, "step": 301650 }, { "epoch": 1.9272197590176712, "grad_norm": 1.048384428024292, "learning_rate": 3.3317662380103477e-07, "loss": 0.8716, "step": 301660 }, { "epoch": 1.9272836461674099, "grad_norm": 1.3901996612548828, "learning_rate": 3.3259858352666094e-07, "loss": 0.6373, "step": 301670 }, { "epoch": 1.9273475333171486, "grad_norm": 0.9306744933128357, "learning_rate": 3.3202104344599583e-07, "loss": 1.1866, "step": 301680 }, { "epoch": 1.9274114204668873, "grad_norm": 1.039258360862732, "learning_rate": 3.3144400356484585e-07, "loss": 0.8511, "step": 301690 }, { "epoch": 1.927475307616626, "grad_norm": 1.1582444906234741, "learning_rate": 3.30867463889023e-07, "loss": 0.9237, "step": 301700 }, { "epoch": 1.9275391947663647, "grad_norm": 0.918725311756134, "learning_rate": 3.302914244243338e-07, "loss": 0.9144, "step": 301710 }, { "epoch": 1.9276030819161034, "grad_norm": 1.1346672773361206, "learning_rate": 3.297158851765791e-07, "loss": 1.0005, "step": 301720 }, { "epoch": 1.9276669690658421, "grad_norm": 1.0042999982833862, "learning_rate": 3.291408461515599e-07, "loss": 0.6809, "step": 301730 }, { "epoch": 1.9277308562155808, "grad_norm": 1.322226881980896, "learning_rate": 3.2856630735506047e-07, "loss": 1.0392, "step": 301740 }, { "epoch": 1.9277947433653195, "grad_norm": 0.7337998747825623, "learning_rate": 3.2799226879287047e-07, "loss": 0.7134, "step": 301750 }, { "epoch": 1.9278586305150582, "grad_norm": 0.9696584343910217, "learning_rate": 3.274187304707743e-07, "loss": 0.9432, "step": 301760 }, { "epoch": 1.927922517664797, "grad_norm": 1.2278753519058228, "learning_rate": 3.268456923945451e-07, "loss": 0.9087, "step": 301770 }, { "epoch": 1.9279864048145356, "grad_norm": 1.1169925928115845, "learning_rate": 3.2627315456995045e-07, "loss": 0.8745, "step": 301780 }, { "epoch": 1.9280502919642744, "grad_norm": 0.8843469023704529, "learning_rate": 3.25701117002758e-07, "loss": 0.7129, "step": 301790 }, { "epoch": 1.928114179114013, "grad_norm": 1.417089581489563, "learning_rate": 3.251295796987297e-07, "loss": 0.8296, "step": 301800 }, { "epoch": 1.9281780662637518, "grad_norm": 1.1206992864608765, "learning_rate": 3.245585426636222e-07, "loss": 0.7617, "step": 301810 }, { "epoch": 1.9282419534134903, "grad_norm": 0.918014645576477, "learning_rate": 3.2398800590318636e-07, "loss": 0.76, "step": 301820 }, { "epoch": 1.9283058405632292, "grad_norm": 0.888495147228241, "learning_rate": 3.234179694231676e-07, "loss": 0.8968, "step": 301830 }, { "epoch": 1.9283697277129677, "grad_norm": 0.9238024950027466, "learning_rate": 3.228484332293058e-07, "loss": 0.7197, "step": 301840 }, { "epoch": 1.9284336148627066, "grad_norm": 0.8333662152290344, "learning_rate": 3.2227939732733523e-07, "loss": 1.0649, "step": 301850 }, { "epoch": 1.928497502012445, "grad_norm": 0.6979062557220459, "learning_rate": 3.2171086172299025e-07, "loss": 0.7092, "step": 301860 }, { "epoch": 1.928561389162184, "grad_norm": 1.1566264629364014, "learning_rate": 3.21142826421994e-07, "loss": 0.9163, "step": 301870 }, { "epoch": 1.9286252763119225, "grad_norm": 1.1237833499908447, "learning_rate": 3.205752914300697e-07, "loss": 0.9294, "step": 301880 }, { "epoch": 1.9286891634616614, "grad_norm": 1.5225160121917725, "learning_rate": 3.2000825675292387e-07, "loss": 0.8519, "step": 301890 }, { "epoch": 1.9287530506114, "grad_norm": 0.8152609467506409, "learning_rate": 3.194417223962853e-07, "loss": 0.8797, "step": 301900 }, { "epoch": 1.9288169377611388, "grad_norm": 1.680662751197815, "learning_rate": 3.188756883658384e-07, "loss": 0.9777, "step": 301910 }, { "epoch": 1.9288808249108773, "grad_norm": 0.8997313380241394, "learning_rate": 3.1831015466730063e-07, "loss": 0.7692, "step": 301920 }, { "epoch": 1.9289447120606162, "grad_norm": 0.9319784641265869, "learning_rate": 3.177451213063565e-07, "loss": 0.79, "step": 301930 }, { "epoch": 1.9290085992103547, "grad_norm": 1.0063719749450684, "learning_rate": 3.1718058828870133e-07, "loss": 1.0246, "step": 301940 }, { "epoch": 1.9290724863600937, "grad_norm": 1.1086889505386353, "learning_rate": 3.166165556200196e-07, "loss": 0.9792, "step": 301950 }, { "epoch": 1.9291363735098321, "grad_norm": 0.9177554845809937, "learning_rate": 3.160530233059955e-07, "loss": 0.6821, "step": 301960 }, { "epoch": 1.929200260659571, "grad_norm": 0.5704813003540039, "learning_rate": 3.1548999135229127e-07, "loss": 0.8721, "step": 301970 }, { "epoch": 1.9292641478093095, "grad_norm": 0.9116873741149902, "learning_rate": 3.149836904067338e-07, "loss": 1.1118, "step": 301980 }, { "epoch": 1.9293280349590485, "grad_norm": 1.0153589248657227, "learning_rate": 3.144216091532759e-07, "loss": 1.012, "step": 301990 }, { "epoch": 1.929391922108787, "grad_norm": 1.10478937625885, "learning_rate": 3.1386002827657466e-07, "loss": 0.933, "step": 302000 }, { "epoch": 1.929455809258526, "grad_norm": 0.9765149354934692, "learning_rate": 3.132989477822923e-07, "loss": 0.7768, "step": 302010 }, { "epoch": 1.9295196964082644, "grad_norm": 1.0319316387176514, "learning_rate": 3.127383676760687e-07, "loss": 0.7433, "step": 302020 }, { "epoch": 1.9295835835580033, "grad_norm": 0.9615095257759094, "learning_rate": 3.121782879635604e-07, "loss": 0.927, "step": 302030 }, { "epoch": 1.9296474707077418, "grad_norm": 0.7776680588722229, "learning_rate": 3.116187086504019e-07, "loss": 0.9391, "step": 302040 }, { "epoch": 1.9297113578574805, "grad_norm": 0.8813909888267517, "learning_rate": 3.1105962974222745e-07, "loss": 0.9085, "step": 302050 }, { "epoch": 1.9297752450072192, "grad_norm": 0.9444760084152222, "learning_rate": 3.1050105124467154e-07, "loss": 1.0147, "step": 302060 }, { "epoch": 1.929839132156958, "grad_norm": 1.1206434965133667, "learning_rate": 3.099429731633574e-07, "loss": 0.7016, "step": 302070 }, { "epoch": 1.9299030193066966, "grad_norm": 1.0431171655654907, "learning_rate": 3.093853955039028e-07, "loss": 0.8739, "step": 302080 }, { "epoch": 1.9299669064564353, "grad_norm": 1.2197887897491455, "learning_rate": 3.088283182719309e-07, "loss": 0.7848, "step": 302090 }, { "epoch": 1.930030793606174, "grad_norm": 1.1709030866622925, "learning_rate": 3.082717414730429e-07, "loss": 0.6757, "step": 302100 }, { "epoch": 1.9300946807559127, "grad_norm": 1.1273075342178345, "learning_rate": 3.07715665112851e-07, "loss": 1.007, "step": 302110 }, { "epoch": 1.9301585679056514, "grad_norm": 1.6367871761322021, "learning_rate": 3.0716008919695063e-07, "loss": 0.7174, "step": 302120 }, { "epoch": 1.9302224550553901, "grad_norm": 2.8941264152526855, "learning_rate": 3.066050137309373e-07, "loss": 0.8933, "step": 302130 }, { "epoch": 1.9302863422051288, "grad_norm": 0.96756911277771, "learning_rate": 3.060504387204066e-07, "loss": 0.8054, "step": 302140 }, { "epoch": 1.9303502293548676, "grad_norm": 0.7502398490905762, "learning_rate": 3.054963641709374e-07, "loss": 0.9063, "step": 302150 }, { "epoch": 1.9304141165046063, "grad_norm": 1.5427123308181763, "learning_rate": 3.0494279008810856e-07, "loss": 1.2005, "step": 302160 }, { "epoch": 1.930478003654345, "grad_norm": 0.8241047859191895, "learning_rate": 3.043897164774989e-07, "loss": 0.8046, "step": 302170 }, { "epoch": 1.9305418908040837, "grad_norm": 1.026366114616394, "learning_rate": 3.038371433446763e-07, "loss": 0.7106, "step": 302180 }, { "epoch": 1.9306057779538224, "grad_norm": 0.9416015148162842, "learning_rate": 3.0328507069521396e-07, "loss": 0.9412, "step": 302190 }, { "epoch": 1.930669665103561, "grad_norm": 1.4748917818069458, "learning_rate": 3.027334985346575e-07, "loss": 0.7928, "step": 302200 }, { "epoch": 1.9307335522532998, "grad_norm": 0.7493869066238403, "learning_rate": 3.021824268685691e-07, "loss": 1.0479, "step": 302210 }, { "epoch": 1.9307974394030385, "grad_norm": 1.2239211797714233, "learning_rate": 3.016318557025055e-07, "loss": 0.6985, "step": 302220 }, { "epoch": 1.9308613265527772, "grad_norm": 1.395402193069458, "learning_rate": 3.010817850419956e-07, "loss": 0.8747, "step": 302230 }, { "epoch": 1.930925213702516, "grad_norm": 1.5344443321228027, "learning_rate": 3.0053221489259595e-07, "loss": 0.8547, "step": 302240 }, { "epoch": 1.9309891008522546, "grad_norm": 0.7854205965995789, "learning_rate": 2.9998314525983005e-07, "loss": 0.6006, "step": 302250 }, { "epoch": 1.9310529880019933, "grad_norm": 0.9757431149482727, "learning_rate": 2.994345761492268e-07, "loss": 0.8716, "step": 302260 }, { "epoch": 1.931116875151732, "grad_norm": 0.7694618701934814, "learning_rate": 2.9888650756632054e-07, "loss": 0.8134, "step": 302270 }, { "epoch": 1.9311807623014707, "grad_norm": 1.0986371040344238, "learning_rate": 2.9833893951661807e-07, "loss": 0.8574, "step": 302280 }, { "epoch": 1.9312446494512094, "grad_norm": 1.9931775331497192, "learning_rate": 2.9779187200564276e-07, "loss": 0.8072, "step": 302290 }, { "epoch": 1.9313085366009481, "grad_norm": 1.187432885169983, "learning_rate": 2.9724530503890677e-07, "loss": 0.9356, "step": 302300 }, { "epoch": 1.9313724237506866, "grad_norm": 0.7285720705986023, "learning_rate": 2.966992386219059e-07, "loss": 0.716, "step": 302310 }, { "epoch": 1.9314363109004256, "grad_norm": 0.7605849504470825, "learning_rate": 2.9615367276014107e-07, "loss": 0.7563, "step": 302320 }, { "epoch": 1.931500198050164, "grad_norm": 1.7555122375488281, "learning_rate": 2.956086074591136e-07, "loss": 0.8992, "step": 302330 }, { "epoch": 1.931564085199903, "grad_norm": 1.09848952293396, "learning_rate": 2.950640427243023e-07, "loss": 0.9077, "step": 302340 }, { "epoch": 1.9316279723496415, "grad_norm": 0.9788870811462402, "learning_rate": 2.9451997856120294e-07, "loss": 0.9107, "step": 302350 }, { "epoch": 1.9316918594993804, "grad_norm": 1.018257737159729, "learning_rate": 2.9397641497528327e-07, "loss": 0.9396, "step": 302360 }, { "epoch": 1.9317557466491189, "grad_norm": 0.9232105612754822, "learning_rate": 2.9343335197202783e-07, "loss": 0.759, "step": 302370 }, { "epoch": 1.9318196337988578, "grad_norm": 0.5679325461387634, "learning_rate": 2.928907895568989e-07, "loss": 0.8857, "step": 302380 }, { "epoch": 1.9318835209485963, "grad_norm": 1.78494393825531, "learning_rate": 2.9234872773535873e-07, "loss": 0.7721, "step": 302390 }, { "epoch": 1.9319474080983352, "grad_norm": 0.668113112449646, "learning_rate": 2.9180716651287523e-07, "loss": 0.8469, "step": 302400 }, { "epoch": 1.9320112952480737, "grad_norm": 1.0490716695785522, "learning_rate": 2.91266105894894e-07, "loss": 0.9017, "step": 302410 }, { "epoch": 1.9320751823978126, "grad_norm": 0.9475468397140503, "learning_rate": 2.907255458868663e-07, "loss": 0.8416, "step": 302420 }, { "epoch": 1.932139069547551, "grad_norm": 1.1354217529296875, "learning_rate": 2.9018548649424326e-07, "loss": 0.9322, "step": 302430 }, { "epoch": 1.93220295669729, "grad_norm": 2.2949318885803223, "learning_rate": 2.8964592772245393e-07, "loss": 0.8608, "step": 302440 }, { "epoch": 1.9322668438470285, "grad_norm": 0.9771470427513123, "learning_rate": 2.8910686957693277e-07, "loss": 0.7114, "step": 302450 }, { "epoch": 1.9323307309967674, "grad_norm": 0.8451032638549805, "learning_rate": 2.885683120631144e-07, "loss": 0.7742, "step": 302460 }, { "epoch": 1.932394618146506, "grad_norm": 0.6596810221672058, "learning_rate": 2.8803025518642225e-07, "loss": 0.7939, "step": 302470 }, { "epoch": 1.9324585052962449, "grad_norm": 0.5329988598823547, "learning_rate": 2.874926989522686e-07, "loss": 1.0137, "step": 302480 }, { "epoch": 1.9325223924459833, "grad_norm": 0.6956501007080078, "learning_rate": 2.869556433660714e-07, "loss": 1.1259, "step": 302490 }, { "epoch": 1.9325862795957223, "grad_norm": 0.7437456846237183, "learning_rate": 2.864190884332374e-07, "loss": 0.9319, "step": 302500 }, { "epoch": 1.9326501667454608, "grad_norm": 0.9625310301780701, "learning_rate": 2.858830341591734e-07, "loss": 0.9455, "step": 302510 }, { "epoch": 1.9327140538951997, "grad_norm": 0.8072330951690674, "learning_rate": 2.8534748054927505e-07, "loss": 0.8667, "step": 302520 }, { "epoch": 1.9327779410449382, "grad_norm": 1.026102066040039, "learning_rate": 2.848124276089381e-07, "loss": 0.8568, "step": 302530 }, { "epoch": 1.9328418281946769, "grad_norm": 0.8609016537666321, "learning_rate": 2.842778753435471e-07, "loss": 1.0871, "step": 302540 }, { "epoch": 1.9329057153444156, "grad_norm": 0.9094386696815491, "learning_rate": 2.837438237584922e-07, "loss": 0.9309, "step": 302550 }, { "epoch": 1.9329696024941543, "grad_norm": 1.1046336889266968, "learning_rate": 2.8321027285914683e-07, "loss": 0.6946, "step": 302560 }, { "epoch": 1.933033489643893, "grad_norm": 1.0469043254852295, "learning_rate": 2.826772226508845e-07, "loss": 0.9, "step": 302570 }, { "epoch": 1.9330973767936317, "grad_norm": 0.8629121780395508, "learning_rate": 2.821446731390731e-07, "loss": 0.7624, "step": 302580 }, { "epoch": 1.9331612639433704, "grad_norm": 0.920183002948761, "learning_rate": 2.816126243290751e-07, "loss": 0.8727, "step": 302590 }, { "epoch": 1.933225151093109, "grad_norm": 1.0150858163833618, "learning_rate": 2.810810762262528e-07, "loss": 0.9346, "step": 302600 }, { "epoch": 1.9332890382428478, "grad_norm": 0.9224951267242432, "learning_rate": 2.8055002883595747e-07, "loss": 1.0581, "step": 302610 }, { "epoch": 1.9333529253925865, "grad_norm": 0.8653743863105774, "learning_rate": 2.800194821635405e-07, "loss": 0.8454, "step": 302620 }, { "epoch": 1.9334168125423252, "grad_norm": 0.8796379566192627, "learning_rate": 2.7948943621433633e-07, "loss": 0.7263, "step": 302630 }, { "epoch": 1.933480699692064, "grad_norm": 0.786419153213501, "learning_rate": 2.7895989099368524e-07, "loss": 0.8787, "step": 302640 }, { "epoch": 1.9335445868418026, "grad_norm": 0.9109292030334473, "learning_rate": 2.784308465069274e-07, "loss": 1.0634, "step": 302650 }, { "epoch": 1.9336084739915413, "grad_norm": 1.6854918003082275, "learning_rate": 2.779023027593863e-07, "loss": 1.245, "step": 302660 }, { "epoch": 1.93367236114128, "grad_norm": 3.914057970046997, "learning_rate": 2.7737425975638554e-07, "loss": 0.9142, "step": 302670 }, { "epoch": 1.9337362482910188, "grad_norm": 0.8332629799842834, "learning_rate": 2.768467175032374e-07, "loss": 0.9323, "step": 302680 }, { "epoch": 1.9338001354407575, "grad_norm": 1.001237392425537, "learning_rate": 2.7631967600526555e-07, "loss": 0.9254, "step": 302690 }, { "epoch": 1.9338640225904962, "grad_norm": 0.7326177358627319, "learning_rate": 2.7579313526776564e-07, "loss": 0.9705, "step": 302700 }, { "epoch": 1.9339279097402349, "grad_norm": 0.7289140820503235, "learning_rate": 2.752670952960501e-07, "loss": 0.9911, "step": 302710 }, { "epoch": 1.9339917968899736, "grad_norm": 1.3712490797042847, "learning_rate": 2.7474155609540917e-07, "loss": 0.7515, "step": 302720 }, { "epoch": 1.9340556840397123, "grad_norm": 1.1518125534057617, "learning_rate": 2.742165176711442e-07, "loss": 0.9325, "step": 302730 }, { "epoch": 1.934119571189451, "grad_norm": 0.7685813307762146, "learning_rate": 2.7369198002853426e-07, "loss": 0.9072, "step": 302740 }, { "epoch": 1.9341834583391897, "grad_norm": 0.7780716419219971, "learning_rate": 2.7316794317286953e-07, "loss": 0.7143, "step": 302750 }, { "epoch": 1.9342473454889284, "grad_norm": 0.9062319397926331, "learning_rate": 2.7264440710941806e-07, "loss": 0.6679, "step": 302760 }, { "epoch": 1.934311232638667, "grad_norm": 1.183876395225525, "learning_rate": 2.7212137184346453e-07, "loss": 0.8766, "step": 302770 }, { "epoch": 1.9343751197884056, "grad_norm": 1.1930270195007324, "learning_rate": 2.715988373802658e-07, "loss": 0.6851, "step": 302780 }, { "epoch": 1.9344390069381445, "grad_norm": 1.5688190460205078, "learning_rate": 2.7107680372508436e-07, "loss": 1.268, "step": 302790 }, { "epoch": 1.934502894087883, "grad_norm": 0.7877197861671448, "learning_rate": 2.705552708831827e-07, "loss": 0.7911, "step": 302800 }, { "epoch": 1.934566781237622, "grad_norm": 1.4768949747085571, "learning_rate": 2.700342388598176e-07, "loss": 0.8737, "step": 302810 }, { "epoch": 1.9346306683873604, "grad_norm": 0.8407143950462341, "learning_rate": 2.695137076602239e-07, "loss": 0.7297, "step": 302820 }, { "epoch": 1.9346945555370993, "grad_norm": 0.7811848521232605, "learning_rate": 2.6899367728965284e-07, "loss": 0.8857, "step": 302830 }, { "epoch": 1.9347584426868378, "grad_norm": 1.9201631546020508, "learning_rate": 2.684741477533392e-07, "loss": 1.0058, "step": 302840 }, { "epoch": 1.9348223298365768, "grad_norm": 0.9804142713546753, "learning_rate": 2.679551190565122e-07, "loss": 0.9072, "step": 302850 }, { "epoch": 1.9348862169863152, "grad_norm": 0.8214455246925354, "learning_rate": 2.6743659120440635e-07, "loss": 0.7655, "step": 302860 }, { "epoch": 1.9349501041360542, "grad_norm": 0.6904785633087158, "learning_rate": 2.6691856420223424e-07, "loss": 0.9752, "step": 302870 }, { "epoch": 1.9350139912857927, "grad_norm": 0.8468638062477112, "learning_rate": 2.664010380552195e-07, "loss": 0.8078, "step": 302880 }, { "epoch": 1.9350778784355316, "grad_norm": 0.8787335157394409, "learning_rate": 2.658840127685691e-07, "loss": 1.0969, "step": 302890 }, { "epoch": 1.93514176558527, "grad_norm": 0.9245291352272034, "learning_rate": 2.6536748834750103e-07, "loss": 0.8035, "step": 302900 }, { "epoch": 1.935205652735009, "grad_norm": 1.0244308710098267, "learning_rate": 2.648514647972e-07, "loss": 0.7529, "step": 302910 }, { "epoch": 1.9352695398847475, "grad_norm": 0.8991485238075256, "learning_rate": 2.6433594212287303e-07, "loss": 0.8502, "step": 302920 }, { "epoch": 1.9353334270344864, "grad_norm": 0.8746139407157898, "learning_rate": 2.6382092032971593e-07, "loss": 0.8885, "step": 302930 }, { "epoch": 1.935397314184225, "grad_norm": 0.6522064805030823, "learning_rate": 2.633063994229079e-07, "loss": 0.9982, "step": 302940 }, { "epoch": 1.9354612013339638, "grad_norm": 0.745275616645813, "learning_rate": 2.6279237940762813e-07, "loss": 1.1341, "step": 302950 }, { "epoch": 1.9355250884837023, "grad_norm": 0.8822062015533447, "learning_rate": 2.6227886028906136e-07, "loss": 1.1846, "step": 302960 }, { "epoch": 1.9355889756334412, "grad_norm": 1.1190886497497559, "learning_rate": 2.6176584207237563e-07, "loss": 0.9702, "step": 302970 }, { "epoch": 1.9356528627831797, "grad_norm": 0.9726095199584961, "learning_rate": 2.612533247627391e-07, "loss": 0.8938, "step": 302980 }, { "epoch": 1.9357167499329186, "grad_norm": 1.3287533521652222, "learning_rate": 2.6074130836530876e-07, "loss": 1.112, "step": 302990 }, { "epoch": 1.9357806370826571, "grad_norm": 1.12720787525177, "learning_rate": 2.602297928852471e-07, "loss": 0.7258, "step": 303000 }, { "epoch": 1.935844524232396, "grad_norm": 1.0907084941864014, "learning_rate": 2.5971877832769996e-07, "loss": 1.0114, "step": 303010 }, { "epoch": 1.9359084113821345, "grad_norm": 1.0238360166549683, "learning_rate": 2.592082646978189e-07, "loss": 0.7974, "step": 303020 }, { "epoch": 1.9359722985318732, "grad_norm": 1.0993527173995972, "learning_rate": 2.5869825200073863e-07, "loss": 0.8538, "step": 303030 }, { "epoch": 1.936036185681612, "grad_norm": 1.4540520906448364, "learning_rate": 2.5818874024159947e-07, "loss": 0.8249, "step": 303040 }, { "epoch": 1.9361000728313507, "grad_norm": 0.9106485843658447, "learning_rate": 2.576797294255362e-07, "loss": 1.0292, "step": 303050 }, { "epoch": 1.9361639599810894, "grad_norm": 0.9970775246620178, "learning_rate": 2.57171219557667e-07, "loss": 1.1452, "step": 303060 }, { "epoch": 1.936227847130828, "grad_norm": 0.9550371170043945, "learning_rate": 2.5666321064312106e-07, "loss": 0.7977, "step": 303070 }, { "epoch": 1.9362917342805668, "grad_norm": 0.8058514595031738, "learning_rate": 2.561557026870054e-07, "loss": 0.7658, "step": 303080 }, { "epoch": 1.9363556214303055, "grad_norm": 0.6585938334465027, "learning_rate": 2.5564869569444374e-07, "loss": 0.8798, "step": 303090 }, { "epoch": 1.9364195085800442, "grad_norm": 1.362809181213379, "learning_rate": 2.551421896705319e-07, "loss": 0.8086, "step": 303100 }, { "epoch": 1.936483395729783, "grad_norm": 0.8295511603355408, "learning_rate": 2.546361846203715e-07, "loss": 0.9155, "step": 303110 }, { "epoch": 1.9365472828795216, "grad_norm": 0.8398029208183289, "learning_rate": 2.5413068054906395e-07, "loss": 1.1177, "step": 303120 }, { "epoch": 1.9366111700292603, "grad_norm": 1.4834344387054443, "learning_rate": 2.5362567746169407e-07, "loss": 0.9247, "step": 303130 }, { "epoch": 1.936675057178999, "grad_norm": 1.2295825481414795, "learning_rate": 2.5312117536334665e-07, "loss": 0.6938, "step": 303140 }, { "epoch": 1.9367389443287377, "grad_norm": 4.947011947631836, "learning_rate": 2.5261717425911216e-07, "loss": 0.77, "step": 303150 }, { "epoch": 1.9368028314784764, "grad_norm": 1.1070027351379395, "learning_rate": 2.521136741540586e-07, "loss": 0.8208, "step": 303160 }, { "epoch": 1.9368667186282151, "grad_norm": 0.5779455900192261, "learning_rate": 2.5161067505325987e-07, "loss": 1.015, "step": 303170 }, { "epoch": 1.9369306057779538, "grad_norm": 2.129777669906616, "learning_rate": 2.5110817696177847e-07, "loss": 0.7702, "step": 303180 }, { "epoch": 1.9369944929276925, "grad_norm": 1.1007846593856812, "learning_rate": 2.5060617988467705e-07, "loss": 0.8984, "step": 303190 }, { "epoch": 1.9370583800774313, "grad_norm": 2.3898720741271973, "learning_rate": 2.501046838270127e-07, "loss": 0.9611, "step": 303200 }, { "epoch": 1.93712226722717, "grad_norm": 0.7541255354881287, "learning_rate": 2.4960368879383133e-07, "loss": 0.881, "step": 303210 }, { "epoch": 1.9371861543769087, "grad_norm": 0.8008748888969421, "learning_rate": 2.4910319479017895e-07, "loss": 0.9516, "step": 303220 }, { "epoch": 1.9372500415266474, "grad_norm": 1.0474908351898193, "learning_rate": 2.486032018211015e-07, "loss": 0.8205, "step": 303230 }, { "epoch": 1.937313928676386, "grad_norm": 0.9243830442428589, "learning_rate": 2.481037098916339e-07, "loss": 1.1442, "step": 303240 }, { "epoch": 1.9373778158261248, "grad_norm": 1.007006049156189, "learning_rate": 2.4760471900679425e-07, "loss": 0.9138, "step": 303250 }, { "epoch": 1.9374417029758635, "grad_norm": 1.0164581537246704, "learning_rate": 2.471062291716231e-07, "loss": 0.6679, "step": 303260 }, { "epoch": 1.937505590125602, "grad_norm": 0.7017830014228821, "learning_rate": 2.4660824039113295e-07, "loss": 0.6308, "step": 303270 }, { "epoch": 1.937569477275341, "grad_norm": 0.7931995987892151, "learning_rate": 2.4611075267034764e-07, "loss": 0.8849, "step": 303280 }, { "epoch": 1.9376333644250794, "grad_norm": 0.8192692995071411, "learning_rate": 2.4561376601426323e-07, "loss": 0.9391, "step": 303290 }, { "epoch": 1.9376972515748183, "grad_norm": 0.5092934966087341, "learning_rate": 2.451172804278923e-07, "loss": 0.6376, "step": 303300 }, { "epoch": 1.9377611387245568, "grad_norm": 0.9124179482460022, "learning_rate": 2.446212959162364e-07, "loss": 1.1539, "step": 303310 }, { "epoch": 1.9378250258742957, "grad_norm": 0.9786889553070068, "learning_rate": 2.44125812484286e-07, "loss": 0.9836, "step": 303320 }, { "epoch": 1.9378889130240342, "grad_norm": 1.0967962741851807, "learning_rate": 2.4363083013703157e-07, "loss": 0.9354, "step": 303330 }, { "epoch": 1.9379528001737731, "grad_norm": 0.8982738852500916, "learning_rate": 2.431363488794691e-07, "loss": 0.9411, "step": 303340 }, { "epoch": 1.9380166873235116, "grad_norm": 1.1276098489761353, "learning_rate": 2.426423687165613e-07, "loss": 0.8338, "step": 303350 }, { "epoch": 1.9380805744732506, "grad_norm": 0.9050993323326111, "learning_rate": 2.421488896532931e-07, "loss": 1.0627, "step": 303360 }, { "epoch": 1.938144461622989, "grad_norm": 1.407092809677124, "learning_rate": 2.4165591169463266e-07, "loss": 1.0689, "step": 303370 }, { "epoch": 1.938208348772728, "grad_norm": 0.9812001585960388, "learning_rate": 2.4116343484554274e-07, "loss": 0.7703, "step": 303380 }, { "epoch": 1.9382722359224664, "grad_norm": 0.9568415880203247, "learning_rate": 2.4067145911099154e-07, "loss": 0.9541, "step": 303390 }, { "epoch": 1.9383361230722054, "grad_norm": 0.8810369372367859, "learning_rate": 2.401799844959196e-07, "loss": 0.9046, "step": 303400 }, { "epoch": 1.9384000102219439, "grad_norm": 0.6494470834732056, "learning_rate": 2.3968901100528407e-07, "loss": 0.7342, "step": 303410 }, { "epoch": 1.9384638973716828, "grad_norm": 1.4283164739608765, "learning_rate": 2.39198538644031e-07, "loss": 1.0041, "step": 303420 }, { "epoch": 1.9385277845214213, "grad_norm": 0.9517856240272522, "learning_rate": 2.3870856741709526e-07, "loss": 0.6312, "step": 303430 }, { "epoch": 1.9385916716711602, "grad_norm": 0.7215419411659241, "learning_rate": 2.382190973294174e-07, "loss": 0.85, "step": 303440 }, { "epoch": 1.9386555588208987, "grad_norm": 1.9820024967193604, "learning_rate": 2.3773012838592125e-07, "loss": 0.7948, "step": 303450 }, { "epoch": 1.9387194459706376, "grad_norm": 0.7442840337753296, "learning_rate": 2.3724166059153063e-07, "loss": 0.8042, "step": 303460 }, { "epoch": 1.938783333120376, "grad_norm": 1.2596721649169922, "learning_rate": 2.3675369395117496e-07, "loss": 0.548, "step": 303470 }, { "epoch": 1.938847220270115, "grad_norm": 1.1596099138259888, "learning_rate": 2.3626622846975588e-07, "loss": 0.7801, "step": 303480 }, { "epoch": 1.9389111074198535, "grad_norm": 0.6521729826927185, "learning_rate": 2.357792641521861e-07, "loss": 0.7191, "step": 303490 }, { "epoch": 1.9389749945695924, "grad_norm": 1.0326049327850342, "learning_rate": 2.352928010033728e-07, "loss": 0.8572, "step": 303500 }, { "epoch": 1.939038881719331, "grad_norm": 0.9028391242027283, "learning_rate": 2.3480683902821765e-07, "loss": 0.8388, "step": 303510 }, { "epoch": 1.9391027688690696, "grad_norm": 1.4557136297225952, "learning_rate": 2.3432137823160561e-07, "loss": 1.0357, "step": 303520 }, { "epoch": 1.9391666560188083, "grad_norm": 0.7721455693244934, "learning_rate": 2.3383641861843274e-07, "loss": 0.8991, "step": 303530 }, { "epoch": 1.939230543168547, "grad_norm": 1.0463354587554932, "learning_rate": 2.3335196019357853e-07, "loss": 0.8772, "step": 303540 }, { "epoch": 1.9392944303182857, "grad_norm": 0.8078722357749939, "learning_rate": 2.328680029619279e-07, "loss": 0.9063, "step": 303550 }, { "epoch": 1.9393583174680245, "grad_norm": 0.9805110096931458, "learning_rate": 2.3238454692834922e-07, "loss": 0.8548, "step": 303560 }, { "epoch": 1.9394222046177632, "grad_norm": 1.0593864917755127, "learning_rate": 2.319015920977108e-07, "loss": 0.9081, "step": 303570 }, { "epoch": 1.9394860917675019, "grad_norm": 0.895837664604187, "learning_rate": 2.3141913847488094e-07, "loss": 0.6311, "step": 303580 }, { "epoch": 1.9395499789172406, "grad_norm": 0.6257737278938293, "learning_rate": 2.3093718606471693e-07, "loss": 0.8553, "step": 303590 }, { "epoch": 1.9396138660669793, "grad_norm": 1.059502363204956, "learning_rate": 2.3045573487207595e-07, "loss": 1.2677, "step": 303600 }, { "epoch": 1.939677753216718, "grad_norm": 0.9159273505210876, "learning_rate": 2.2997478490179302e-07, "loss": 0.7429, "step": 303610 }, { "epoch": 1.9397416403664567, "grad_norm": 0.8961524367332458, "learning_rate": 2.2949433615873096e-07, "loss": 0.7627, "step": 303620 }, { "epoch": 1.9398055275161954, "grad_norm": 0.7950904965400696, "learning_rate": 2.2901438864771362e-07, "loss": 0.8802, "step": 303630 }, { "epoch": 1.939869414665934, "grad_norm": 1.0109800100326538, "learning_rate": 2.2853494237358163e-07, "loss": 0.9184, "step": 303640 }, { "epoch": 1.9399333018156728, "grad_norm": 0.8042694926261902, "learning_rate": 2.280559973411589e-07, "loss": 0.8039, "step": 303650 }, { "epoch": 1.9399971889654115, "grad_norm": 1.3180112838745117, "learning_rate": 2.2757755355526932e-07, "loss": 0.8882, "step": 303660 }, { "epoch": 1.9400610761151502, "grad_norm": 1.3387608528137207, "learning_rate": 2.2709961102073686e-07, "loss": 0.9035, "step": 303670 }, { "epoch": 1.940124963264889, "grad_norm": 0.9645832180976868, "learning_rate": 2.2662216974236872e-07, "loss": 0.737, "step": 303680 }, { "epoch": 1.9401888504146276, "grad_norm": 1.5381606817245483, "learning_rate": 2.2614522972497775e-07, "loss": 0.9132, "step": 303690 }, { "epoch": 1.9402527375643663, "grad_norm": 1.7082456350326538, "learning_rate": 2.2566879097336567e-07, "loss": 1.0725, "step": 303700 }, { "epoch": 1.940316624714105, "grad_norm": 1.0522699356079102, "learning_rate": 2.251928534923231e-07, "loss": 1.2124, "step": 303710 }, { "epoch": 1.9403805118638437, "grad_norm": 2.351830244064331, "learning_rate": 2.247174172866573e-07, "loss": 0.8393, "step": 303720 }, { "epoch": 1.9404443990135825, "grad_norm": 0.865699291229248, "learning_rate": 2.242424823611422e-07, "loss": 0.8724, "step": 303730 }, { "epoch": 1.9405082861633212, "grad_norm": 0.6963575482368469, "learning_rate": 2.23768048720574e-07, "loss": 0.6325, "step": 303740 }, { "epoch": 1.9405721733130599, "grad_norm": 0.6768919825553894, "learning_rate": 2.2329411636972108e-07, "loss": 0.8137, "step": 303750 }, { "epoch": 1.9406360604627984, "grad_norm": 1.1644431352615356, "learning_rate": 2.2282068531335743e-07, "loss": 0.8637, "step": 303760 }, { "epoch": 1.9406999476125373, "grad_norm": 0.8119156360626221, "learning_rate": 2.2234775555625698e-07, "loss": 0.8328, "step": 303770 }, { "epoch": 1.9407638347622758, "grad_norm": 1.0110023021697998, "learning_rate": 2.2187532710317705e-07, "loss": 0.9217, "step": 303780 }, { "epoch": 1.9408277219120147, "grad_norm": 0.6955659985542297, "learning_rate": 2.2140339995887494e-07, "loss": 0.8405, "step": 303790 }, { "epoch": 1.9408916090617532, "grad_norm": 0.9101753830909729, "learning_rate": 2.2093197412810796e-07, "loss": 0.9248, "step": 303800 }, { "epoch": 1.940955496211492, "grad_norm": 0.7637187242507935, "learning_rate": 2.2046104961561677e-07, "loss": 0.9431, "step": 303810 }, { "epoch": 1.9410193833612306, "grad_norm": 1.151958703994751, "learning_rate": 2.199906264261531e-07, "loss": 1.0865, "step": 303820 }, { "epoch": 1.9410832705109695, "grad_norm": 0.9793457984924316, "learning_rate": 2.195207045644465e-07, "loss": 1.0678, "step": 303830 }, { "epoch": 1.941147157660708, "grad_norm": 0.9088485836982727, "learning_rate": 2.1905128403523212e-07, "loss": 0.953, "step": 303840 }, { "epoch": 1.941211044810447, "grad_norm": 1.3817073106765747, "learning_rate": 2.18582364843245e-07, "loss": 0.8613, "step": 303850 }, { "epoch": 1.9412749319601854, "grad_norm": 0.7287924289703369, "learning_rate": 2.1811394699319254e-07, "loss": 0.951, "step": 303860 }, { "epoch": 1.9413388191099243, "grad_norm": 2.202221393585205, "learning_rate": 2.1764603048980426e-07, "loss": 0.6123, "step": 303870 }, { "epoch": 1.9414027062596628, "grad_norm": 0.8882316946983337, "learning_rate": 2.171786153377875e-07, "loss": 0.9085, "step": 303880 }, { "epoch": 1.9414665934094018, "grad_norm": 1.4518402814865112, "learning_rate": 2.167117015418496e-07, "loss": 0.7472, "step": 303890 }, { "epoch": 1.9415304805591402, "grad_norm": 1.1848163604736328, "learning_rate": 2.162452891066924e-07, "loss": 1.2847, "step": 303900 }, { "epoch": 1.9415943677088792, "grad_norm": 1.130953073501587, "learning_rate": 2.157793780370121e-07, "loss": 0.8192, "step": 303910 }, { "epoch": 1.9416582548586176, "grad_norm": 0.7800238132476807, "learning_rate": 2.153139683375105e-07, "loss": 0.517, "step": 303920 }, { "epoch": 1.9417221420083566, "grad_norm": 1.3722091913223267, "learning_rate": 2.1484906001286164e-07, "loss": 0.8937, "step": 303930 }, { "epoch": 1.941786029158095, "grad_norm": 0.8849961161613464, "learning_rate": 2.1438465306775624e-07, "loss": 0.6877, "step": 303940 }, { "epoch": 1.941849916307834, "grad_norm": 1.1050699949264526, "learning_rate": 2.1392074750686275e-07, "loss": 0.7989, "step": 303950 }, { "epoch": 1.9419138034575725, "grad_norm": 1.2571942806243896, "learning_rate": 2.134573433348608e-07, "loss": 0.7086, "step": 303960 }, { "epoch": 1.9419776906073114, "grad_norm": 0.7900700569152832, "learning_rate": 2.1299444055641882e-07, "loss": 0.681, "step": 303970 }, { "epoch": 1.9420415777570499, "grad_norm": 0.8653547763824463, "learning_rate": 2.125320391761887e-07, "loss": 0.874, "step": 303980 }, { "epoch": 1.9421054649067888, "grad_norm": 1.1003780364990234, "learning_rate": 2.120701391988389e-07, "loss": 0.8899, "step": 303990 }, { "epoch": 1.9421693520565273, "grad_norm": 0.8403931260108948, "learning_rate": 2.1160874062901014e-07, "loss": 0.9496, "step": 304000 }, { "epoch": 1.942233239206266, "grad_norm": 1.0029411315917969, "learning_rate": 2.1114784347135985e-07, "loss": 0.6715, "step": 304010 }, { "epoch": 1.9422971263560047, "grad_norm": 0.5501324534416199, "learning_rate": 2.106874477305232e-07, "loss": 0.6133, "step": 304020 }, { "epoch": 1.9423610135057434, "grad_norm": 1.3969063758850098, "learning_rate": 2.1022755341112977e-07, "loss": 0.7371, "step": 304030 }, { "epoch": 1.9424249006554821, "grad_norm": 1.2871463298797607, "learning_rate": 2.0976816051783142e-07, "loss": 0.7898, "step": 304040 }, { "epoch": 1.9424887878052208, "grad_norm": 0.9517881274223328, "learning_rate": 2.0930926905523564e-07, "loss": 0.8429, "step": 304050 }, { "epoch": 1.9425526749549595, "grad_norm": 0.9025202989578247, "learning_rate": 2.0885087902797195e-07, "loss": 1.0197, "step": 304060 }, { "epoch": 1.9426165621046982, "grad_norm": 1.7701680660247803, "learning_rate": 2.083929904406534e-07, "loss": 0.8149, "step": 304070 }, { "epoch": 1.942680449254437, "grad_norm": 1.1089845895767212, "learning_rate": 2.0793560329789297e-07, "loss": 0.6883, "step": 304080 }, { "epoch": 1.9427443364041757, "grad_norm": 1.1055128574371338, "learning_rate": 2.0747871760429803e-07, "loss": 0.7522, "step": 304090 }, { "epoch": 1.9428082235539144, "grad_norm": 1.0907093286514282, "learning_rate": 2.0702233336447052e-07, "loss": 0.9392, "step": 304100 }, { "epoch": 1.942872110703653, "grad_norm": 0.8361429572105408, "learning_rate": 2.0656645058300673e-07, "loss": 0.9274, "step": 304110 }, { "epoch": 1.9429359978533918, "grad_norm": 1.088329553604126, "learning_rate": 2.0611106926449186e-07, "loss": 0.8, "step": 304120 }, { "epoch": 1.9429998850031305, "grad_norm": 3.630836009979248, "learning_rate": 2.0565618941352228e-07, "loss": 1.1398, "step": 304130 }, { "epoch": 1.9430637721528692, "grad_norm": 0.8681971430778503, "learning_rate": 2.052018110346665e-07, "loss": 0.7294, "step": 304140 }, { "epoch": 1.943127659302608, "grad_norm": 0.7005565762519836, "learning_rate": 2.047479341325098e-07, "loss": 1.0169, "step": 304150 }, { "epoch": 1.9431915464523466, "grad_norm": 1.0016130208969116, "learning_rate": 2.0429455871162073e-07, "loss": 0.8592, "step": 304160 }, { "epoch": 1.9432554336020853, "grad_norm": 1.0213990211486816, "learning_rate": 2.0384168477656785e-07, "loss": 0.6729, "step": 304170 }, { "epoch": 1.943319320751824, "grad_norm": 1.0723354816436768, "learning_rate": 2.0338931233190305e-07, "loss": 1.1405, "step": 304180 }, { "epoch": 1.9433832079015627, "grad_norm": 0.6627349853515625, "learning_rate": 2.0293744138219495e-07, "loss": 0.7205, "step": 304190 }, { "epoch": 1.9434470950513014, "grad_norm": 0.9345703125, "learning_rate": 2.0248607193197878e-07, "loss": 0.7871, "step": 304200 }, { "epoch": 1.9435109822010401, "grad_norm": 0.9300385117530823, "learning_rate": 2.0203520398581754e-07, "loss": 0.9794, "step": 304210 }, { "epoch": 1.9435748693507788, "grad_norm": 1.2261062860488892, "learning_rate": 2.0158483754824097e-07, "loss": 0.9665, "step": 304220 }, { "epoch": 1.9436387565005175, "grad_norm": 0.9771540760993958, "learning_rate": 2.0113497262378432e-07, "loss": 1.0119, "step": 304230 }, { "epoch": 1.9437026436502562, "grad_norm": 1.694318413734436, "learning_rate": 2.0068560921697732e-07, "loss": 0.8907, "step": 304240 }, { "epoch": 1.9437665307999947, "grad_norm": 1.1793835163116455, "learning_rate": 2.002367473323552e-07, "loss": 0.6662, "step": 304250 }, { "epoch": 1.9438304179497337, "grad_norm": 1.1031192541122437, "learning_rate": 1.9978838697443103e-07, "loss": 0.8654, "step": 304260 }, { "epoch": 1.9438943050994721, "grad_norm": 1.7324854135513306, "learning_rate": 1.9934052814771785e-07, "loss": 1.0354, "step": 304270 }, { "epoch": 1.943958192249211, "grad_norm": 1.046745777130127, "learning_rate": 1.9889317085673432e-07, "loss": 0.7834, "step": 304280 }, { "epoch": 1.9440220793989496, "grad_norm": 1.2141883373260498, "learning_rate": 1.9844631510597677e-07, "loss": 0.916, "step": 304290 }, { "epoch": 1.9440859665486885, "grad_norm": 1.0867642164230347, "learning_rate": 1.9799996089994721e-07, "loss": 0.9096, "step": 304300 }, { "epoch": 1.944149853698427, "grad_norm": 0.9960625171661377, "learning_rate": 1.9755410824314758e-07, "loss": 0.7334, "step": 304310 }, { "epoch": 1.944213740848166, "grad_norm": 0.8736566305160522, "learning_rate": 1.9710875714006316e-07, "loss": 0.9111, "step": 304320 }, { "epoch": 1.9442776279979044, "grad_norm": 1.8254917860031128, "learning_rate": 1.9666390759517927e-07, "loss": 0.9085, "step": 304330 }, { "epoch": 1.9443415151476433, "grad_norm": 0.595846951007843, "learning_rate": 1.9621955961297568e-07, "loss": 0.8852, "step": 304340 }, { "epoch": 1.9444054022973818, "grad_norm": 1.2150890827178955, "learning_rate": 1.9577571319792098e-07, "loss": 1.0281, "step": 304350 }, { "epoch": 1.9444692894471207, "grad_norm": 1.0095974206924438, "learning_rate": 1.9533236835450052e-07, "loss": 0.8541, "step": 304360 }, { "epoch": 1.9445331765968592, "grad_norm": 1.1392488479614258, "learning_rate": 1.948895250871663e-07, "loss": 0.7374, "step": 304370 }, { "epoch": 1.9445970637465981, "grad_norm": 1.0379918813705444, "learning_rate": 1.9444718340038138e-07, "loss": 0.8847, "step": 304380 }, { "epoch": 1.9446609508963366, "grad_norm": 1.0987335443496704, "learning_rate": 1.940053432986033e-07, "loss": 0.7846, "step": 304390 }, { "epoch": 1.9447248380460755, "grad_norm": 1.033288598060608, "learning_rate": 1.9356400478627857e-07, "loss": 0.8308, "step": 304400 }, { "epoch": 1.944788725195814, "grad_norm": 1.2702211141586304, "learning_rate": 1.93123167867848e-07, "loss": 0.8131, "step": 304410 }, { "epoch": 1.944852612345553, "grad_norm": 0.7985353469848633, "learning_rate": 1.9268283254776364e-07, "loss": 0.7777, "step": 304420 }, { "epoch": 1.9449164994952914, "grad_norm": 1.0405573844909668, "learning_rate": 1.922429988304497e-07, "loss": 0.8938, "step": 304430 }, { "epoch": 1.9449803866450304, "grad_norm": 0.81767737865448, "learning_rate": 1.91803666720336e-07, "loss": 0.6554, "step": 304440 }, { "epoch": 1.9450442737947689, "grad_norm": 0.8658244609832764, "learning_rate": 1.9136483622185787e-07, "loss": 0.7811, "step": 304450 }, { "epoch": 1.9451081609445078, "grad_norm": 0.8948516249656677, "learning_rate": 1.909265073394173e-07, "loss": 0.8768, "step": 304460 }, { "epoch": 1.9451720480942463, "grad_norm": 0.7505782246589661, "learning_rate": 1.9048868007744413e-07, "loss": 0.8596, "step": 304470 }, { "epoch": 1.945235935243985, "grad_norm": 0.6966861486434937, "learning_rate": 1.900513544403404e-07, "loss": 0.7792, "step": 304480 }, { "epoch": 1.9452998223937237, "grad_norm": 0.9721227288246155, "learning_rate": 1.896145304325081e-07, "loss": 0.709, "step": 304490 }, { "epoch": 1.9453637095434624, "grad_norm": 0.7984893918037415, "learning_rate": 1.8917820805834928e-07, "loss": 1.1086, "step": 304500 }, { "epoch": 1.945427596693201, "grad_norm": 1.087112307548523, "learning_rate": 1.8874238732226047e-07, "loss": 1.0401, "step": 304510 }, { "epoch": 1.9454914838429398, "grad_norm": 0.9860308766365051, "learning_rate": 1.8830706822863254e-07, "loss": 1.1688, "step": 304520 }, { "epoch": 1.9455553709926785, "grad_norm": 2.163724899291992, "learning_rate": 1.878722507818398e-07, "loss": 0.8776, "step": 304530 }, { "epoch": 1.9456192581424172, "grad_norm": 1.2752691507339478, "learning_rate": 1.874379349862676e-07, "loss": 0.9181, "step": 304540 }, { "epoch": 1.945683145292156, "grad_norm": 1.5012123584747314, "learning_rate": 1.8700412084629027e-07, "loss": 0.9558, "step": 304550 }, { "epoch": 1.9457470324418946, "grad_norm": 0.8935086727142334, "learning_rate": 1.865708083662765e-07, "loss": 1.0123, "step": 304560 }, { "epoch": 1.9458109195916333, "grad_norm": 0.936139702796936, "learning_rate": 1.8613799755058948e-07, "loss": 1.0713, "step": 304570 }, { "epoch": 1.945874806741372, "grad_norm": 1.1493687629699707, "learning_rate": 1.857056884035868e-07, "loss": 0.9304, "step": 304580 }, { "epoch": 1.9459386938911107, "grad_norm": 1.3518346548080444, "learning_rate": 1.8527388092962616e-07, "loss": 0.764, "step": 304590 }, { "epoch": 1.9460025810408494, "grad_norm": 1.3774468898773193, "learning_rate": 1.8484257513305403e-07, "loss": 0.9028, "step": 304600 }, { "epoch": 1.9460664681905882, "grad_norm": 0.7998141050338745, "learning_rate": 1.8441177101821138e-07, "loss": 0.813, "step": 304610 }, { "epoch": 1.9461303553403269, "grad_norm": 0.8194617033004761, "learning_rate": 1.839814685894392e-07, "loss": 0.7935, "step": 304620 }, { "epoch": 1.9461942424900656, "grad_norm": 0.8891813158988953, "learning_rate": 1.8355166785106738e-07, "loss": 0.742, "step": 304630 }, { "epoch": 1.9462581296398043, "grad_norm": 1.0683954954147339, "learning_rate": 1.8312236880743127e-07, "loss": 0.7811, "step": 304640 }, { "epoch": 1.946322016789543, "grad_norm": 0.6153748631477356, "learning_rate": 1.826935714628497e-07, "loss": 0.9762, "step": 304650 }, { "epoch": 1.9463859039392817, "grad_norm": 0.7627272605895996, "learning_rate": 1.8226527582164143e-07, "loss": 0.8678, "step": 304660 }, { "epoch": 1.9464497910890204, "grad_norm": 1.9040825366973877, "learning_rate": 1.8183748188811967e-07, "loss": 1.0886, "step": 304670 }, { "epoch": 1.946513678238759, "grad_norm": 0.7414036393165588, "learning_rate": 1.8141018966659206e-07, "loss": 0.8223, "step": 304680 }, { "epoch": 1.9465775653884978, "grad_norm": 0.9439243674278259, "learning_rate": 1.809833991613663e-07, "loss": 0.7223, "step": 304690 }, { "epoch": 1.9466414525382365, "grad_norm": 1.765831470489502, "learning_rate": 1.8055711037673894e-07, "loss": 0.9139, "step": 304700 }, { "epoch": 1.9467053396879752, "grad_norm": 0.9545259475708008, "learning_rate": 1.8013132331699546e-07, "loss": 0.8998, "step": 304710 }, { "epoch": 1.946769226837714, "grad_norm": 0.9046018123626709, "learning_rate": 1.7970603798643237e-07, "loss": 0.6941, "step": 304720 }, { "epoch": 1.9468331139874526, "grad_norm": 0.7828860282897949, "learning_rate": 1.792812543893352e-07, "loss": 0.9326, "step": 304730 }, { "epoch": 1.946897001137191, "grad_norm": 1.1395444869995117, "learning_rate": 1.788569725299727e-07, "loss": 0.8888, "step": 304740 }, { "epoch": 1.94696088828693, "grad_norm": 1.9494872093200684, "learning_rate": 1.7843319241261924e-07, "loss": 1.1222, "step": 304750 }, { "epoch": 1.9470247754366685, "grad_norm": 1.2636685371398926, "learning_rate": 1.780099140415492e-07, "loss": 0.8966, "step": 304760 }, { "epoch": 1.9470886625864074, "grad_norm": 0.903266191482544, "learning_rate": 1.775871374210203e-07, "loss": 0.8379, "step": 304770 }, { "epoch": 1.947152549736146, "grad_norm": 1.4494318962097168, "learning_rate": 1.771648625552902e-07, "loss": 0.8868, "step": 304780 }, { "epoch": 1.9472164368858849, "grad_norm": 0.7543337941169739, "learning_rate": 1.7674308944861106e-07, "loss": 0.6507, "step": 304790 }, { "epoch": 1.9472803240356233, "grad_norm": 0.6446106433868408, "learning_rate": 1.7632181810524062e-07, "loss": 0.9346, "step": 304800 }, { "epoch": 1.9473442111853623, "grad_norm": 0.9636729955673218, "learning_rate": 1.7590104852940326e-07, "loss": 0.8048, "step": 304810 }, { "epoch": 1.9474080983351008, "grad_norm": 0.928029477596283, "learning_rate": 1.7548078072535114e-07, "loss": 0.9762, "step": 304820 }, { "epoch": 1.9474719854848397, "grad_norm": 2.0930633544921875, "learning_rate": 1.7506101469731416e-07, "loss": 0.6671, "step": 304830 }, { "epoch": 1.9475358726345782, "grad_norm": 1.1717947721481323, "learning_rate": 1.7464175044951126e-07, "loss": 0.9609, "step": 304840 }, { "epoch": 1.947599759784317, "grad_norm": 0.8623785376548767, "learning_rate": 1.7422298798617787e-07, "loss": 0.8877, "step": 304850 }, { "epoch": 1.9476636469340556, "grad_norm": 0.6183810830116272, "learning_rate": 1.7380472731152175e-07, "loss": 0.7253, "step": 304860 }, { "epoch": 1.9477275340837945, "grad_norm": 0.8609592318534851, "learning_rate": 1.733869684297562e-07, "loss": 0.9726, "step": 304870 }, { "epoch": 1.947791421233533, "grad_norm": 0.8909668326377869, "learning_rate": 1.7296971134508898e-07, "loss": 1.0667, "step": 304880 }, { "epoch": 1.947855308383272, "grad_norm": 1.0848865509033203, "learning_rate": 1.7255295606172784e-07, "loss": 0.8502, "step": 304890 }, { "epoch": 1.9479191955330104, "grad_norm": 0.9160458445549011, "learning_rate": 1.7213670258386384e-07, "loss": 0.8162, "step": 304900 }, { "epoch": 1.9479830826827493, "grad_norm": 1.1018562316894531, "learning_rate": 1.7172095091568807e-07, "loss": 0.8825, "step": 304910 }, { "epoch": 1.9480469698324878, "grad_norm": 2.1245996952056885, "learning_rate": 1.7130570106139166e-07, "loss": 0.8319, "step": 304920 }, { "epoch": 1.9481108569822267, "grad_norm": 3.173818588256836, "learning_rate": 1.7089095302515456e-07, "loss": 0.8999, "step": 304930 }, { "epoch": 1.9481747441319652, "grad_norm": 0.758355975151062, "learning_rate": 1.7047670681115125e-07, "loss": 0.8667, "step": 304940 }, { "epoch": 1.9482386312817042, "grad_norm": 0.6632780432701111, "learning_rate": 1.7006296242355613e-07, "loss": 0.6416, "step": 304950 }, { "epoch": 1.9483025184314426, "grad_norm": 0.8830016851425171, "learning_rate": 1.6964971986654366e-07, "loss": 0.8193, "step": 304960 }, { "epoch": 1.9483664055811813, "grad_norm": 1.0977959632873535, "learning_rate": 1.692369791442605e-07, "loss": 0.867, "step": 304970 }, { "epoch": 1.94843029273092, "grad_norm": 1.4547436237335205, "learning_rate": 1.6882474026087557e-07, "loss": 0.7399, "step": 304980 }, { "epoch": 1.9484941798806588, "grad_norm": 0.8041918277740479, "learning_rate": 1.6841300322053e-07, "loss": 0.9664, "step": 304990 }, { "epoch": 1.9485580670303975, "grad_norm": 0.667141318321228, "learning_rate": 1.6800176802738153e-07, "loss": 0.8188, "step": 305000 }, { "epoch": 1.9486219541801362, "grad_norm": 0.9592339396476746, "learning_rate": 1.6759103468556025e-07, "loss": 0.8442, "step": 305010 }, { "epoch": 1.9486858413298749, "grad_norm": 0.9630610942840576, "learning_rate": 1.671808031992128e-07, "loss": 0.8916, "step": 305020 }, { "epoch": 1.9487497284796136, "grad_norm": 5.386287689208984, "learning_rate": 1.6677107357246368e-07, "loss": 0.7866, "step": 305030 }, { "epoch": 1.9488136156293523, "grad_norm": 1.0215415954589844, "learning_rate": 1.663618458094429e-07, "loss": 0.8229, "step": 305040 }, { "epoch": 1.948877502779091, "grad_norm": 5.606192588806152, "learning_rate": 1.6595311991426943e-07, "loss": 1.0394, "step": 305050 }, { "epoch": 1.9489413899288297, "grad_norm": 1.9907209873199463, "learning_rate": 1.655448958910677e-07, "loss": 1.229, "step": 305060 }, { "epoch": 1.9490052770785684, "grad_norm": 0.9894927740097046, "learning_rate": 1.6513717374393445e-07, "loss": 0.9943, "step": 305070 }, { "epoch": 1.9490691642283071, "grad_norm": 0.8075218796730042, "learning_rate": 1.6472995347698305e-07, "loss": 0.7627, "step": 305080 }, { "epoch": 1.9491330513780458, "grad_norm": 1.380251169204712, "learning_rate": 1.643232350943158e-07, "loss": 0.8527, "step": 305090 }, { "epoch": 1.9491969385277845, "grad_norm": 0.6336302161216736, "learning_rate": 1.639170186000294e-07, "loss": 0.5664, "step": 305100 }, { "epoch": 1.9492608256775232, "grad_norm": 1.226217269897461, "learning_rate": 1.6351130399820946e-07, "loss": 0.8283, "step": 305110 }, { "epoch": 1.949324712827262, "grad_norm": 1.1361498832702637, "learning_rate": 1.6310609129294718e-07, "loss": 1.1582, "step": 305120 }, { "epoch": 1.9493885999770006, "grad_norm": 0.9070385694503784, "learning_rate": 1.6270138048832262e-07, "loss": 0.921, "step": 305130 }, { "epoch": 1.9494524871267394, "grad_norm": 2.407209634780884, "learning_rate": 1.6229717158841028e-07, "loss": 0.8725, "step": 305140 }, { "epoch": 1.949516374276478, "grad_norm": 0.6049879193305969, "learning_rate": 1.6189346459727916e-07, "loss": 0.817, "step": 305150 }, { "epoch": 1.9495802614262168, "grad_norm": 0.8451147079467773, "learning_rate": 1.6149025951899822e-07, "loss": 0.8979, "step": 305160 }, { "epoch": 1.9496441485759555, "grad_norm": 0.7237834930419922, "learning_rate": 1.6108755635763083e-07, "loss": 0.638, "step": 305170 }, { "epoch": 1.9497080357256942, "grad_norm": 0.8538950085639954, "learning_rate": 1.6068535511722383e-07, "loss": 0.7442, "step": 305180 }, { "epoch": 1.9497719228754329, "grad_norm": 1.0325095653533936, "learning_rate": 1.6028365580183503e-07, "loss": 0.751, "step": 305190 }, { "epoch": 1.9498358100251716, "grad_norm": 1.1748923063278198, "learning_rate": 1.5988245841550566e-07, "loss": 1.0115, "step": 305200 }, { "epoch": 1.94989969717491, "grad_norm": 0.9139209389686584, "learning_rate": 1.594817629622769e-07, "loss": 0.8674, "step": 305210 }, { "epoch": 1.949963584324649, "grad_norm": 0.9411085844039917, "learning_rate": 1.5908156944618447e-07, "loss": 0.8427, "step": 305220 }, { "epoch": 1.9500274714743875, "grad_norm": 1.0001469850540161, "learning_rate": 1.5872182444128803e-07, "loss": 0.8739, "step": 305230 }, { "epoch": 1.9500913586241264, "grad_norm": 0.7743364572525024, "learning_rate": 1.5832258461685456e-07, "loss": 0.6349, "step": 305240 }, { "epoch": 1.950155245773865, "grad_norm": 1.0801235437393188, "learning_rate": 1.5792384674123695e-07, "loss": 0.9892, "step": 305250 }, { "epoch": 1.9502191329236038, "grad_norm": 1.2719773054122925, "learning_rate": 1.575256108184431e-07, "loss": 0.8488, "step": 305260 }, { "epoch": 1.9502830200733423, "grad_norm": 0.8136735558509827, "learning_rate": 1.5712787685248088e-07, "loss": 1.0387, "step": 305270 }, { "epoch": 1.9503469072230812, "grad_norm": 1.0452874898910522, "learning_rate": 1.5673064484736933e-07, "loss": 0.8763, "step": 305280 }, { "epoch": 1.9504107943728197, "grad_norm": 0.9515146613121033, "learning_rate": 1.5633391480709413e-07, "loss": 0.673, "step": 305290 }, { "epoch": 1.9504746815225587, "grad_norm": 0.5954400897026062, "learning_rate": 1.5593768673566323e-07, "loss": 0.7687, "step": 305300 }, { "epoch": 1.9505385686722971, "grad_norm": 0.7953042387962341, "learning_rate": 1.5554196063705673e-07, "loss": 0.7698, "step": 305310 }, { "epoch": 1.950602455822036, "grad_norm": 0.829258143901825, "learning_rate": 1.551467365152659e-07, "loss": 0.8873, "step": 305320 }, { "epoch": 1.9506663429717745, "grad_norm": 0.837475597858429, "learning_rate": 1.5475201437427088e-07, "loss": 0.7546, "step": 305330 }, { "epoch": 1.9507302301215135, "grad_norm": 1.1831344366073608, "learning_rate": 1.543577942180463e-07, "loss": 0.9384, "step": 305340 }, { "epoch": 1.950794117271252, "grad_norm": 0.9250369668006897, "learning_rate": 1.5396407605055564e-07, "loss": 0.8128, "step": 305350 }, { "epoch": 1.9508580044209909, "grad_norm": 1.4125860929489136, "learning_rate": 1.5357085987577347e-07, "loss": 0.7333, "step": 305360 }, { "epoch": 1.9509218915707294, "grad_norm": 1.054139494895935, "learning_rate": 1.5317814569765775e-07, "loss": 0.8228, "step": 305370 }, { "epoch": 1.9509857787204683, "grad_norm": 0.9782888889312744, "learning_rate": 1.5278593352015536e-07, "loss": 0.9152, "step": 305380 }, { "epoch": 1.9510496658702068, "grad_norm": 1.1666638851165771, "learning_rate": 1.5239422334722974e-07, "loss": 0.6384, "step": 305390 }, { "epoch": 1.9511135530199457, "grad_norm": 1.5675245523452759, "learning_rate": 1.5200301518281667e-07, "loss": 1.0474, "step": 305400 }, { "epoch": 1.9511774401696842, "grad_norm": 0.962260365486145, "learning_rate": 1.5161230903085188e-07, "loss": 0.8123, "step": 305410 }, { "epoch": 1.9512413273194231, "grad_norm": 1.3214889764785767, "learning_rate": 1.512221048952822e-07, "loss": 0.9154, "step": 305420 }, { "epoch": 1.9513052144691616, "grad_norm": 1.0269955396652222, "learning_rate": 1.508324027800323e-07, "loss": 1.0752, "step": 305430 }, { "epoch": 1.9513691016189005, "grad_norm": 1.121466875076294, "learning_rate": 1.5044320268902124e-07, "loss": 0.9382, "step": 305440 }, { "epoch": 1.951432988768639, "grad_norm": 1.2383556365966797, "learning_rate": 1.500545046261792e-07, "loss": 0.9561, "step": 305450 }, { "epoch": 1.9514968759183777, "grad_norm": 0.8118842840194702, "learning_rate": 1.4966630859540865e-07, "loss": 0.7875, "step": 305460 }, { "epoch": 1.9515607630681164, "grad_norm": 2.2885642051696777, "learning_rate": 1.4927861460062865e-07, "loss": 1.2945, "step": 305470 }, { "epoch": 1.9516246502178551, "grad_norm": 1.0705455541610718, "learning_rate": 1.4889142264573607e-07, "loss": 1.124, "step": 305480 }, { "epoch": 1.9516885373675938, "grad_norm": 0.8999394178390503, "learning_rate": 1.4850473273463895e-07, "loss": 0.8277, "step": 305490 }, { "epoch": 1.9517524245173326, "grad_norm": 0.9403774738311768, "learning_rate": 1.48118544871223e-07, "loss": 1.1041, "step": 305500 }, { "epoch": 1.9518163116670713, "grad_norm": 1.0024014711380005, "learning_rate": 1.4773285905938517e-07, "loss": 0.8263, "step": 305510 }, { "epoch": 1.95188019881681, "grad_norm": 0.8567982316017151, "learning_rate": 1.4734767530300564e-07, "loss": 0.9281, "step": 305520 }, { "epoch": 1.9519440859665487, "grad_norm": 1.4160600900650024, "learning_rate": 1.469629936059591e-07, "loss": 1.1148, "step": 305530 }, { "epoch": 1.9520079731162874, "grad_norm": 1.0826430320739746, "learning_rate": 1.465788139721258e-07, "loss": 0.878, "step": 305540 }, { "epoch": 1.952071860266026, "grad_norm": 0.7373409271240234, "learning_rate": 1.4619513640537486e-07, "loss": 1.0469, "step": 305550 }, { "epoch": 1.9521357474157648, "grad_norm": 1.3082698583602905, "learning_rate": 1.4581196090956984e-07, "loss": 0.8581, "step": 305560 }, { "epoch": 1.9521996345655035, "grad_norm": 0.9631960988044739, "learning_rate": 1.4542928748856877e-07, "loss": 0.8367, "step": 305570 }, { "epoch": 1.9522635217152422, "grad_norm": 1.2323166131973267, "learning_rate": 1.4504711614621857e-07, "loss": 1.0647, "step": 305580 }, { "epoch": 1.952327408864981, "grad_norm": 1.3021869659423828, "learning_rate": 1.446654468863773e-07, "loss": 0.8232, "step": 305590 }, { "epoch": 1.9523912960147196, "grad_norm": 1.011557936668396, "learning_rate": 1.4428427971289184e-07, "loss": 0.749, "step": 305600 }, { "epoch": 1.9524551831644583, "grad_norm": 0.98341965675354, "learning_rate": 1.439036146295869e-07, "loss": 0.7362, "step": 305610 }, { "epoch": 1.952519070314197, "grad_norm": 1.0501110553741455, "learning_rate": 1.4352345164030945e-07, "loss": 0.78, "step": 305620 }, { "epoch": 1.9525829574639357, "grad_norm": 0.6365110278129578, "learning_rate": 1.4314379074888418e-07, "loss": 0.831, "step": 305630 }, { "epoch": 1.9526468446136744, "grad_norm": 1.1915727853775024, "learning_rate": 1.4276463195913025e-07, "loss": 0.8709, "step": 305640 }, { "epoch": 1.9527107317634131, "grad_norm": 0.9435083866119385, "learning_rate": 1.4238597527486685e-07, "loss": 0.8048, "step": 305650 }, { "epoch": 1.9527746189131518, "grad_norm": 0.7186278700828552, "learning_rate": 1.4200782069991314e-07, "loss": 0.6761, "step": 305660 }, { "epoch": 1.9528385060628906, "grad_norm": 0.835472822189331, "learning_rate": 1.4163016823807164e-07, "loss": 1.257, "step": 305670 }, { "epoch": 1.9529023932126293, "grad_norm": 0.987421452999115, "learning_rate": 1.4125301789315038e-07, "loss": 0.6752, "step": 305680 }, { "epoch": 1.952966280362368, "grad_norm": 0.7175649404525757, "learning_rate": 1.4087636966894635e-07, "loss": 1.1305, "step": 305690 }, { "epoch": 1.9530301675121065, "grad_norm": 1.1987440586090088, "learning_rate": 1.4050022356925096e-07, "loss": 0.7934, "step": 305700 }, { "epoch": 1.9530940546618454, "grad_norm": 0.8681856393814087, "learning_rate": 1.4012457959785007e-07, "loss": 0.6293, "step": 305710 }, { "epoch": 1.9531579418115839, "grad_norm": 1.4752001762390137, "learning_rate": 1.3974943775852956e-07, "loss": 0.9793, "step": 305720 }, { "epoch": 1.9532218289613228, "grad_norm": 0.9839785695075989, "learning_rate": 1.393747980550697e-07, "loss": 0.8663, "step": 305730 }, { "epoch": 1.9532857161110613, "grad_norm": 1.206047773361206, "learning_rate": 1.3900066049123972e-07, "loss": 1.0417, "step": 305740 }, { "epoch": 1.9533496032608002, "grad_norm": 0.652935266494751, "learning_rate": 1.3862702507080883e-07, "loss": 0.7282, "step": 305750 }, { "epoch": 1.9534134904105387, "grad_norm": 0.7819461226463318, "learning_rate": 1.3825389179754067e-07, "loss": 0.9542, "step": 305760 }, { "epoch": 1.9534773775602776, "grad_norm": 1.0411031246185303, "learning_rate": 1.3788126067519337e-07, "loss": 0.7367, "step": 305770 }, { "epoch": 1.953541264710016, "grad_norm": 0.45564350485801697, "learning_rate": 1.3750913170751944e-07, "loss": 0.8215, "step": 305780 }, { "epoch": 1.953605151859755, "grad_norm": 0.9776589870452881, "learning_rate": 1.3713750489826038e-07, "loss": 0.5945, "step": 305790 }, { "epoch": 1.9536690390094935, "grad_norm": 1.087541103363037, "learning_rate": 1.3676638025116872e-07, "loss": 1.0174, "step": 305800 }, { "epoch": 1.9537329261592324, "grad_norm": 0.7229280471801758, "learning_rate": 1.3639575776997483e-07, "loss": 0.5723, "step": 305810 }, { "epoch": 1.953796813308971, "grad_norm": 1.1533738374710083, "learning_rate": 1.3602563745842012e-07, "loss": 1.0214, "step": 305820 }, { "epoch": 1.9538607004587099, "grad_norm": 0.9949087500572205, "learning_rate": 1.3565601932021831e-07, "loss": 1.2322, "step": 305830 }, { "epoch": 1.9539245876084483, "grad_norm": 1.1862506866455078, "learning_rate": 1.3528690335909978e-07, "loss": 0.7501, "step": 305840 }, { "epoch": 1.9539884747581873, "grad_norm": 1.5264919996261597, "learning_rate": 1.3491828957878927e-07, "loss": 0.8117, "step": 305850 }, { "epoch": 1.9540523619079258, "grad_norm": 0.72819983959198, "learning_rate": 1.3455017798298386e-07, "loss": 1.1966, "step": 305860 }, { "epoch": 1.9541162490576647, "grad_norm": 1.30023193359375, "learning_rate": 1.3418256857539724e-07, "loss": 1.1788, "step": 305870 }, { "epoch": 1.9541801362074032, "grad_norm": 1.353724479675293, "learning_rate": 1.33815461359732e-07, "loss": 0.7519, "step": 305880 }, { "epoch": 1.954244023357142, "grad_norm": 0.8705479502677917, "learning_rate": 1.3344885633969073e-07, "loss": 1.2264, "step": 305890 }, { "epoch": 1.9543079105068806, "grad_norm": 0.7442498803138733, "learning_rate": 1.3308275351895938e-07, "loss": 0.8841, "step": 305900 }, { "epoch": 1.9543717976566195, "grad_norm": 0.5314042568206787, "learning_rate": 1.327171529012239e-07, "loss": 0.662, "step": 305910 }, { "epoch": 1.954435684806358, "grad_norm": 1.0172762870788574, "learning_rate": 1.3235205449016463e-07, "loss": 0.8995, "step": 305920 }, { "epoch": 1.954499571956097, "grad_norm": 1.180970549583435, "learning_rate": 1.3198745828946758e-07, "loss": 1.0673, "step": 305930 }, { "epoch": 1.9545634591058354, "grad_norm": 1.0113781690597534, "learning_rate": 1.3162336430279642e-07, "loss": 1.3776, "step": 305940 }, { "epoch": 1.954627346255574, "grad_norm": 1.205683708190918, "learning_rate": 1.3125977253382048e-07, "loss": 0.9683, "step": 305950 }, { "epoch": 1.9546912334053128, "grad_norm": 0.731574296951294, "learning_rate": 1.3089668298619794e-07, "loss": 1.0741, "step": 305960 }, { "epoch": 1.9547551205550515, "grad_norm": 0.8031790852546692, "learning_rate": 1.3053409566359253e-07, "loss": 0.8409, "step": 305970 }, { "epoch": 1.9548190077047902, "grad_norm": 0.8294908404350281, "learning_rate": 1.3017201056965133e-07, "loss": 0.7015, "step": 305980 }, { "epoch": 1.954882894854529, "grad_norm": 0.8706844449043274, "learning_rate": 1.2981042770802143e-07, "loss": 1.1126, "step": 305990 }, { "epoch": 1.9549467820042676, "grad_norm": 0.9077652096748352, "learning_rate": 1.2944934708234436e-07, "loss": 0.7772, "step": 306000 }, { "epoch": 1.9550106691540063, "grad_norm": 0.8920031189918518, "learning_rate": 1.2908876869625608e-07, "loss": 0.5676, "step": 306010 }, { "epoch": 1.955074556303745, "grad_norm": 0.7555814981460571, "learning_rate": 1.2872869255338704e-07, "loss": 0.9694, "step": 306020 }, { "epoch": 1.9551384434534838, "grad_norm": 1.2185505628585815, "learning_rate": 1.2836911865736767e-07, "loss": 0.7554, "step": 306030 }, { "epoch": 1.9552023306032225, "grad_norm": 3.282710552215576, "learning_rate": 1.2801004701181175e-07, "loss": 0.808, "step": 306040 }, { "epoch": 1.9552662177529612, "grad_norm": 0.7775924801826477, "learning_rate": 1.2765147762034413e-07, "loss": 0.7981, "step": 306050 }, { "epoch": 1.9553301049026999, "grad_norm": 0.8961272239685059, "learning_rate": 1.2729341048657305e-07, "loss": 0.9899, "step": 306060 }, { "epoch": 1.9553939920524386, "grad_norm": 1.0614861249923706, "learning_rate": 1.269358456141012e-07, "loss": 0.8512, "step": 306070 }, { "epoch": 1.9554578792021773, "grad_norm": 0.9941128492355347, "learning_rate": 1.2657878300653125e-07, "loss": 0.9489, "step": 306080 }, { "epoch": 1.955521766351916, "grad_norm": 1.3333861827850342, "learning_rate": 1.262222226674603e-07, "loss": 0.6982, "step": 306090 }, { "epoch": 1.9555856535016547, "grad_norm": 1.0364742279052734, "learning_rate": 1.2586616460047996e-07, "loss": 0.8277, "step": 306100 }, { "epoch": 1.9556495406513934, "grad_norm": 0.9991453289985657, "learning_rate": 1.2551060880917065e-07, "loss": 0.8136, "step": 306110 }, { "epoch": 1.955713427801132, "grad_norm": 0.8465530276298523, "learning_rate": 1.2515555529711842e-07, "loss": 0.797, "step": 306120 }, { "epoch": 1.9557773149508708, "grad_norm": 0.8986508846282959, "learning_rate": 1.2480100406790374e-07, "loss": 0.9838, "step": 306130 }, { "epoch": 1.9558412021006095, "grad_norm": 1.2777119874954224, "learning_rate": 1.2444695512508487e-07, "loss": 1.0812, "step": 306140 }, { "epoch": 1.9559050892503482, "grad_norm": 0.688077986240387, "learning_rate": 1.2409340847223672e-07, "loss": 0.8734, "step": 306150 }, { "epoch": 1.955968976400087, "grad_norm": 3.1722869873046875, "learning_rate": 1.23740364112912e-07, "loss": 0.863, "step": 306160 }, { "epoch": 1.9560328635498256, "grad_norm": 1.2481809854507446, "learning_rate": 1.2338782205067455e-07, "loss": 0.8628, "step": 306170 }, { "epoch": 1.9560967506995643, "grad_norm": 0.9949286580085754, "learning_rate": 1.2303578228907153e-07, "loss": 0.857, "step": 306180 }, { "epoch": 1.9561606378493028, "grad_norm": 0.8365099430084229, "learning_rate": 1.2268424483164453e-07, "loss": 1.0734, "step": 306190 }, { "epoch": 1.9562245249990418, "grad_norm": 1.7561908960342407, "learning_rate": 1.2233320968194072e-07, "loss": 0.7738, "step": 306200 }, { "epoch": 1.9562884121487802, "grad_norm": 0.6921905875205994, "learning_rate": 1.2198267684349063e-07, "loss": 0.829, "step": 306210 }, { "epoch": 1.9563522992985192, "grad_norm": 1.623157024383545, "learning_rate": 1.2163264631982474e-07, "loss": 0.8584, "step": 306220 }, { "epoch": 1.9564161864482577, "grad_norm": 0.611640989780426, "learning_rate": 1.2128311811447356e-07, "loss": 0.7492, "step": 306230 }, { "epoch": 1.9564800735979966, "grad_norm": 1.3138545751571655, "learning_rate": 1.2093409223094542e-07, "loss": 1.0977, "step": 306240 }, { "epoch": 1.956543960747735, "grad_norm": 0.7451996803283691, "learning_rate": 1.2058556867276528e-07, "loss": 0.868, "step": 306250 }, { "epoch": 1.956607847897474, "grad_norm": 0.6381491422653198, "learning_rate": 1.202375474434414e-07, "loss": 1.0713, "step": 306260 }, { "epoch": 1.9566717350472125, "grad_norm": 1.098995327949524, "learning_rate": 1.1989002854647659e-07, "loss": 1.1494, "step": 306270 }, { "epoch": 1.9567356221969514, "grad_norm": 1.9603251218795776, "learning_rate": 1.1954301198537353e-07, "loss": 0.9146, "step": 306280 }, { "epoch": 1.95679950934669, "grad_norm": 0.8454369306564331, "learning_rate": 1.1919649776362397e-07, "loss": 1.0223, "step": 306290 }, { "epoch": 1.9568633964964288, "grad_norm": 1.5903698205947876, "learning_rate": 1.1885048588471948e-07, "loss": 0.8199, "step": 306300 }, { "epoch": 1.9569272836461673, "grad_norm": 1.051414132118225, "learning_rate": 1.1850497635214064e-07, "loss": 0.8365, "step": 306310 }, { "epoch": 1.9569911707959062, "grad_norm": 1.1712877750396729, "learning_rate": 1.1815996916937356e-07, "loss": 0.8556, "step": 306320 }, { "epoch": 1.9570550579456447, "grad_norm": 1.2720564603805542, "learning_rate": 1.1781546433988766e-07, "loss": 1.0154, "step": 306330 }, { "epoch": 1.9571189450953836, "grad_norm": 0.6502274870872498, "learning_rate": 1.1747146186715796e-07, "loss": 0.8979, "step": 306340 }, { "epoch": 1.9571828322451221, "grad_norm": 0.8052017092704773, "learning_rate": 1.171279617546428e-07, "loss": 0.9568, "step": 306350 }, { "epoch": 1.957246719394861, "grad_norm": 1.5580902099609375, "learning_rate": 1.1678496400580052e-07, "loss": 1.0563, "step": 306360 }, { "epoch": 1.9573106065445995, "grad_norm": 1.3809198141098022, "learning_rate": 1.1644246862409502e-07, "loss": 0.8175, "step": 306370 }, { "epoch": 1.9573744936943385, "grad_norm": 0.9385179281234741, "learning_rate": 1.1610047561296245e-07, "loss": 0.8123, "step": 306380 }, { "epoch": 1.957438380844077, "grad_norm": 1.2661628723144531, "learning_rate": 1.1575898497586113e-07, "loss": 0.6835, "step": 306390 }, { "epoch": 1.9575022679938159, "grad_norm": 0.7913726568222046, "learning_rate": 1.1541799671621611e-07, "loss": 0.7138, "step": 306400 }, { "epoch": 1.9575661551435544, "grad_norm": 0.999924898147583, "learning_rate": 1.1507751083747465e-07, "loss": 0.7999, "step": 306410 }, { "epoch": 1.9576300422932933, "grad_norm": 0.5822678804397583, "learning_rate": 1.147375273430562e-07, "loss": 0.8937, "step": 306420 }, { "epoch": 1.9576939294430318, "grad_norm": 1.0195214748382568, "learning_rate": 1.1439804623638584e-07, "loss": 1.106, "step": 306430 }, { "epoch": 1.9577578165927705, "grad_norm": 0.6929598450660706, "learning_rate": 1.1405906752088302e-07, "loss": 0.9227, "step": 306440 }, { "epoch": 1.9578217037425092, "grad_norm": 0.7076100707054138, "learning_rate": 1.1372059119996725e-07, "loss": 0.9683, "step": 306450 }, { "epoch": 1.957885590892248, "grad_norm": 0.9137086868286133, "learning_rate": 1.1338261727704136e-07, "loss": 1.0094, "step": 306460 }, { "epoch": 1.9579494780419866, "grad_norm": 2.7839596271514893, "learning_rate": 1.1304514575551372e-07, "loss": 0.7638, "step": 306470 }, { "epoch": 1.9580133651917253, "grad_norm": 0.9103108048439026, "learning_rate": 1.1270817663877609e-07, "loss": 1.1677, "step": 306480 }, { "epoch": 1.958077252341464, "grad_norm": 2.217245578765869, "learning_rate": 1.1237170993022572e-07, "loss": 0.921, "step": 306490 }, { "epoch": 1.9581411394912027, "grad_norm": 0.9462472796440125, "learning_rate": 1.120357456332488e-07, "loss": 0.7343, "step": 306500 }, { "epoch": 1.9582050266409414, "grad_norm": 1.196720004081726, "learning_rate": 1.1170028375123709e-07, "loss": 0.9115, "step": 306510 }, { "epoch": 1.9582689137906801, "grad_norm": 2.692883253097534, "learning_rate": 1.1136532428756008e-07, "loss": 0.7908, "step": 306520 }, { "epoch": 1.9583328009404188, "grad_norm": 0.828401505947113, "learning_rate": 1.1103086724559287e-07, "loss": 0.6602, "step": 306530 }, { "epoch": 1.9583966880901575, "grad_norm": 1.0882549285888672, "learning_rate": 1.1069691262870496e-07, "loss": 0.9501, "step": 306540 }, { "epoch": 1.9584605752398963, "grad_norm": 0.9018024802207947, "learning_rate": 1.1036346044026591e-07, "loss": 1.0834, "step": 306550 }, { "epoch": 1.958524462389635, "grad_norm": 0.62995845079422, "learning_rate": 1.1003051068361747e-07, "loss": 0.7957, "step": 306560 }, { "epoch": 1.9585883495393737, "grad_norm": 0.7326763272285461, "learning_rate": 1.0969806336212917e-07, "loss": 0.938, "step": 306570 }, { "epoch": 1.9586522366891124, "grad_norm": 1.4249024391174316, "learning_rate": 1.093661184791428e-07, "loss": 0.7235, "step": 306580 }, { "epoch": 1.958716123838851, "grad_norm": 0.9121171236038208, "learning_rate": 1.0903467603800011e-07, "loss": 0.8777, "step": 306590 }, { "epoch": 1.9587800109885898, "grad_norm": 0.9764759540557861, "learning_rate": 1.0870373604203732e-07, "loss": 1.0221, "step": 306600 }, { "epoch": 1.9588438981383285, "grad_norm": 0.9533472657203674, "learning_rate": 1.0837329849459066e-07, "loss": 0.7547, "step": 306610 }, { "epoch": 1.9589077852880672, "grad_norm": 0.8116563558578491, "learning_rate": 1.080433633989908e-07, "loss": 0.8485, "step": 306620 }, { "epoch": 1.958971672437806, "grad_norm": 1.36518394947052, "learning_rate": 1.0771393075855729e-07, "loss": 0.7912, "step": 306630 }, { "epoch": 1.9590355595875446, "grad_norm": 0.9521485567092896, "learning_rate": 1.0738500057660417e-07, "loss": 0.8798, "step": 306640 }, { "epoch": 1.9590994467372833, "grad_norm": 0.9593106508255005, "learning_rate": 1.0705657285644544e-07, "loss": 0.8244, "step": 306650 }, { "epoch": 1.959163333887022, "grad_norm": 0.7078234553337097, "learning_rate": 1.0672864760139512e-07, "loss": 0.7332, "step": 306660 }, { "epoch": 1.9592272210367607, "grad_norm": 0.908644437789917, "learning_rate": 1.0640122481475057e-07, "loss": 0.5235, "step": 306670 }, { "epoch": 1.9592911081864992, "grad_norm": 0.8596978783607483, "learning_rate": 1.0607430449980915e-07, "loss": 0.6656, "step": 306680 }, { "epoch": 1.9593549953362381, "grad_norm": 0.6808596849441528, "learning_rate": 1.0574788665986269e-07, "loss": 0.8061, "step": 306690 }, { "epoch": 1.9594188824859766, "grad_norm": 0.6964841485023499, "learning_rate": 1.0542197129819742e-07, "loss": 0.6471, "step": 306700 }, { "epoch": 1.9594827696357155, "grad_norm": 0.8564151525497437, "learning_rate": 1.0509655841809962e-07, "loss": 0.6819, "step": 306710 }, { "epoch": 1.959546656785454, "grad_norm": 0.9149518609046936, "learning_rate": 1.0477164802285e-07, "loss": 1.091, "step": 306720 }, { "epoch": 1.959610543935193, "grad_norm": 0.9932882189750671, "learning_rate": 1.0444724011570706e-07, "loss": 1.0427, "step": 306730 }, { "epoch": 1.9596744310849314, "grad_norm": 0.6701475381851196, "learning_rate": 1.041233346999515e-07, "loss": 0.8218, "step": 306740 }, { "epoch": 1.9597383182346704, "grad_norm": 2.0861892700195312, "learning_rate": 1.0379993177884184e-07, "loss": 0.9127, "step": 306750 }, { "epoch": 1.9598022053844089, "grad_norm": 1.76549232006073, "learning_rate": 1.0347703135563103e-07, "loss": 0.7404, "step": 306760 }, { "epoch": 1.9598660925341478, "grad_norm": 0.678974449634552, "learning_rate": 1.0315463343356646e-07, "loss": 0.6949, "step": 306770 }, { "epoch": 1.9599299796838863, "grad_norm": 0.9061679244041443, "learning_rate": 1.028327380159122e-07, "loss": 0.8652, "step": 306780 }, { "epoch": 1.9599938668336252, "grad_norm": 1.2013291120529175, "learning_rate": 1.0251134510589344e-07, "loss": 0.6827, "step": 306790 }, { "epoch": 1.9600577539833637, "grad_norm": 0.6794036030769348, "learning_rate": 1.0219045470675203e-07, "loss": 0.8597, "step": 306800 }, { "epoch": 1.9601216411331026, "grad_norm": 0.913158118724823, "learning_rate": 1.0187006682172429e-07, "loss": 0.7935, "step": 306810 }, { "epoch": 1.960185528282841, "grad_norm": 2.0173139572143555, "learning_rate": 1.0155018145403539e-07, "loss": 0.8751, "step": 306820 }, { "epoch": 1.96024941543258, "grad_norm": 1.0600955486297607, "learning_rate": 1.0123079860689943e-07, "loss": 0.899, "step": 306830 }, { "epoch": 1.9603133025823185, "grad_norm": 0.9187150597572327, "learning_rate": 1.0091191828353608e-07, "loss": 0.5977, "step": 306840 }, { "epoch": 1.9603771897320574, "grad_norm": 0.7382291555404663, "learning_rate": 1.0059354048716496e-07, "loss": 0.7518, "step": 306850 }, { "epoch": 1.960441076881796, "grad_norm": 0.6964963674545288, "learning_rate": 1.0027566522097797e-07, "loss": 0.7174, "step": 306860 }, { "epoch": 1.9605049640315348, "grad_norm": 1.0228418111801147, "learning_rate": 9.995829248818921e-08, "loss": 0.7848, "step": 306870 }, { "epoch": 1.9605688511812733, "grad_norm": 1.48477041721344, "learning_rate": 9.964142229199058e-08, "loss": 0.8795, "step": 306880 }, { "epoch": 1.9606327383310123, "grad_norm": 1.1190383434295654, "learning_rate": 9.932505463557396e-08, "loss": 0.7041, "step": 306890 }, { "epoch": 1.9606966254807507, "grad_norm": 0.7254348397254944, "learning_rate": 9.900918952212013e-08, "loss": 0.8268, "step": 306900 }, { "epoch": 1.9607605126304894, "grad_norm": 1.076117992401123, "learning_rate": 9.869382695482099e-08, "loss": 0.6709, "step": 306910 }, { "epoch": 1.9608243997802282, "grad_norm": 1.279038906097412, "learning_rate": 9.837896693684068e-08, "loss": 0.7932, "step": 306920 }, { "epoch": 1.9608882869299669, "grad_norm": 0.5272724032402039, "learning_rate": 9.806460947135443e-08, "loss": 0.8304, "step": 306930 }, { "epoch": 1.9609521740797056, "grad_norm": 1.385393738746643, "learning_rate": 9.775075456153194e-08, "loss": 0.9646, "step": 306940 }, { "epoch": 1.9610160612294443, "grad_norm": 1.049930214881897, "learning_rate": 9.743740221053178e-08, "loss": 0.8105, "step": 306950 }, { "epoch": 1.961079948379183, "grad_norm": 0.965087354183197, "learning_rate": 9.712455242150143e-08, "loss": 0.7291, "step": 306960 }, { "epoch": 1.9611438355289217, "grad_norm": 1.0274797677993774, "learning_rate": 9.681220519760503e-08, "loss": 1.0187, "step": 306970 }, { "epoch": 1.9612077226786604, "grad_norm": 1.0843732357025146, "learning_rate": 9.650036054198452e-08, "loss": 1.0503, "step": 306980 }, { "epoch": 1.961271609828399, "grad_norm": 0.9422346353530884, "learning_rate": 9.618901845777073e-08, "loss": 0.8191, "step": 306990 }, { "epoch": 1.9613354969781378, "grad_norm": 1.1185977458953857, "learning_rate": 9.587817894811113e-08, "loss": 0.8307, "step": 307000 }, { "epoch": 1.9613993841278765, "grad_norm": 1.0668240785598755, "learning_rate": 9.556784201613101e-08, "loss": 1.1259, "step": 307010 }, { "epoch": 1.9614632712776152, "grad_norm": 1.0997084379196167, "learning_rate": 9.525800766495562e-08, "loss": 0.8358, "step": 307020 }, { "epoch": 1.961527158427354, "grad_norm": 1.0606350898742676, "learning_rate": 9.494867589770473e-08, "loss": 0.6989, "step": 307030 }, { "epoch": 1.9615910455770926, "grad_norm": 0.6498733758926392, "learning_rate": 9.463984671749804e-08, "loss": 0.7428, "step": 307040 }, { "epoch": 1.9616549327268313, "grad_norm": 0.9367380738258362, "learning_rate": 9.433152012743863e-08, "loss": 0.8984, "step": 307050 }, { "epoch": 1.96171881987657, "grad_norm": 0.8001465201377869, "learning_rate": 9.402369613064066e-08, "loss": 0.828, "step": 307060 }, { "epoch": 1.9617827070263087, "grad_norm": 1.2087067365646362, "learning_rate": 9.371637473019057e-08, "loss": 0.9922, "step": 307070 }, { "epoch": 1.9618465941760475, "grad_norm": 1.5438634157180786, "learning_rate": 9.340955592919698e-08, "loss": 0.6449, "step": 307080 }, { "epoch": 1.9619104813257862, "grad_norm": 1.0287994146347046, "learning_rate": 9.310323973074631e-08, "loss": 0.9819, "step": 307090 }, { "epoch": 1.9619743684755249, "grad_norm": 0.9086164236068726, "learning_rate": 9.279742613791941e-08, "loss": 0.8067, "step": 307100 }, { "epoch": 1.9620382556252636, "grad_norm": 1.034316062927246, "learning_rate": 9.249211515379719e-08, "loss": 1.0819, "step": 307110 }, { "epoch": 1.9621021427750023, "grad_norm": 0.9390623569488525, "learning_rate": 9.218730678146048e-08, "loss": 0.8188, "step": 307120 }, { "epoch": 1.962166029924741, "grad_norm": 2.8493175506591797, "learning_rate": 9.188300102396797e-08, "loss": 0.9277, "step": 307130 }, { "epoch": 1.9622299170744797, "grad_norm": 1.424728512763977, "learning_rate": 9.15791978843894e-08, "loss": 1.1248, "step": 307140 }, { "epoch": 1.9622938042242182, "grad_norm": 0.9395694136619568, "learning_rate": 9.127589736578901e-08, "loss": 1.4445, "step": 307150 }, { "epoch": 1.962357691373957, "grad_norm": 1.2583932876586914, "learning_rate": 9.09730994712199e-08, "loss": 0.8988, "step": 307160 }, { "epoch": 1.9624215785236956, "grad_norm": 1.0137208700180054, "learning_rate": 9.067080420372409e-08, "loss": 0.8206, "step": 307170 }, { "epoch": 1.9624854656734345, "grad_norm": 1.4127328395843506, "learning_rate": 9.036901156635469e-08, "loss": 0.7615, "step": 307180 }, { "epoch": 1.962549352823173, "grad_norm": 1.0815893411636353, "learning_rate": 9.00677215621426e-08, "loss": 0.7994, "step": 307190 }, { "epoch": 1.962613239972912, "grad_norm": 0.8997918367385864, "learning_rate": 8.976693419412985e-08, "loss": 0.8337, "step": 307200 }, { "epoch": 1.9626771271226504, "grad_norm": 2.5383217334747314, "learning_rate": 8.946664946534178e-08, "loss": 0.9905, "step": 307210 }, { "epoch": 1.9627410142723893, "grad_norm": 1.107540249824524, "learning_rate": 8.916686737880375e-08, "loss": 0.9141, "step": 307220 }, { "epoch": 1.9628049014221278, "grad_norm": 1.3632421493530273, "learning_rate": 8.886758793753558e-08, "loss": 0.9035, "step": 307230 }, { "epoch": 1.9628687885718668, "grad_norm": 0.8134247064590454, "learning_rate": 8.856881114455151e-08, "loss": 0.8471, "step": 307240 }, { "epoch": 1.9629326757216052, "grad_norm": 2.934614896774292, "learning_rate": 8.82705370028547e-08, "loss": 0.7951, "step": 307250 }, { "epoch": 1.9629965628713442, "grad_norm": 0.9333258271217346, "learning_rate": 8.797276551545386e-08, "loss": 0.7279, "step": 307260 }, { "epoch": 1.9630604500210826, "grad_norm": 0.9117864370346069, "learning_rate": 8.767549668535213e-08, "loss": 0.9715, "step": 307270 }, { "epoch": 1.9631243371708216, "grad_norm": 1.166901707649231, "learning_rate": 8.737873051553047e-08, "loss": 1.1847, "step": 307280 }, { "epoch": 1.96318822432056, "grad_norm": 0.8172804117202759, "learning_rate": 8.708246700899203e-08, "loss": 0.9207, "step": 307290 }, { "epoch": 1.963252111470299, "grad_norm": 0.5826395153999329, "learning_rate": 8.678670616871221e-08, "loss": 0.6237, "step": 307300 }, { "epoch": 1.9633159986200375, "grad_norm": 1.1163614988327026, "learning_rate": 8.649144799767194e-08, "loss": 0.996, "step": 307310 }, { "epoch": 1.9633798857697764, "grad_norm": 1.161007285118103, "learning_rate": 8.61966924988411e-08, "loss": 0.894, "step": 307320 }, { "epoch": 1.9634437729195149, "grad_norm": 1.0262027978897095, "learning_rate": 8.590243967519507e-08, "loss": 1.1078, "step": 307330 }, { "epoch": 1.9635076600692538, "grad_norm": 0.8189107775688171, "learning_rate": 8.560868952969259e-08, "loss": 1.0965, "step": 307340 }, { "epoch": 1.9635715472189923, "grad_norm": 1.2024630308151245, "learning_rate": 8.53154420652924e-08, "loss": 1.1089, "step": 307350 }, { "epoch": 1.9636354343687312, "grad_norm": 1.5332752466201782, "learning_rate": 8.502269728494771e-08, "loss": 0.7511, "step": 307360 }, { "epoch": 1.9636993215184697, "grad_norm": 1.2168183326721191, "learning_rate": 8.473045519160616e-08, "loss": 0.9701, "step": 307370 }, { "epoch": 1.9637632086682086, "grad_norm": 0.8302614092826843, "learning_rate": 8.443871578820984e-08, "loss": 0.675, "step": 307380 }, { "epoch": 1.9638270958179471, "grad_norm": 0.7621539235115051, "learning_rate": 8.414747907770082e-08, "loss": 0.9875, "step": 307390 }, { "epoch": 1.9638909829676858, "grad_norm": 0.7296609282493591, "learning_rate": 8.385674506301566e-08, "loss": 0.9113, "step": 307400 }, { "epoch": 1.9639548701174245, "grad_norm": 1.1947790384292603, "learning_rate": 8.35665137470687e-08, "loss": 0.8201, "step": 307410 }, { "epoch": 1.9640187572671632, "grad_norm": 1.920008659362793, "learning_rate": 8.327678513279646e-08, "loss": 0.7582, "step": 307420 }, { "epoch": 1.964082644416902, "grad_norm": 0.603803277015686, "learning_rate": 8.298755922310774e-08, "loss": 0.8512, "step": 307430 }, { "epoch": 1.9641465315666407, "grad_norm": 0.8965196013450623, "learning_rate": 8.269883602091688e-08, "loss": 0.9519, "step": 307440 }, { "epoch": 1.9642104187163794, "grad_norm": 0.8932124376296997, "learning_rate": 8.24106155291382e-08, "loss": 0.6974, "step": 307450 }, { "epoch": 1.964274305866118, "grad_norm": 0.9798451662063599, "learning_rate": 8.212289775066384e-08, "loss": 0.8882, "step": 307460 }, { "epoch": 1.9643381930158568, "grad_norm": 0.876908540725708, "learning_rate": 8.183568268840258e-08, "loss": 0.8991, "step": 307470 }, { "epoch": 1.9644020801655955, "grad_norm": 0.7665677070617676, "learning_rate": 8.1548970345241e-08, "loss": 0.9696, "step": 307480 }, { "epoch": 1.9644659673153342, "grad_norm": 0.7917914986610413, "learning_rate": 8.126276072406014e-08, "loss": 0.7703, "step": 307490 }, { "epoch": 1.9645298544650729, "grad_norm": 0.9933443665504456, "learning_rate": 8.097705382775767e-08, "loss": 0.8286, "step": 307500 }, { "epoch": 1.9645937416148116, "grad_norm": 1.0009136199951172, "learning_rate": 8.069184965919797e-08, "loss": 0.7784, "step": 307510 }, { "epoch": 1.9646576287645503, "grad_norm": 1.0411992073059082, "learning_rate": 8.04071482212565e-08, "loss": 0.9634, "step": 307520 }, { "epoch": 1.964721515914289, "grad_norm": 0.5806550979614258, "learning_rate": 8.012294951680321e-08, "loss": 0.991, "step": 307530 }, { "epoch": 1.9647854030640277, "grad_norm": 0.9990647435188293, "learning_rate": 7.983925354869693e-08, "loss": 0.705, "step": 307540 }, { "epoch": 1.9648492902137664, "grad_norm": 1.2305150032043457, "learning_rate": 7.955606031980201e-08, "loss": 0.9844, "step": 307550 }, { "epoch": 1.9649131773635051, "grad_norm": 1.0738801956176758, "learning_rate": 7.927336983296063e-08, "loss": 0.8836, "step": 307560 }, { "epoch": 1.9649770645132438, "grad_norm": 1.563961148262024, "learning_rate": 7.899118209102607e-08, "loss": 0.9084, "step": 307570 }, { "epoch": 1.9650409516629825, "grad_norm": 0.9830324053764343, "learning_rate": 7.870949709683495e-08, "loss": 0.9476, "step": 307580 }, { "epoch": 1.9651048388127212, "grad_norm": 0.7370650172233582, "learning_rate": 7.842831485322944e-08, "loss": 1.0117, "step": 307590 }, { "epoch": 1.96516872596246, "grad_norm": 1.03929603099823, "learning_rate": 7.814763536303504e-08, "loss": 0.8531, "step": 307600 }, { "epoch": 1.9652326131121987, "grad_norm": 1.147141695022583, "learning_rate": 7.786745862908839e-08, "loss": 0.884, "step": 307610 }, { "epoch": 1.9652965002619374, "grad_norm": 1.5236049890518188, "learning_rate": 7.758778465420392e-08, "loss": 0.8023, "step": 307620 }, { "epoch": 1.965360387411676, "grad_norm": 0.7750815153121948, "learning_rate": 7.7308613441196e-08, "loss": 0.7576, "step": 307630 }, { "epoch": 1.9654242745614146, "grad_norm": 0.9773805737495422, "learning_rate": 7.702994499288463e-08, "loss": 0.8848, "step": 307640 }, { "epoch": 1.9654881617111535, "grad_norm": 1.3267862796783447, "learning_rate": 7.675177931206756e-08, "loss": 0.955, "step": 307650 }, { "epoch": 1.965552048860892, "grad_norm": 1.22646164894104, "learning_rate": 7.647411640155366e-08, "loss": 0.673, "step": 307660 }, { "epoch": 1.965615936010631, "grad_norm": 1.5440233945846558, "learning_rate": 7.61969562641296e-08, "loss": 0.8305, "step": 307670 }, { "epoch": 1.9656798231603694, "grad_norm": 0.8694813847541809, "learning_rate": 7.592029890259867e-08, "loss": 0.7927, "step": 307680 }, { "epoch": 1.9657437103101083, "grad_norm": 0.867393970489502, "learning_rate": 7.564414431973643e-08, "loss": 0.7397, "step": 307690 }, { "epoch": 1.9658075974598468, "grad_norm": 0.6875196099281311, "learning_rate": 7.536849251832956e-08, "loss": 0.7244, "step": 307700 }, { "epoch": 1.9658714846095857, "grad_norm": 1.2303906679153442, "learning_rate": 7.50933435011536e-08, "loss": 0.8776, "step": 307710 }, { "epoch": 1.9659353717593242, "grad_norm": 2.2225420475006104, "learning_rate": 7.481869727097856e-08, "loss": 0.7413, "step": 307720 }, { "epoch": 1.9659992589090631, "grad_norm": 0.717719554901123, "learning_rate": 7.45445538305689e-08, "loss": 0.7105, "step": 307730 }, { "epoch": 1.9660631460588016, "grad_norm": 2.5248043537139893, "learning_rate": 7.427091318268908e-08, "loss": 1.0971, "step": 307740 }, { "epoch": 1.9661270332085405, "grad_norm": 0.8963668346405029, "learning_rate": 7.399777533009245e-08, "loss": 0.8243, "step": 307750 }, { "epoch": 1.966190920358279, "grad_norm": 0.9372978806495667, "learning_rate": 7.372514027553235e-08, "loss": 0.7271, "step": 307760 }, { "epoch": 1.966254807508018, "grad_norm": 1.5391466617584229, "learning_rate": 7.345300802175103e-08, "loss": 0.7229, "step": 307770 }, { "epoch": 1.9663186946577564, "grad_norm": 0.8805508017539978, "learning_rate": 7.318137857149077e-08, "loss": 0.9438, "step": 307780 }, { "epoch": 1.9663825818074954, "grad_norm": 1.4048082828521729, "learning_rate": 7.291025192748269e-08, "loss": 0.7908, "step": 307790 }, { "epoch": 1.9664464689572339, "grad_norm": 1.3518507480621338, "learning_rate": 7.263962809246905e-08, "loss": 0.8418, "step": 307800 }, { "epoch": 1.9665103561069728, "grad_norm": 2.4091033935546875, "learning_rate": 7.236950706915879e-08, "loss": 0.8969, "step": 307810 }, { "epoch": 1.9665742432567113, "grad_norm": 0.7952820658683777, "learning_rate": 7.209988886028862e-08, "loss": 0.8785, "step": 307820 }, { "epoch": 1.9666381304064502, "grad_norm": 0.6336418986320496, "learning_rate": 7.183077346856192e-08, "loss": 0.9454, "step": 307830 }, { "epoch": 1.9667020175561887, "grad_norm": 1.4031201601028442, "learning_rate": 7.156216089669876e-08, "loss": 0.8487, "step": 307840 }, { "epoch": 1.9667659047059276, "grad_norm": 0.887291669845581, "learning_rate": 7.129405114739141e-08, "loss": 0.9261, "step": 307850 }, { "epoch": 1.966829791855666, "grad_norm": 0.8543388247489929, "learning_rate": 7.102644422335436e-08, "loss": 0.7339, "step": 307860 }, { "epoch": 1.966893679005405, "grad_norm": 0.9197491407394409, "learning_rate": 7.075934012726881e-08, "loss": 0.8638, "step": 307870 }, { "epoch": 1.9669575661551435, "grad_norm": 0.79677814245224, "learning_rate": 7.049273886183815e-08, "loss": 0.9238, "step": 307880 }, { "epoch": 1.9670214533048822, "grad_norm": 0.9617187976837158, "learning_rate": 7.022664042973804e-08, "loss": 0.9432, "step": 307890 }, { "epoch": 1.967085340454621, "grad_norm": 1.7885737419128418, "learning_rate": 6.996104483364963e-08, "loss": 0.6147, "step": 307900 }, { "epoch": 1.9671492276043596, "grad_norm": 0.5837380886077881, "learning_rate": 6.969595207625412e-08, "loss": 0.699, "step": 307910 }, { "epoch": 1.9672131147540983, "grad_norm": 0.8746609687805176, "learning_rate": 6.943136216021051e-08, "loss": 0.8678, "step": 307920 }, { "epoch": 1.967277001903837, "grad_norm": 0.8975337147712708, "learning_rate": 6.916727508819443e-08, "loss": 0.951, "step": 307930 }, { "epoch": 1.9673408890535757, "grad_norm": 1.542599081993103, "learning_rate": 6.890369086285376e-08, "loss": 0.9868, "step": 307940 }, { "epoch": 1.9674047762033144, "grad_norm": 0.840015172958374, "learning_rate": 6.864060948685857e-08, "loss": 0.8198, "step": 307950 }, { "epoch": 1.9674686633530531, "grad_norm": 1.1040661334991455, "learning_rate": 6.837803096284012e-08, "loss": 0.9692, "step": 307960 }, { "epoch": 1.9675325505027919, "grad_norm": 0.7686569690704346, "learning_rate": 6.811595529345738e-08, "loss": 0.8241, "step": 307970 }, { "epoch": 1.9675964376525306, "grad_norm": 1.4878031015396118, "learning_rate": 6.785438248134158e-08, "loss": 0.6764, "step": 307980 }, { "epoch": 1.9676603248022693, "grad_norm": 1.2950985431671143, "learning_rate": 6.759331252912949e-08, "loss": 1.2905, "step": 307990 }, { "epoch": 1.967724211952008, "grad_norm": 1.3230798244476318, "learning_rate": 6.733274543945234e-08, "loss": 0.8295, "step": 308000 }, { "epoch": 1.9677880991017467, "grad_norm": 1.8880711793899536, "learning_rate": 6.707268121493027e-08, "loss": 0.9665, "step": 308010 }, { "epoch": 1.9678519862514854, "grad_norm": 0.7278730869293213, "learning_rate": 6.68131198581834e-08, "loss": 1.1621, "step": 308020 }, { "epoch": 1.967915873401224, "grad_norm": 0.9226792454719543, "learning_rate": 6.655406137183184e-08, "loss": 0.7692, "step": 308030 }, { "epoch": 1.9679797605509628, "grad_norm": 0.8120977878570557, "learning_rate": 6.629550575847354e-08, "loss": 0.8968, "step": 308040 }, { "epoch": 1.9680436477007015, "grad_norm": 0.8604071736335754, "learning_rate": 6.603745302072306e-08, "loss": 0.7378, "step": 308050 }, { "epoch": 1.9681075348504402, "grad_norm": 0.7497774958610535, "learning_rate": 6.577990316117277e-08, "loss": 0.7505, "step": 308060 }, { "epoch": 1.968171422000179, "grad_norm": 0.9616866111755371, "learning_rate": 6.552285618241505e-08, "loss": 0.7913, "step": 308070 }, { "epoch": 1.9682353091499176, "grad_norm": 0.7489196062088013, "learning_rate": 6.52663120870478e-08, "loss": 1.0104, "step": 308080 }, { "epoch": 1.9682991962996563, "grad_norm": 0.9955611824989319, "learning_rate": 6.501027087764122e-08, "loss": 0.7075, "step": 308090 }, { "epoch": 1.968363083449395, "grad_norm": 0.8881136775016785, "learning_rate": 6.475473255678765e-08, "loss": 0.8903, "step": 308100 }, { "epoch": 1.9684269705991337, "grad_norm": 0.9819602966308594, "learning_rate": 6.449969712705173e-08, "loss": 0.956, "step": 308110 }, { "epoch": 1.9684908577488724, "grad_norm": 0.8601024150848389, "learning_rate": 6.424516459100361e-08, "loss": 0.8281, "step": 308120 }, { "epoch": 1.968554744898611, "grad_norm": 0.9564414620399475, "learning_rate": 6.39911349512079e-08, "loss": 1.4346, "step": 308130 }, { "epoch": 1.9686186320483499, "grad_norm": 0.954218327999115, "learning_rate": 6.373760821022367e-08, "loss": 0.8884, "step": 308140 }, { "epoch": 1.9686825191980883, "grad_norm": 0.711357057094574, "learning_rate": 6.34845843705989e-08, "loss": 0.7504, "step": 308150 }, { "epoch": 1.9687464063478273, "grad_norm": 0.8907003402709961, "learning_rate": 6.323206343488708e-08, "loss": 0.9079, "step": 308160 }, { "epoch": 1.9688102934975658, "grad_norm": 0.7382941842079163, "learning_rate": 6.298004540563062e-08, "loss": 0.8033, "step": 308170 }, { "epoch": 1.9688741806473047, "grad_norm": 1.1640852689743042, "learning_rate": 6.27285302853664e-08, "loss": 0.6376, "step": 308180 }, { "epoch": 1.9689380677970432, "grad_norm": 2.0369021892547607, "learning_rate": 6.247751807663127e-08, "loss": 0.8086, "step": 308190 }, { "epoch": 1.969001954946782, "grad_norm": 0.8535321950912476, "learning_rate": 6.222700878194543e-08, "loss": 0.863, "step": 308200 }, { "epoch": 1.9690658420965206, "grad_norm": 1.113789677619934, "learning_rate": 6.197700240383464e-08, "loss": 0.7646, "step": 308210 }, { "epoch": 1.9691297292462595, "grad_norm": 1.0696840286254883, "learning_rate": 6.17274989448191e-08, "loss": 0.7492, "step": 308220 }, { "epoch": 1.969193616395998, "grad_norm": 0.6535469889640808, "learning_rate": 6.147849840741349e-08, "loss": 0.8728, "step": 308230 }, { "epoch": 1.969257503545737, "grad_norm": 0.549797534942627, "learning_rate": 6.123000079412134e-08, "loss": 0.788, "step": 308240 }, { "epoch": 1.9693213906954754, "grad_norm": 1.2363709211349487, "learning_rate": 6.09820061074462e-08, "loss": 0.9564, "step": 308250 }, { "epoch": 1.9693852778452143, "grad_norm": 0.9653347730636597, "learning_rate": 6.073451434988053e-08, "loss": 0.8188, "step": 308260 }, { "epoch": 1.9694491649949528, "grad_norm": 1.123185396194458, "learning_rate": 6.048752552392789e-08, "loss": 0.942, "step": 308270 }, { "epoch": 1.9695130521446917, "grad_norm": 0.9950637221336365, "learning_rate": 6.024103963206962e-08, "loss": 0.9046, "step": 308280 }, { "epoch": 1.9695769392944302, "grad_norm": 2.174548625946045, "learning_rate": 5.999505667678706e-08, "loss": 0.6862, "step": 308290 }, { "epoch": 1.9696408264441692, "grad_norm": 0.6808480620384216, "learning_rate": 5.974957666055602e-08, "loss": 0.7023, "step": 308300 }, { "epoch": 1.9697047135939076, "grad_norm": 1.1171269416809082, "learning_rate": 5.950459958585231e-08, "loss": 0.7116, "step": 308310 }, { "epoch": 1.9697686007436466, "grad_norm": 1.3545777797698975, "learning_rate": 5.9260125455140594e-08, "loss": 0.8542, "step": 308320 }, { "epoch": 1.969832487893385, "grad_norm": 0.9661934971809387, "learning_rate": 5.9016154270891135e-08, "loss": 0.9956, "step": 308330 }, { "epoch": 1.969896375043124, "grad_norm": 1.025800108909607, "learning_rate": 5.877268603554642e-08, "loss": 0.9316, "step": 308340 }, { "epoch": 1.9699602621928625, "grad_norm": 0.5792987942695618, "learning_rate": 5.852972075157115e-08, "loss": 0.7468, "step": 308350 }, { "epoch": 1.9700241493426014, "grad_norm": 1.380750060081482, "learning_rate": 5.828725842140226e-08, "loss": 0.8148, "step": 308360 }, { "epoch": 1.9700880364923399, "grad_norm": 0.6792870759963989, "learning_rate": 5.804529904749334e-08, "loss": 0.7083, "step": 308370 }, { "epoch": 1.9701519236420786, "grad_norm": 1.1884015798568726, "learning_rate": 5.7803842632270234e-08, "loss": 0.7961, "step": 308380 }, { "epoch": 1.9702158107918173, "grad_norm": 0.7494359016418457, "learning_rate": 5.7562889178164326e-08, "loss": 0.8931, "step": 308390 }, { "epoch": 1.970279697941556, "grad_norm": 0.8433160781860352, "learning_rate": 5.7322438687612555e-08, "loss": 0.814, "step": 308400 }, { "epoch": 1.9703435850912947, "grad_norm": 0.8369490504264832, "learning_rate": 5.708249116302966e-08, "loss": 0.9008, "step": 308410 }, { "epoch": 1.9704074722410334, "grad_norm": 0.7244152426719666, "learning_rate": 5.6843046606830374e-08, "loss": 0.8711, "step": 308420 }, { "epoch": 1.9704713593907721, "grad_norm": 1.4331932067871094, "learning_rate": 5.660410502142943e-08, "loss": 0.8069, "step": 308430 }, { "epoch": 1.9705352465405108, "grad_norm": 1.290757656097412, "learning_rate": 5.636566640923602e-08, "loss": 0.8775, "step": 308440 }, { "epoch": 1.9705991336902495, "grad_norm": 1.4813116788864136, "learning_rate": 5.612773077264266e-08, "loss": 0.8085, "step": 308450 }, { "epoch": 1.9706630208399882, "grad_norm": 0.7159252762794495, "learning_rate": 5.5890298114052995e-08, "loss": 0.6825, "step": 308460 }, { "epoch": 1.970726907989727, "grad_norm": 0.8382778763771057, "learning_rate": 5.5653368435854005e-08, "loss": 1.1421, "step": 308470 }, { "epoch": 1.9707907951394656, "grad_norm": 3.4221079349517822, "learning_rate": 5.5416941740432657e-08, "loss": 0.7611, "step": 308480 }, { "epoch": 1.9708546822892044, "grad_norm": 1.379305124282837, "learning_rate": 5.518101803017595e-08, "loss": 0.8903, "step": 308490 }, { "epoch": 1.970918569438943, "grad_norm": 1.2596087455749512, "learning_rate": 5.4945597307448636e-08, "loss": 1.3971, "step": 308500 }, { "epoch": 1.9709824565886818, "grad_norm": 1.0103543996810913, "learning_rate": 5.471067957463216e-08, "loss": 0.7434, "step": 308510 }, { "epoch": 1.9710463437384205, "grad_norm": 1.200963020324707, "learning_rate": 5.4476264834085747e-08, "loss": 0.7768, "step": 308520 }, { "epoch": 1.9711102308881592, "grad_norm": 1.0093293190002441, "learning_rate": 5.424235308817416e-08, "loss": 0.8774, "step": 308530 }, { "epoch": 1.9711741180378979, "grad_norm": 1.93284273147583, "learning_rate": 5.4008944339251075e-08, "loss": 0.8628, "step": 308540 }, { "epoch": 1.9712380051876366, "grad_norm": 2.522632360458374, "learning_rate": 5.3776038589670175e-08, "loss": 0.9007, "step": 308550 }, { "epoch": 1.9713018923373753, "grad_norm": 0.8160067200660706, "learning_rate": 5.354363584176847e-08, "loss": 0.9622, "step": 308560 }, { "epoch": 1.971365779487114, "grad_norm": 1.1085783243179321, "learning_rate": 5.331173609789408e-08, "loss": 0.6308, "step": 308570 }, { "epoch": 1.9714296666368527, "grad_norm": 0.7385463118553162, "learning_rate": 5.308033936038403e-08, "loss": 0.9497, "step": 308580 }, { "epoch": 1.9714935537865914, "grad_norm": 1.51162850856781, "learning_rate": 5.284944563155869e-08, "loss": 1.1597, "step": 308590 }, { "epoch": 1.9715574409363301, "grad_norm": 1.4442684650421143, "learning_rate": 5.261905491375507e-08, "loss": 0.8328, "step": 308600 }, { "epoch": 1.9716213280860688, "grad_norm": 0.8158069849014282, "learning_rate": 5.2389167209287995e-08, "loss": 0.9388, "step": 308610 }, { "epoch": 1.9716852152358073, "grad_norm": 1.0222749710083008, "learning_rate": 5.215978252047227e-08, "loss": 0.9329, "step": 308620 }, { "epoch": 1.9717491023855462, "grad_norm": 1.143006443977356, "learning_rate": 5.193090084961716e-08, "loss": 1.398, "step": 308630 }, { "epoch": 1.9718129895352847, "grad_norm": 0.857649028301239, "learning_rate": 5.170252219902638e-08, "loss": 0.6419, "step": 308640 }, { "epoch": 1.9718768766850236, "grad_norm": 1.4051035642623901, "learning_rate": 5.14746465710092e-08, "loss": 0.9146, "step": 308650 }, { "epoch": 1.9719407638347621, "grad_norm": 1.1671336889266968, "learning_rate": 5.124727396784712e-08, "loss": 1.0312, "step": 308660 }, { "epoch": 1.972004650984501, "grad_norm": 0.5252185463905334, "learning_rate": 5.102040439184386e-08, "loss": 0.6761, "step": 308670 }, { "epoch": 1.9720685381342395, "grad_norm": 1.0200812816619873, "learning_rate": 5.079403784527537e-08, "loss": 1.1923, "step": 308680 }, { "epoch": 1.9721324252839785, "grad_norm": 1.1095391511917114, "learning_rate": 5.056817433041761e-08, "loss": 0.7504, "step": 308690 }, { "epoch": 1.972196312433717, "grad_norm": 1.6178041696548462, "learning_rate": 5.0342813849557634e-08, "loss": 0.7886, "step": 308700 }, { "epoch": 1.9722601995834559, "grad_norm": 1.4610570669174194, "learning_rate": 5.011795640495476e-08, "loss": 0.6762, "step": 308710 }, { "epoch": 1.9723240867331944, "grad_norm": 1.3095816373825073, "learning_rate": 4.989360199887383e-08, "loss": 1.0937, "step": 308720 }, { "epoch": 1.9723879738829333, "grad_norm": 0.9102385640144348, "learning_rate": 4.966975063358526e-08, "loss": 0.7277, "step": 308730 }, { "epoch": 1.9724518610326718, "grad_norm": 0.9060350656509399, "learning_rate": 4.944640231132613e-08, "loss": 0.8636, "step": 308740 }, { "epoch": 1.9725157481824107, "grad_norm": 0.7537937164306641, "learning_rate": 4.922355703436132e-08, "loss": 0.6715, "step": 308750 }, { "epoch": 1.9725796353321492, "grad_norm": 1.6920832395553589, "learning_rate": 4.900121480492792e-08, "loss": 0.8922, "step": 308760 }, { "epoch": 1.9726435224818881, "grad_norm": 0.9837844967842102, "learning_rate": 4.8779375625263026e-08, "loss": 0.7857, "step": 308770 }, { "epoch": 1.9727074096316266, "grad_norm": 0.9435822367668152, "learning_rate": 4.8558039497609284e-08, "loss": 1.0804, "step": 308780 }, { "epoch": 1.9727712967813655, "grad_norm": 1.6462347507476807, "learning_rate": 4.8337206424187156e-08, "loss": 0.7593, "step": 308790 }, { "epoch": 1.972835183931104, "grad_norm": 1.322011947631836, "learning_rate": 4.8116876407222624e-08, "loss": 0.9249, "step": 308800 }, { "epoch": 1.972899071080843, "grad_norm": 0.7767255902290344, "learning_rate": 4.789704944893614e-08, "loss": 0.9743, "step": 308810 }, { "epoch": 1.9729629582305814, "grad_norm": 1.2136768102645874, "learning_rate": 4.76777255515426e-08, "loss": 0.8889, "step": 308820 }, { "epoch": 1.9730268453803204, "grad_norm": 1.8056683540344238, "learning_rate": 4.745890471725134e-08, "loss": 0.7624, "step": 308830 }, { "epoch": 1.9730907325300588, "grad_norm": 0.8533572554588318, "learning_rate": 4.724058694826061e-08, "loss": 0.7261, "step": 308840 }, { "epoch": 1.9731546196797976, "grad_norm": 1.088926076889038, "learning_rate": 4.7022772246774205e-08, "loss": 0.9164, "step": 308850 }, { "epoch": 1.9732185068295363, "grad_norm": 1.2552237510681152, "learning_rate": 4.680546061497926e-08, "loss": 0.7689, "step": 308860 }, { "epoch": 1.973282393979275, "grad_norm": 3.773592948913574, "learning_rate": 4.658865205507401e-08, "loss": 1.0516, "step": 308870 }, { "epoch": 1.9733462811290137, "grad_norm": 1.4239734411239624, "learning_rate": 4.63723465692345e-08, "loss": 0.8565, "step": 308880 }, { "epoch": 1.9734101682787524, "grad_norm": 1.5526864528656006, "learning_rate": 4.6156544159642326e-08, "loss": 0.8802, "step": 308890 }, { "epoch": 1.973474055428491, "grad_norm": 2.3669862747192383, "learning_rate": 4.594124482847351e-08, "loss": 0.7934, "step": 308900 }, { "epoch": 1.9735379425782298, "grad_norm": 0.9015244841575623, "learning_rate": 4.572644857788744e-08, "loss": 0.5639, "step": 308910 }, { "epoch": 1.9736018297279685, "grad_norm": 1.3841748237609863, "learning_rate": 4.55121554100546e-08, "loss": 0.7559, "step": 308920 }, { "epoch": 1.9736657168777072, "grad_norm": 1.267322063446045, "learning_rate": 4.5298365327128836e-08, "loss": 1.0862, "step": 308930 }, { "epoch": 1.973729604027446, "grad_norm": 0.8511454463005066, "learning_rate": 4.508507833126951e-08, "loss": 0.6968, "step": 308940 }, { "epoch": 1.9737934911771846, "grad_norm": 1.142594814300537, "learning_rate": 4.487229442461938e-08, "loss": 0.9511, "step": 308950 }, { "epoch": 1.9738573783269233, "grad_norm": 1.2920407056808472, "learning_rate": 4.466001360932115e-08, "loss": 0.76, "step": 308960 }, { "epoch": 1.973921265476662, "grad_norm": 0.6771072149276733, "learning_rate": 4.444823588751756e-08, "loss": 0.8519, "step": 308970 }, { "epoch": 1.9739851526264007, "grad_norm": 1.40349280834198, "learning_rate": 4.4236961261334697e-08, "loss": 0.893, "step": 308980 }, { "epoch": 1.9740490397761394, "grad_norm": 0.9430562257766724, "learning_rate": 4.4026189732904175e-08, "loss": 0.7753, "step": 308990 }, { "epoch": 1.9741129269258781, "grad_norm": 0.8726404309272766, "learning_rate": 4.381592130434653e-08, "loss": 1.1479, "step": 309000 }, { "epoch": 1.9741768140756168, "grad_norm": 0.7614126205444336, "learning_rate": 4.360615597778228e-08, "loss": 0.8382, "step": 309010 }, { "epoch": 1.9742407012253556, "grad_norm": 1.5872389078140259, "learning_rate": 4.3396893755320856e-08, "loss": 0.8007, "step": 309020 }, { "epoch": 1.9743045883750943, "grad_norm": 1.073142170906067, "learning_rate": 4.3188134639071674e-08, "loss": 0.9587, "step": 309030 }, { "epoch": 1.974368475524833, "grad_norm": 0.9952945113182068, "learning_rate": 4.2979878631138615e-08, "loss": 0.9594, "step": 309040 }, { "epoch": 1.9744323626745717, "grad_norm": 1.0852810144424438, "learning_rate": 4.277212573361999e-08, "loss": 0.9547, "step": 309050 }, { "epoch": 1.9744962498243104, "grad_norm": 1.8056341409683228, "learning_rate": 4.256487594859748e-08, "loss": 0.8571, "step": 309060 }, { "epoch": 1.974560136974049, "grad_norm": 0.8054976463317871, "learning_rate": 4.235812927817495e-08, "loss": 0.8943, "step": 309070 }, { "epoch": 1.9746240241237878, "grad_norm": 0.6693013906478882, "learning_rate": 4.2151885724417416e-08, "loss": 0.7472, "step": 309080 }, { "epoch": 1.9746879112735265, "grad_norm": 1.0728830099105835, "learning_rate": 4.19461452894121e-08, "loss": 1.0022, "step": 309090 }, { "epoch": 1.9747517984232652, "grad_norm": 1.1727595329284668, "learning_rate": 4.174090797523511e-08, "loss": 1.0071, "step": 309100 }, { "epoch": 1.9748156855730037, "grad_norm": 0.8001273274421692, "learning_rate": 4.153617378394037e-08, "loss": 0.8537, "step": 309110 }, { "epoch": 1.9748795727227426, "grad_norm": 2.7028579711914062, "learning_rate": 4.1331942717598435e-08, "loss": 0.8011, "step": 309120 }, { "epoch": 1.974943459872481, "grad_norm": 1.1264238357543945, "learning_rate": 4.112821477826323e-08, "loss": 0.7103, "step": 309130 }, { "epoch": 1.97500734702222, "grad_norm": 0.7403964996337891, "learning_rate": 4.092498996798866e-08, "loss": 0.7906, "step": 309140 }, { "epoch": 1.9750712341719585, "grad_norm": 0.7814722061157227, "learning_rate": 4.0722268288823086e-08, "loss": 1.0551, "step": 309150 }, { "epoch": 1.9751351213216974, "grad_norm": 1.4338569641113281, "learning_rate": 4.052004974279822e-08, "loss": 1.2511, "step": 309160 }, { "epoch": 1.975199008471436, "grad_norm": 0.9130191206932068, "learning_rate": 4.0318334331962414e-08, "loss": 0.7111, "step": 309170 }, { "epoch": 1.9752628956211749, "grad_norm": 1.1973497867584229, "learning_rate": 4.0117122058341836e-08, "loss": 0.8637, "step": 309180 }, { "epoch": 1.9753267827709133, "grad_norm": 0.5802940130233765, "learning_rate": 3.991641292396264e-08, "loss": 0.9088, "step": 309190 }, { "epoch": 1.9753906699206523, "grad_norm": 0.8695168495178223, "learning_rate": 3.971620693085098e-08, "loss": 0.8054, "step": 309200 }, { "epoch": 1.9754545570703907, "grad_norm": 1.0912967920303345, "learning_rate": 3.9516504081010817e-08, "loss": 0.8578, "step": 309210 }, { "epoch": 1.9755184442201297, "grad_norm": 0.8944864273071289, "learning_rate": 3.93173043764683e-08, "loss": 0.9516, "step": 309220 }, { "epoch": 1.9755823313698682, "grad_norm": 1.0011634826660156, "learning_rate": 3.911860781922183e-08, "loss": 0.621, "step": 309230 }, { "epoch": 1.975646218519607, "grad_norm": 1.4041532278060913, "learning_rate": 3.892041441126981e-08, "loss": 0.8882, "step": 309240 }, { "epoch": 1.9757101056693456, "grad_norm": 1.770445704460144, "learning_rate": 3.8722724154610644e-08, "loss": 0.776, "step": 309250 }, { "epoch": 1.9757739928190845, "grad_norm": 0.8279808759689331, "learning_rate": 3.852553705124273e-08, "loss": 0.7962, "step": 309260 }, { "epoch": 1.975837879968823, "grad_norm": 1.0905953645706177, "learning_rate": 3.832885310314227e-08, "loss": 0.7295, "step": 309270 }, { "epoch": 1.975901767118562, "grad_norm": 2.3927602767944336, "learning_rate": 3.8132672312291005e-08, "loss": 0.858, "step": 309280 }, { "epoch": 1.9759656542683004, "grad_norm": 1.1994647979736328, "learning_rate": 3.793699468067069e-08, "loss": 1.0402, "step": 309290 }, { "epoch": 1.9760295414180393, "grad_norm": 1.07845938205719, "learning_rate": 3.774182021024641e-08, "loss": 1.0522, "step": 309300 }, { "epoch": 1.9760934285677778, "grad_norm": 1.2150828838348389, "learning_rate": 3.754714890298328e-08, "loss": 0.7252, "step": 309310 }, { "epoch": 1.9761573157175167, "grad_norm": 0.6640936732292175, "learning_rate": 3.735298076084637e-08, "loss": 0.9416, "step": 309320 }, { "epoch": 1.9762212028672552, "grad_norm": 0.9532160758972168, "learning_rate": 3.7159315785789686e-08, "loss": 0.8159, "step": 309330 }, { "epoch": 1.976285090016994, "grad_norm": 1.0099438428878784, "learning_rate": 3.696615397976166e-08, "loss": 0.7487, "step": 309340 }, { "epoch": 1.9763489771667326, "grad_norm": 0.9834662079811096, "learning_rate": 3.677349534471075e-08, "loss": 0.8413, "step": 309350 }, { "epoch": 1.9764128643164713, "grad_norm": 1.1218534708023071, "learning_rate": 3.658133988256873e-08, "loss": 0.663, "step": 309360 }, { "epoch": 1.97647675146621, "grad_norm": 1.4277747869491577, "learning_rate": 3.638968759528405e-08, "loss": 1.1349, "step": 309370 }, { "epoch": 1.9765406386159488, "grad_norm": 1.0635625123977661, "learning_rate": 3.619853848477739e-08, "loss": 0.6781, "step": 309380 }, { "epoch": 1.9766045257656875, "grad_norm": 1.3290634155273438, "learning_rate": 3.6007892552974983e-08, "loss": 0.9776, "step": 309390 }, { "epoch": 1.9766684129154262, "grad_norm": 0.9123384952545166, "learning_rate": 3.581774980179753e-08, "loss": 0.8103, "step": 309400 }, { "epoch": 1.9767323000651649, "grad_norm": 0.7891640067100525, "learning_rate": 3.562811023316015e-08, "loss": 0.9219, "step": 309410 }, { "epoch": 1.9767961872149036, "grad_norm": 0.7858989834785461, "learning_rate": 3.5438973848977985e-08, "loss": 0.7497, "step": 309420 }, { "epoch": 1.9768600743646423, "grad_norm": 2.1860246658325195, "learning_rate": 3.5250340651149515e-08, "loss": 0.9047, "step": 309430 }, { "epoch": 1.976923961514381, "grad_norm": 3.4610395431518555, "learning_rate": 3.506221064157322e-08, "loss": 0.7699, "step": 309440 }, { "epoch": 1.9769878486641197, "grad_norm": 1.016252040863037, "learning_rate": 3.487458382214759e-08, "loss": 0.7325, "step": 309450 }, { "epoch": 1.9770517358138584, "grad_norm": 1.5463958978652954, "learning_rate": 3.468746019475999e-08, "loss": 0.7865, "step": 309460 }, { "epoch": 1.977115622963597, "grad_norm": 1.0177315473556519, "learning_rate": 3.450083976129226e-08, "loss": 0.734, "step": 309470 }, { "epoch": 1.9771795101133358, "grad_norm": 1.2989296913146973, "learning_rate": 3.431472252363177e-08, "loss": 1.0005, "step": 309480 }, { "epoch": 1.9772433972630745, "grad_norm": 1.1454684734344482, "learning_rate": 3.412910848364925e-08, "loss": 0.9839, "step": 309490 }, { "epoch": 1.9773072844128132, "grad_norm": 0.9672458171844482, "learning_rate": 3.394399764321543e-08, "loss": 0.9025, "step": 309500 }, { "epoch": 1.977371171562552, "grad_norm": 0.5477538704872131, "learning_rate": 3.375939000418438e-08, "loss": 0.7266, "step": 309510 }, { "epoch": 1.9774350587122906, "grad_norm": 0.7952908873558044, "learning_rate": 3.3575285568432365e-08, "loss": 0.7958, "step": 309520 }, { "epoch": 1.9774989458620293, "grad_norm": 1.088985800743103, "learning_rate": 3.339168433779682e-08, "loss": 0.7897, "step": 309530 }, { "epoch": 1.977562833011768, "grad_norm": 0.9924382567405701, "learning_rate": 3.3208586314137366e-08, "loss": 1.0101, "step": 309540 }, { "epoch": 1.9776267201615068, "grad_norm": 1.4753326177597046, "learning_rate": 3.302599149929697e-08, "loss": 0.7234, "step": 309550 }, { "epoch": 1.9776906073112455, "grad_norm": 0.9083718061447144, "learning_rate": 3.28438998951075e-08, "loss": 0.7613, "step": 309560 }, { "epoch": 1.9777544944609842, "grad_norm": 0.7028256058692932, "learning_rate": 3.2662311503411925e-08, "loss": 0.7795, "step": 309570 }, { "epoch": 1.9778183816107227, "grad_norm": 0.60731440782547, "learning_rate": 3.248122632603101e-08, "loss": 0.8068, "step": 309580 }, { "epoch": 1.9778822687604616, "grad_norm": 1.1641340255737305, "learning_rate": 3.230064436479663e-08, "loss": 0.9682, "step": 309590 }, { "epoch": 1.9779461559102, "grad_norm": 0.8485056161880493, "learning_rate": 3.2120565621518436e-08, "loss": 0.9777, "step": 309600 }, { "epoch": 1.978010043059939, "grad_norm": 1.0408892631530762, "learning_rate": 3.194099009801721e-08, "loss": 0.9244, "step": 309610 }, { "epoch": 1.9780739302096775, "grad_norm": 1.219113826751709, "learning_rate": 3.17619177961026e-08, "loss": 0.7961, "step": 309620 }, { "epoch": 1.9781378173594164, "grad_norm": 1.190337896347046, "learning_rate": 3.158334871756763e-08, "loss": 1.0724, "step": 309630 }, { "epoch": 1.978201704509155, "grad_norm": 0.6391940116882324, "learning_rate": 3.140528286422195e-08, "loss": 0.6665, "step": 309640 }, { "epoch": 1.9782655916588938, "grad_norm": 1.1940670013427734, "learning_rate": 3.122772023784748e-08, "loss": 1.0091, "step": 309650 }, { "epoch": 1.9783294788086323, "grad_norm": 0.9822312593460083, "learning_rate": 3.1050660840242776e-08, "loss": 1.0307, "step": 309660 }, { "epoch": 1.9783933659583712, "grad_norm": 1.103455901145935, "learning_rate": 3.087410467318974e-08, "loss": 0.7808, "step": 309670 }, { "epoch": 1.9784572531081097, "grad_norm": 0.7938708066940308, "learning_rate": 3.0698051738459186e-08, "loss": 0.9174, "step": 309680 }, { "epoch": 1.9785211402578486, "grad_norm": 1.227244257926941, "learning_rate": 3.0522502037833026e-08, "loss": 0.8234, "step": 309690 }, { "epoch": 1.9785850274075871, "grad_norm": 0.8157173991203308, "learning_rate": 3.03474555730765e-08, "loss": 0.8377, "step": 309700 }, { "epoch": 1.978648914557326, "grad_norm": 1.1654152870178223, "learning_rate": 3.0172912345943774e-08, "loss": 0.9414, "step": 309710 }, { "epoch": 1.9787128017070645, "grad_norm": 0.6457794308662415, "learning_rate": 2.999887235820564e-08, "loss": 0.8444, "step": 309720 }, { "epoch": 1.9787766888568035, "grad_norm": 1.0486998558044434, "learning_rate": 2.9825335611610716e-08, "loss": 0.9999, "step": 309730 }, { "epoch": 1.978840576006542, "grad_norm": 1.2199923992156982, "learning_rate": 2.965230210789649e-08, "loss": 0.8839, "step": 309740 }, { "epoch": 1.9789044631562809, "grad_norm": 1.5385959148406982, "learning_rate": 2.9479771848822668e-08, "loss": 0.7668, "step": 309750 }, { "epoch": 1.9789683503060194, "grad_norm": 0.6414878368377686, "learning_rate": 2.9307744836115647e-08, "loss": 0.825, "step": 309760 }, { "epoch": 1.9790322374557583, "grad_norm": 1.0736933946609497, "learning_rate": 2.9136221071507376e-08, "loss": 0.8665, "step": 309770 }, { "epoch": 1.9790961246054968, "grad_norm": 0.9618337154388428, "learning_rate": 2.8965200556729798e-08, "loss": 1.4679, "step": 309780 }, { "epoch": 1.9791600117552357, "grad_norm": 0.9808238744735718, "learning_rate": 2.8794683293503764e-08, "loss": 0.7879, "step": 309790 }, { "epoch": 1.9792238989049742, "grad_norm": 1.2491557598114014, "learning_rate": 2.864164803810243e-08, "loss": 1.1444, "step": 309800 }, { "epoch": 1.9792877860547131, "grad_norm": 1.5579088926315308, "learning_rate": 2.8472086957548326e-08, "loss": 0.9873, "step": 309810 }, { "epoch": 1.9793516732044516, "grad_norm": 1.5183621644973755, "learning_rate": 2.8303029133514013e-08, "loss": 0.9985, "step": 309820 }, { "epoch": 1.9794155603541903, "grad_norm": 0.9913473725318909, "learning_rate": 2.8134474567703684e-08, "loss": 0.943, "step": 309830 }, { "epoch": 1.979479447503929, "grad_norm": 1.0000174045562744, "learning_rate": 2.7966423261810426e-08, "loss": 0.752, "step": 309840 }, { "epoch": 1.9795433346536677, "grad_norm": 1.1551103591918945, "learning_rate": 2.779887521752178e-08, "loss": 0.9543, "step": 309850 }, { "epoch": 1.9796072218034064, "grad_norm": 0.5685717463493347, "learning_rate": 2.7631830436536387e-08, "loss": 0.766, "step": 309860 }, { "epoch": 1.9796711089531451, "grad_norm": 1.0493838787078857, "learning_rate": 2.7465288920530685e-08, "loss": 0.9169, "step": 309870 }, { "epoch": 1.9797349961028838, "grad_norm": 0.9339484572410583, "learning_rate": 2.729925067117556e-08, "loss": 0.8649, "step": 309880 }, { "epoch": 1.9797988832526225, "grad_norm": 0.6909673810005188, "learning_rate": 2.7133715690152994e-08, "loss": 0.9404, "step": 309890 }, { "epoch": 1.9798627704023612, "grad_norm": 1.0615977048873901, "learning_rate": 2.6968683979128327e-08, "loss": 0.8283, "step": 309900 }, { "epoch": 1.9799266575521, "grad_norm": 1.017637014389038, "learning_rate": 2.6804155539761346e-08, "loss": 0.8404, "step": 309910 }, { "epoch": 1.9799905447018387, "grad_norm": 1.027228832244873, "learning_rate": 2.6640130373711824e-08, "loss": 0.9486, "step": 309920 }, { "epoch": 1.9800544318515774, "grad_norm": 0.8696177005767822, "learning_rate": 2.64766084826229e-08, "loss": 0.9764, "step": 309930 }, { "epoch": 1.980118319001316, "grad_norm": 1.3316969871520996, "learning_rate": 2.6313589868154354e-08, "loss": 0.8519, "step": 309940 }, { "epoch": 1.9801822061510548, "grad_norm": 1.4391385316848755, "learning_rate": 2.6151074531938214e-08, "loss": 1.0183, "step": 309950 }, { "epoch": 1.9802460933007935, "grad_norm": 0.9557727575302124, "learning_rate": 2.598906247561206e-08, "loss": 1.1704, "step": 309960 }, { "epoch": 1.9803099804505322, "grad_norm": 1.243375539779663, "learning_rate": 2.5827553700813466e-08, "loss": 1.006, "step": 309970 }, { "epoch": 1.980373867600271, "grad_norm": 0.8975056409835815, "learning_rate": 2.5666548209163367e-08, "loss": 0.8066, "step": 309980 }, { "epoch": 1.9804377547500096, "grad_norm": 0.869646430015564, "learning_rate": 2.550604600228823e-08, "loss": 1.264, "step": 309990 }, { "epoch": 1.9805016418997483, "grad_norm": 0.8230206370353699, "learning_rate": 2.5346047081797885e-08, "loss": 0.808, "step": 310000 }, { "epoch": 1.980565529049487, "grad_norm": 0.7488855123519897, "learning_rate": 2.5186551449307705e-08, "loss": 0.903, "step": 310010 }, { "epoch": 1.9806294161992257, "grad_norm": 1.3847332000732422, "learning_rate": 2.502755910642196e-08, "loss": 0.8955, "step": 310020 }, { "epoch": 1.9806933033489644, "grad_norm": 0.9645035862922668, "learning_rate": 2.4869070054744926e-08, "loss": 0.9145, "step": 310030 }, { "epoch": 1.9807571904987031, "grad_norm": 1.4131964445114136, "learning_rate": 2.471108429586977e-08, "loss": 0.9992, "step": 310040 }, { "epoch": 1.9808210776484418, "grad_norm": 0.8667084574699402, "learning_rate": 2.455360183138966e-08, "loss": 0.8105, "step": 310050 }, { "epoch": 1.9808849647981805, "grad_norm": 1.352062702178955, "learning_rate": 2.439662266289222e-08, "loss": 0.8148, "step": 310060 }, { "epoch": 1.980948851947919, "grad_norm": 0.8915233612060547, "learning_rate": 2.4240146791953966e-08, "loss": 0.8997, "step": 310070 }, { "epoch": 1.981012739097658, "grad_norm": 1.9420161247253418, "learning_rate": 2.4084174220151412e-08, "loss": 0.6849, "step": 310080 }, { "epoch": 1.9810766262473964, "grad_norm": 0.611120343208313, "learning_rate": 2.3928704949055525e-08, "loss": 0.9966, "step": 310090 }, { "epoch": 1.9811405133971354, "grad_norm": 2.8182990550994873, "learning_rate": 2.377373898023727e-08, "loss": 0.7746, "step": 310100 }, { "epoch": 1.9812044005468739, "grad_norm": 0.9300549626350403, "learning_rate": 2.361927631524541e-08, "loss": 0.7707, "step": 310110 }, { "epoch": 1.9812682876966128, "grad_norm": 0.9150687456130981, "learning_rate": 2.3465316955650907e-08, "loss": 0.8798, "step": 310120 }, { "epoch": 1.9813321748463513, "grad_norm": 1.082905888557434, "learning_rate": 2.3311860902991423e-08, "loss": 0.9707, "step": 310130 }, { "epoch": 1.9813960619960902, "grad_norm": 1.0547878742218018, "learning_rate": 2.3158908158821268e-08, "loss": 0.8752, "step": 310140 }, { "epoch": 1.9814599491458287, "grad_norm": 0.7612301111221313, "learning_rate": 2.3006458724678103e-08, "loss": 0.8479, "step": 310150 }, { "epoch": 1.9815238362955676, "grad_norm": 0.8997673988342285, "learning_rate": 2.2854512602094036e-08, "loss": 0.9854, "step": 310160 }, { "epoch": 1.981587723445306, "grad_norm": 1.7977722883224487, "learning_rate": 2.270306979260117e-08, "loss": 0.8531, "step": 310170 }, { "epoch": 1.981651610595045, "grad_norm": 2.0584871768951416, "learning_rate": 2.255213029772607e-08, "loss": 1.1729, "step": 310180 }, { "epoch": 1.9817154977447835, "grad_norm": 0.806151807308197, "learning_rate": 2.2401694118984183e-08, "loss": 0.8084, "step": 310190 }, { "epoch": 1.9817793848945224, "grad_norm": 0.8514753580093384, "learning_rate": 2.2251761257896518e-08, "loss": 0.9633, "step": 310200 }, { "epoch": 1.981843272044261, "grad_norm": 0.8717397451400757, "learning_rate": 2.2102331715967428e-08, "loss": 0.8358, "step": 310210 }, { "epoch": 1.9819071591939998, "grad_norm": 0.7835900187492371, "learning_rate": 2.1953405494712364e-08, "loss": 0.8919, "step": 310220 }, { "epoch": 1.9819710463437383, "grad_norm": 0.8039876222610474, "learning_rate": 2.1804982595613478e-08, "loss": 1.0103, "step": 310230 }, { "epoch": 1.9820349334934773, "grad_norm": 1.0741451978683472, "learning_rate": 2.1657063020186218e-08, "loss": 0.9521, "step": 310240 }, { "epoch": 1.9820988206432157, "grad_norm": 1.1201385259628296, "learning_rate": 2.1509646769901636e-08, "loss": 1.0668, "step": 310250 }, { "epoch": 1.9821627077929547, "grad_norm": 0.6859369874000549, "learning_rate": 2.1362733846258533e-08, "loss": 0.8435, "step": 310260 }, { "epoch": 1.9822265949426932, "grad_norm": 1.120652198791504, "learning_rate": 2.1216324250727947e-08, "loss": 0.7989, "step": 310270 }, { "epoch": 1.982290482092432, "grad_norm": 0.8452847003936768, "learning_rate": 2.1070417984780933e-08, "loss": 0.7964, "step": 310280 }, { "epoch": 1.9823543692421706, "grad_norm": 1.0975067615509033, "learning_rate": 2.0925015049899633e-08, "loss": 1.0015, "step": 310290 }, { "epoch": 1.9824182563919095, "grad_norm": 1.0435091257095337, "learning_rate": 2.078011544753844e-08, "loss": 0.9067, "step": 310300 }, { "epoch": 1.982482143541648, "grad_norm": 1.3975369930267334, "learning_rate": 2.063571917916285e-08, "loss": 0.9096, "step": 310310 }, { "epoch": 1.9825460306913867, "grad_norm": 0.8230214715003967, "learning_rate": 2.04918262462217e-08, "loss": 0.7911, "step": 310320 }, { "epoch": 1.9826099178411254, "grad_norm": 1.2974622249603271, "learning_rate": 2.034843665016939e-08, "loss": 0.6434, "step": 310330 }, { "epoch": 1.982673804990864, "grad_norm": 0.7529612183570862, "learning_rate": 2.020555039244365e-08, "loss": 0.8052, "step": 310340 }, { "epoch": 1.9827376921406028, "grad_norm": 1.1118345260620117, "learning_rate": 2.0063167474487776e-08, "loss": 0.7984, "step": 310350 }, { "epoch": 1.9828015792903415, "grad_norm": 0.8356834650039673, "learning_rate": 1.9921287897733953e-08, "loss": 0.8602, "step": 310360 }, { "epoch": 1.9828654664400802, "grad_norm": 0.9013441801071167, "learning_rate": 1.977991166360882e-08, "loss": 1.1554, "step": 310370 }, { "epoch": 1.982929353589819, "grad_norm": 1.1530930995941162, "learning_rate": 1.963903877354456e-08, "loss": 0.9188, "step": 310380 }, { "epoch": 1.9829932407395576, "grad_norm": 1.799553632736206, "learning_rate": 1.949866922895116e-08, "loss": 0.9647, "step": 310390 }, { "epoch": 1.9830571278892963, "grad_norm": 1.3467897176742554, "learning_rate": 1.9358803031244155e-08, "loss": 0.796, "step": 310400 }, { "epoch": 1.983121015039035, "grad_norm": 2.4441401958465576, "learning_rate": 1.9219440181839077e-08, "loss": 0.758, "step": 310410 }, { "epoch": 1.9831849021887737, "grad_norm": 1.3251237869262695, "learning_rate": 1.9080580682129257e-08, "loss": 0.9851, "step": 310420 }, { "epoch": 1.9832487893385125, "grad_norm": 0.8397975564002991, "learning_rate": 1.8942224533519128e-08, "loss": 0.7603, "step": 310430 }, { "epoch": 1.9833126764882512, "grad_norm": 1.116385579109192, "learning_rate": 1.8804371737396463e-08, "loss": 0.7254, "step": 310440 }, { "epoch": 1.9833765636379899, "grad_norm": 0.6969117522239685, "learning_rate": 1.8667022295160153e-08, "loss": 0.9462, "step": 310450 }, { "epoch": 1.9834404507877286, "grad_norm": 1.8197271823883057, "learning_rate": 1.8530176208181317e-08, "loss": 0.8398, "step": 310460 }, { "epoch": 1.9835043379374673, "grad_norm": 0.9810863733291626, "learning_rate": 1.8393833477847733e-08, "loss": 1.0075, "step": 310470 }, { "epoch": 1.983568225087206, "grad_norm": 1.3049362897872925, "learning_rate": 1.825799410553053e-08, "loss": 0.6365, "step": 310480 }, { "epoch": 1.9836321122369447, "grad_norm": 1.0517561435699463, "learning_rate": 1.8122658092589727e-08, "loss": 0.7645, "step": 310490 }, { "epoch": 1.9836959993866834, "grad_norm": 0.9201875925064087, "learning_rate": 1.7987825440396456e-08, "loss": 0.9245, "step": 310500 }, { "epoch": 1.983759886536422, "grad_norm": 0.7460044026374817, "learning_rate": 1.7853496150305183e-08, "loss": 0.8921, "step": 310510 }, { "epoch": 1.9838237736861608, "grad_norm": 0.7730250358581543, "learning_rate": 1.7719670223675934e-08, "loss": 0.8903, "step": 310520 }, { "epoch": 1.9838876608358995, "grad_norm": 1.0625085830688477, "learning_rate": 1.7586347661840973e-08, "loss": 0.8217, "step": 310530 }, { "epoch": 1.9839515479856382, "grad_norm": 0.8693110346794128, "learning_rate": 1.7453528466160328e-08, "loss": 0.8307, "step": 310540 }, { "epoch": 1.984015435135377, "grad_norm": 2.2066643238067627, "learning_rate": 1.7321212637960716e-08, "loss": 1.1016, "step": 310550 }, { "epoch": 1.9840793222851154, "grad_norm": 1.2037817239761353, "learning_rate": 1.71894001785744e-08, "loss": 0.7203, "step": 310560 }, { "epoch": 1.9841432094348543, "grad_norm": 0.7629579901695251, "learning_rate": 1.70580910893392e-08, "loss": 0.759, "step": 310570 }, { "epoch": 1.9842070965845928, "grad_norm": 0.9195466041564941, "learning_rate": 1.6927285371565182e-08, "loss": 0.9331, "step": 310580 }, { "epoch": 1.9842709837343318, "grad_norm": 0.9034010171890259, "learning_rate": 1.679698302657351e-08, "loss": 0.7365, "step": 310590 }, { "epoch": 1.9843348708840702, "grad_norm": 1.162416934967041, "learning_rate": 1.6667184055685348e-08, "loss": 0.9009, "step": 310600 }, { "epoch": 1.9843987580338092, "grad_norm": 1.3139643669128418, "learning_rate": 1.6537888460194105e-08, "loss": 0.7501, "step": 310610 }, { "epoch": 1.9844626451835476, "grad_norm": 2.04888916015625, "learning_rate": 1.640909624140985e-08, "loss": 1.0927, "step": 310620 }, { "epoch": 1.9845265323332866, "grad_norm": 0.8494736552238464, "learning_rate": 1.6280807400625987e-08, "loss": 0.9263, "step": 310630 }, { "epoch": 1.984590419483025, "grad_norm": 0.645581841468811, "learning_rate": 1.6153021939141476e-08, "loss": 0.8617, "step": 310640 }, { "epoch": 1.984654306632764, "grad_norm": 0.8450150489807129, "learning_rate": 1.602573985823308e-08, "loss": 0.7702, "step": 310650 }, { "epoch": 1.9847181937825025, "grad_norm": 0.7330366969108582, "learning_rate": 1.5898961159188652e-08, "loss": 0.8653, "step": 310660 }, { "epoch": 1.9847820809322414, "grad_norm": 0.8303292989730835, "learning_rate": 1.5772685843284953e-08, "loss": 0.8591, "step": 310670 }, { "epoch": 1.9848459680819799, "grad_norm": 1.4144198894500732, "learning_rate": 1.5646913911793182e-08, "loss": 0.9637, "step": 310680 }, { "epoch": 1.9849098552317188, "grad_norm": 1.0386841297149658, "learning_rate": 1.5521645365979e-08, "loss": 0.8452, "step": 310690 }, { "epoch": 1.9849737423814573, "grad_norm": 1.1626369953155518, "learning_rate": 1.5396880207108056e-08, "loss": 0.9221, "step": 310700 }, { "epoch": 1.9850376295311962, "grad_norm": 0.6234553456306458, "learning_rate": 1.5272618436429353e-08, "loss": 1.0333, "step": 310710 }, { "epoch": 1.9851015166809347, "grad_norm": 1.2528012990951538, "learning_rate": 1.5148860055197445e-08, "loss": 0.9026, "step": 310720 }, { "epoch": 1.9851654038306736, "grad_norm": 0.8561525344848633, "learning_rate": 1.502560506466133e-08, "loss": 0.8678, "step": 310730 }, { "epoch": 1.9852292909804121, "grad_norm": 1.1541293859481812, "learning_rate": 1.4902853466064458e-08, "loss": 1.2388, "step": 310740 }, { "epoch": 1.985293178130151, "grad_norm": 0.7439695596694946, "learning_rate": 1.478060526063363e-08, "loss": 0.792, "step": 310750 }, { "epoch": 1.9853570652798895, "grad_norm": 0.8408505320549011, "learning_rate": 1.4658860449606737e-08, "loss": 0.8528, "step": 310760 }, { "epoch": 1.9854209524296285, "grad_norm": 0.9411364197731018, "learning_rate": 1.453761903421058e-08, "loss": 0.7902, "step": 310770 }, { "epoch": 1.985484839579367, "grad_norm": 1.618725061416626, "learning_rate": 1.4416881015660854e-08, "loss": 0.9176, "step": 310780 }, { "epoch": 1.9855487267291059, "grad_norm": 1.5805702209472656, "learning_rate": 1.4296646395178803e-08, "loss": 0.8695, "step": 310790 }, { "epoch": 1.9856126138788444, "grad_norm": 0.9400895833969116, "learning_rate": 1.417691517397457e-08, "loss": 0.8942, "step": 310800 }, { "epoch": 1.985676501028583, "grad_norm": 0.7929409742355347, "learning_rate": 1.4057687353247195e-08, "loss": 0.8796, "step": 310810 }, { "epoch": 1.9857403881783218, "grad_norm": 2.9416072368621826, "learning_rate": 1.3938962934212373e-08, "loss": 0.9496, "step": 310820 }, { "epoch": 1.9858042753280605, "grad_norm": 1.3560771942138672, "learning_rate": 1.3820741918046942e-08, "loss": 0.9342, "step": 310830 }, { "epoch": 1.9858681624777992, "grad_norm": 1.4186067581176758, "learning_rate": 1.3703024305955492e-08, "loss": 0.9982, "step": 310840 }, { "epoch": 1.9859320496275379, "grad_norm": 1.0630927085876465, "learning_rate": 1.358581009912041e-08, "loss": 0.8193, "step": 310850 }, { "epoch": 1.9859959367772766, "grad_norm": 0.8476179838180542, "learning_rate": 1.3469099298718535e-08, "loss": 0.6791, "step": 310860 }, { "epoch": 1.9860598239270153, "grad_norm": 0.7086460590362549, "learning_rate": 1.33528919059267e-08, "loss": 0.8539, "step": 310870 }, { "epoch": 1.986123711076754, "grad_norm": 0.8723922967910767, "learning_rate": 1.3237187921916195e-08, "loss": 0.7082, "step": 310880 }, { "epoch": 1.9861875982264927, "grad_norm": 1.034899115562439, "learning_rate": 1.3121987347852748e-08, "loss": 0.7722, "step": 310890 }, { "epoch": 1.9862514853762314, "grad_norm": 0.9084420204162598, "learning_rate": 1.3007290184890997e-08, "loss": 0.791, "step": 310900 }, { "epoch": 1.9863153725259701, "grad_norm": 1.9150114059448242, "learning_rate": 1.2893096434196672e-08, "loss": 0.8656, "step": 310910 }, { "epoch": 1.9863792596757088, "grad_norm": 2.226154327392578, "learning_rate": 1.2779406096913305e-08, "loss": 0.8388, "step": 310920 }, { "epoch": 1.9864431468254475, "grad_norm": 0.9489161968231201, "learning_rate": 1.2666219174184424e-08, "loss": 1.0354, "step": 310930 }, { "epoch": 1.9865070339751862, "grad_norm": 0.6309258937835693, "learning_rate": 1.255353566715356e-08, "loss": 0.8209, "step": 310940 }, { "epoch": 1.986570921124925, "grad_norm": 0.710475742816925, "learning_rate": 1.244135557695314e-08, "loss": 0.7545, "step": 310950 }, { "epoch": 1.9866348082746637, "grad_norm": 1.2402219772338867, "learning_rate": 1.232967890471004e-08, "loss": 0.7927, "step": 310960 }, { "epoch": 1.9866986954244024, "grad_norm": 1.2162563800811768, "learning_rate": 1.2218505651556688e-08, "loss": 0.8432, "step": 310970 }, { "epoch": 1.986762582574141, "grad_norm": 0.8971146941184998, "learning_rate": 1.210783581860886e-08, "loss": 1.0107, "step": 310980 }, { "epoch": 1.9868264697238798, "grad_norm": 1.3195732831954956, "learning_rate": 1.1997669406982326e-08, "loss": 0.8304, "step": 310990 }, { "epoch": 1.9868903568736185, "grad_norm": 0.9834543466567993, "learning_rate": 1.188800641778176e-08, "loss": 0.816, "step": 311000 }, { "epoch": 1.9869542440233572, "grad_norm": 0.8268367052078247, "learning_rate": 1.1778846852111836e-08, "loss": 0.7153, "step": 311010 }, { "epoch": 1.987018131173096, "grad_norm": 0.816234290599823, "learning_rate": 1.1670190711082773e-08, "loss": 1.0464, "step": 311020 }, { "epoch": 1.9870820183228346, "grad_norm": 1.0838505029678345, "learning_rate": 1.1562037995777041e-08, "loss": 0.696, "step": 311030 }, { "epoch": 1.9871459054725733, "grad_norm": 1.1296429634094238, "learning_rate": 1.145438870728266e-08, "loss": 0.945, "step": 311040 }, { "epoch": 1.9872097926223118, "grad_norm": 1.261118769645691, "learning_rate": 1.1347242846693196e-08, "loss": 0.6953, "step": 311050 }, { "epoch": 1.9872736797720507, "grad_norm": 0.8011787533760071, "learning_rate": 1.1240600415085567e-08, "loss": 0.8288, "step": 311060 }, { "epoch": 1.9873375669217892, "grad_norm": 1.4028711318969727, "learning_rate": 1.1134461413531138e-08, "loss": 0.8766, "step": 311070 }, { "epoch": 1.9874014540715281, "grad_norm": 1.1019203662872314, "learning_rate": 1.1028825843095724e-08, "loss": 0.6276, "step": 311080 }, { "epoch": 1.9874653412212666, "grad_norm": 1.0110036134719849, "learning_rate": 1.092369370485069e-08, "loss": 0.6338, "step": 311090 }, { "epoch": 1.9875292283710055, "grad_norm": 0.6997506022453308, "learning_rate": 1.0819064999850747e-08, "loss": 0.8742, "step": 311100 }, { "epoch": 1.987593115520744, "grad_norm": 0.68264240026474, "learning_rate": 1.0714939729145056e-08, "loss": 0.6993, "step": 311110 }, { "epoch": 1.987657002670483, "grad_norm": 1.01038658618927, "learning_rate": 1.061131789378833e-08, "loss": 1.0496, "step": 311120 }, { "epoch": 1.9877208898202214, "grad_norm": 0.7514334321022034, "learning_rate": 1.0508199494824178e-08, "loss": 0.7158, "step": 311130 }, { "epoch": 1.9877847769699604, "grad_norm": 0.6786255836486816, "learning_rate": 1.0405584533290657e-08, "loss": 0.932, "step": 311140 }, { "epoch": 1.9878486641196988, "grad_norm": 0.9236333966255188, "learning_rate": 1.0303473010214726e-08, "loss": 0.8977, "step": 311150 }, { "epoch": 1.9879125512694378, "grad_norm": 1.0109336376190186, "learning_rate": 1.020186492663444e-08, "loss": 0.9807, "step": 311160 }, { "epoch": 1.9879764384191763, "grad_norm": 1.585142970085144, "learning_rate": 1.0100760283571209e-08, "loss": 0.8529, "step": 311170 }, { "epoch": 1.9880403255689152, "grad_norm": 0.9748506546020508, "learning_rate": 1.0000159082035332e-08, "loss": 0.9685, "step": 311180 }, { "epoch": 1.9881042127186537, "grad_norm": 0.7866042256355286, "learning_rate": 9.900061323048215e-09, "loss": 0.8834, "step": 311190 }, { "epoch": 1.9881680998683926, "grad_norm": 1.2893184423446655, "learning_rate": 9.800467007614611e-09, "loss": 0.9315, "step": 311200 }, { "epoch": 1.988231987018131, "grad_norm": 1.1545730829238892, "learning_rate": 9.701376136739271e-09, "loss": 0.8941, "step": 311210 }, { "epoch": 1.98829587416787, "grad_norm": 0.687833845615387, "learning_rate": 9.602788711415844e-09, "loss": 0.844, "step": 311220 }, { "epoch": 1.9883597613176085, "grad_norm": 0.8345115184783936, "learning_rate": 9.504704732643533e-09, "loss": 0.8394, "step": 311230 }, { "epoch": 1.9884236484673474, "grad_norm": 0.6788913607597351, "learning_rate": 9.407124201404883e-09, "loss": 0.7653, "step": 311240 }, { "epoch": 1.988487535617086, "grad_norm": 0.9235719442367554, "learning_rate": 9.31004711868244e-09, "loss": 0.8282, "step": 311250 }, { "epoch": 1.9885514227668248, "grad_norm": 1.3782670497894287, "learning_rate": 9.213473485458757e-09, "loss": 0.7283, "step": 311260 }, { "epoch": 1.9886153099165633, "grad_norm": 1.1919926404953003, "learning_rate": 9.117403302705274e-09, "loss": 0.7445, "step": 311270 }, { "epoch": 1.988679197066302, "grad_norm": 1.1234567165374756, "learning_rate": 9.021836571382336e-09, "loss": 0.8275, "step": 311280 }, { "epoch": 1.9887430842160407, "grad_norm": 1.243963599205017, "learning_rate": 8.926773292461389e-09, "loss": 1.1169, "step": 311290 }, { "epoch": 1.9888069713657794, "grad_norm": 2.11574649810791, "learning_rate": 8.83221346689722e-09, "loss": 1.1771, "step": 311300 }, { "epoch": 1.9888708585155181, "grad_norm": 1.3299797773361206, "learning_rate": 8.738157095639076e-09, "loss": 0.8361, "step": 311310 }, { "epoch": 1.9889347456652569, "grad_norm": 0.8851807117462158, "learning_rate": 8.644604179636195e-09, "loss": 0.957, "step": 311320 }, { "epoch": 1.9889986328149956, "grad_norm": 0.9992744326591492, "learning_rate": 8.551554719832267e-09, "loss": 0.9448, "step": 311330 }, { "epoch": 1.9890625199647343, "grad_norm": 0.8495112061500549, "learning_rate": 8.459008717159878e-09, "loss": 0.6763, "step": 311340 }, { "epoch": 1.989126407114473, "grad_norm": 0.5434170365333557, "learning_rate": 8.366966172557167e-09, "loss": 0.6915, "step": 311350 }, { "epoch": 1.9891902942642117, "grad_norm": 0.9140389561653137, "learning_rate": 8.275427086951171e-09, "loss": 0.9399, "step": 311360 }, { "epoch": 1.9892541814139504, "grad_norm": 0.9066022038459778, "learning_rate": 8.184391461252272e-09, "loss": 0.8907, "step": 311370 }, { "epoch": 1.989318068563689, "grad_norm": 2.6549975872039795, "learning_rate": 8.093859296393058e-09, "loss": 1.2238, "step": 311380 }, { "epoch": 1.9893819557134278, "grad_norm": 1.2978613376617432, "learning_rate": 8.00383059327281e-09, "loss": 0.9597, "step": 311390 }, { "epoch": 1.9894458428631665, "grad_norm": 1.0250873565673828, "learning_rate": 7.91430535280746e-09, "loss": 1.1152, "step": 311400 }, { "epoch": 1.9895097300129052, "grad_norm": 0.81373131275177, "learning_rate": 7.82528357589074e-09, "loss": 0.9364, "step": 311410 }, { "epoch": 1.989573617162644, "grad_norm": 0.51475989818573, "learning_rate": 7.736765263427481e-09, "loss": 0.9326, "step": 311420 }, { "epoch": 1.9896375043123826, "grad_norm": 0.9355649948120117, "learning_rate": 7.64875041630031e-09, "loss": 0.9437, "step": 311430 }, { "epoch": 1.9897013914621213, "grad_norm": 1.1884983777999878, "learning_rate": 7.561239035397405e-09, "loss": 0.8401, "step": 311440 }, { "epoch": 1.98976527861186, "grad_norm": 1.307823657989502, "learning_rate": 7.474231121606946e-09, "loss": 1.0791, "step": 311450 }, { "epoch": 1.9898291657615987, "grad_norm": 1.1962790489196777, "learning_rate": 7.387726675800455e-09, "loss": 0.9416, "step": 311460 }, { "epoch": 1.9898930529113374, "grad_norm": 0.8856204748153687, "learning_rate": 7.30172569884946e-09, "loss": 0.8437, "step": 311470 }, { "epoch": 1.9899569400610762, "grad_norm": 1.0025514364242554, "learning_rate": 7.216228191619934e-09, "loss": 0.6585, "step": 311480 }, { "epoch": 1.9900208272108149, "grad_norm": 2.3673927783966064, "learning_rate": 7.1312341549723e-09, "loss": 0.829, "step": 311490 }, { "epoch": 1.9900847143605536, "grad_norm": 0.5192925333976746, "learning_rate": 7.046743589761428e-09, "loss": 0.7879, "step": 311500 }, { "epoch": 1.9901486015102923, "grad_norm": 0.9873639345169067, "learning_rate": 6.9627564968421935e-09, "loss": 0.6812, "step": 311510 }, { "epoch": 1.990212488660031, "grad_norm": 1.7059824466705322, "learning_rate": 6.879272877052811e-09, "loss": 1.0209, "step": 311520 }, { "epoch": 1.9902763758097697, "grad_norm": 0.8215252757072449, "learning_rate": 6.796292731248155e-09, "loss": 0.7219, "step": 311530 }, { "epoch": 1.9903402629595082, "grad_norm": 1.047405481338501, "learning_rate": 6.713816060249789e-09, "loss": 0.8509, "step": 311540 }, { "epoch": 1.990404150109247, "grad_norm": 0.7972722053527832, "learning_rate": 6.631842864890381e-09, "loss": 0.912, "step": 311550 }, { "epoch": 1.9904680372589856, "grad_norm": 0.8139147758483887, "learning_rate": 6.5503731460081485e-09, "loss": 0.6762, "step": 311560 }, { "epoch": 1.9905319244087245, "grad_norm": 1.3652825355529785, "learning_rate": 6.469406904408004e-09, "loss": 0.8775, "step": 311570 }, { "epoch": 1.990595811558463, "grad_norm": 1.0654441118240356, "learning_rate": 6.388944140911512e-09, "loss": 0.826, "step": 311580 }, { "epoch": 1.990659698708202, "grad_norm": 1.323000192642212, "learning_rate": 6.308984856329137e-09, "loss": 1.079, "step": 311590 }, { "epoch": 1.9907235858579404, "grad_norm": 1.7142542600631714, "learning_rate": 6.229529051465788e-09, "loss": 1.0488, "step": 311600 }, { "epoch": 1.9907874730076793, "grad_norm": 2.2610385417938232, "learning_rate": 6.150576727120827e-09, "loss": 0.8548, "step": 311610 }, { "epoch": 1.9908513601574178, "grad_norm": 0.9243570566177368, "learning_rate": 6.0721278840936146e-09, "loss": 0.8044, "step": 311620 }, { "epoch": 1.9909152473071567, "grad_norm": 1.067752718925476, "learning_rate": 5.9941825231724094e-09, "loss": 0.6292, "step": 311630 }, { "epoch": 1.9909791344568952, "grad_norm": 1.1682084798812866, "learning_rate": 5.916740645134367e-09, "loss": 0.9441, "step": 311640 }, { "epoch": 1.9910430216066342, "grad_norm": 0.7192792296409607, "learning_rate": 5.839802250773297e-09, "loss": 0.5446, "step": 311650 }, { "epoch": 1.9911069087563726, "grad_norm": 0.6210076808929443, "learning_rate": 5.763367340849701e-09, "loss": 0.9302, "step": 311660 }, { "epoch": 1.9911707959061116, "grad_norm": 1.1094497442245483, "learning_rate": 5.687435916146288e-09, "loss": 0.9404, "step": 311670 }, { "epoch": 1.99123468305585, "grad_norm": 0.5943570733070374, "learning_rate": 5.61200797741801e-09, "loss": 0.6468, "step": 311680 }, { "epoch": 1.991298570205589, "grad_norm": 1.3287687301635742, "learning_rate": 5.5370835254253685e-09, "loss": 0.9242, "step": 311690 }, { "epoch": 1.9913624573553275, "grad_norm": 1.7294316291809082, "learning_rate": 5.462662560928866e-09, "loss": 0.7075, "step": 311700 }, { "epoch": 1.9914263445050664, "grad_norm": 1.2055655717849731, "learning_rate": 5.3887450846779045e-09, "loss": 0.822, "step": 311710 }, { "epoch": 1.9914902316548049, "grad_norm": 1.283769965171814, "learning_rate": 5.315331097405229e-09, "loss": 1.0232, "step": 311720 }, { "epoch": 1.9915541188045438, "grad_norm": 1.0435206890106201, "learning_rate": 5.242420599865794e-09, "loss": 0.7253, "step": 311730 }, { "epoch": 1.9916180059542823, "grad_norm": 1.496469497680664, "learning_rate": 5.1700135927867935e-09, "loss": 0.9393, "step": 311740 }, { "epoch": 1.9916818931040212, "grad_norm": 1.004408836364746, "learning_rate": 5.0981100768954235e-09, "loss": 0.848, "step": 311750 }, { "epoch": 1.9917457802537597, "grad_norm": 0.7888327836990356, "learning_rate": 5.026710052918882e-09, "loss": 0.7427, "step": 311760 }, { "epoch": 1.9918096674034984, "grad_norm": 1.417661190032959, "learning_rate": 4.955813521573261e-09, "loss": 0.7668, "step": 311770 }, { "epoch": 1.9918735545532371, "grad_norm": 1.0522990226745605, "learning_rate": 4.885420483574654e-09, "loss": 0.8339, "step": 311780 }, { "epoch": 1.9919374417029758, "grad_norm": 1.6315908432006836, "learning_rate": 4.8155309396336056e-09, "loss": 0.8946, "step": 311790 }, { "epoch": 1.9920013288527145, "grad_norm": 0.8391977548599243, "learning_rate": 4.746144890449555e-09, "loss": 0.846, "step": 311800 }, { "epoch": 1.9920652160024532, "grad_norm": 0.7376478314399719, "learning_rate": 4.6772623367274945e-09, "loss": 0.9631, "step": 311810 }, { "epoch": 1.992129103152192, "grad_norm": 0.8697778582572937, "learning_rate": 4.6088832791557626e-09, "loss": 0.9359, "step": 311820 }, { "epoch": 1.9921929903019306, "grad_norm": 1.1047428846359253, "learning_rate": 4.541007718422696e-09, "loss": 0.7677, "step": 311830 }, { "epoch": 1.9922568774516694, "grad_norm": 1.339664101600647, "learning_rate": 4.473635655216635e-09, "loss": 0.7771, "step": 311840 }, { "epoch": 1.992320764601408, "grad_norm": 1.1949299573898315, "learning_rate": 4.4067670902148136e-09, "loss": 0.7404, "step": 311850 }, { "epoch": 1.9923846517511468, "grad_norm": 1.6755359172821045, "learning_rate": 4.340402024083368e-09, "loss": 0.8248, "step": 311860 }, { "epoch": 1.9924485389008855, "grad_norm": 1.1501131057739258, "learning_rate": 4.274540457505083e-09, "loss": 0.7366, "step": 311870 }, { "epoch": 1.9925124260506242, "grad_norm": 1.342016339302063, "learning_rate": 4.20918239112944e-09, "loss": 0.8977, "step": 311880 }, { "epoch": 1.9925763132003629, "grad_norm": 0.888166069984436, "learning_rate": 4.1443278256170226e-09, "loss": 0.9847, "step": 311890 }, { "epoch": 1.9926402003501016, "grad_norm": 1.199773907661438, "learning_rate": 4.079976761628412e-09, "loss": 0.9824, "step": 311900 }, { "epoch": 1.9927040874998403, "grad_norm": 0.8143197894096375, "learning_rate": 4.016129199801988e-09, "loss": 0.6692, "step": 311910 }, { "epoch": 1.992767974649579, "grad_norm": 0.5461716651916504, "learning_rate": 3.952785140792781e-09, "loss": 0.9864, "step": 311920 }, { "epoch": 1.9928318617993177, "grad_norm": 0.9898871779441833, "learning_rate": 3.889944585228067e-09, "loss": 0.8885, "step": 311930 }, { "epoch": 1.9928957489490564, "grad_norm": 1.2186857461929321, "learning_rate": 3.827607533746225e-09, "loss": 0.8809, "step": 311940 }, { "epoch": 1.9929596360987951, "grad_norm": 0.7133896946907043, "learning_rate": 3.765773986968979e-09, "loss": 0.7136, "step": 311950 }, { "epoch": 1.9930235232485338, "grad_norm": 1.3756506443023682, "learning_rate": 3.704443945523606e-09, "loss": 0.7445, "step": 311960 }, { "epoch": 1.9930874103982725, "grad_norm": 0.9272878766059875, "learning_rate": 3.64361741003183e-09, "loss": 0.905, "step": 311970 }, { "epoch": 1.9931512975480112, "grad_norm": 0.8822316527366638, "learning_rate": 3.583294381098723e-09, "loss": 0.6295, "step": 311980 }, { "epoch": 1.99321518469775, "grad_norm": 0.501191258430481, "learning_rate": 3.5234748593349077e-09, "loss": 0.6983, "step": 311990 }, { "epoch": 1.9932790718474886, "grad_norm": 2.474304676055908, "learning_rate": 3.4641588453454553e-09, "loss": 0.7978, "step": 312000 }, { "epoch": 1.9933429589972271, "grad_norm": 0.9736420512199402, "learning_rate": 3.4053463397243357e-09, "loss": 0.905, "step": 312010 }, { "epoch": 1.993406846146966, "grad_norm": 0.9467423558235168, "learning_rate": 3.347037343065518e-09, "loss": 0.8016, "step": 312020 }, { "epoch": 1.9934707332967045, "grad_norm": 0.658368706703186, "learning_rate": 3.2892318559518685e-09, "loss": 0.7383, "step": 312030 }, { "epoch": 1.9935346204464435, "grad_norm": 1.1242454051971436, "learning_rate": 3.2319298789718065e-09, "loss": 0.8804, "step": 312040 }, { "epoch": 1.993598507596182, "grad_norm": 0.6961290836334229, "learning_rate": 3.1751314126970967e-09, "loss": 0.8945, "step": 312050 }, { "epoch": 1.9936623947459209, "grad_norm": 0.6901680827140808, "learning_rate": 3.1188364577050543e-09, "loss": 0.7463, "step": 312060 }, { "epoch": 1.9937262818956594, "grad_norm": 0.796148955821991, "learning_rate": 3.063045014556343e-09, "loss": 1.2063, "step": 312070 }, { "epoch": 1.9937901690453983, "grad_norm": 1.2905848026275635, "learning_rate": 3.0077570838171753e-09, "loss": 0.8915, "step": 312080 }, { "epoch": 1.9938540561951368, "grad_norm": 0.9283998012542725, "learning_rate": 2.952972666042664e-09, "loss": 1.0008, "step": 312090 }, { "epoch": 1.9939179433448757, "grad_norm": 0.957834005355835, "learning_rate": 2.8986917617879195e-09, "loss": 0.9992, "step": 312100 }, { "epoch": 1.9939818304946142, "grad_norm": 0.9345075488090515, "learning_rate": 2.8449143715969517e-09, "loss": 1.1347, "step": 312110 }, { "epoch": 1.9940457176443531, "grad_norm": 0.9846530556678772, "learning_rate": 2.7916404960137697e-09, "loss": 0.6848, "step": 312120 }, { "epoch": 1.9941096047940916, "grad_norm": 0.8634576797485352, "learning_rate": 2.73887013556573e-09, "loss": 1.1417, "step": 312130 }, { "epoch": 1.9941734919438305, "grad_norm": 0.9867701530456543, "learning_rate": 2.6866032907968406e-09, "loss": 0.9245, "step": 312140 }, { "epoch": 1.994237379093569, "grad_norm": 0.9844792485237122, "learning_rate": 2.6348399622233566e-09, "loss": 0.8678, "step": 312150 }, { "epoch": 1.994301266243308, "grad_norm": NaN, "learning_rate": 2.5886834732924416e-09, "loss": 0.9213, "step": 312160 }, { "epoch": 1.9943651533930464, "grad_norm": 2.1825661659240723, "learning_rate": 2.5378768269301856e-09, "loss": 0.9257, "step": 312170 }, { "epoch": 1.9944290405427854, "grad_norm": 0.8919321894645691, "learning_rate": 2.4875736982676868e-09, "loss": 0.874, "step": 312180 }, { "epoch": 1.9944929276925238, "grad_norm": 3.1658194065093994, "learning_rate": 2.437774087810096e-09, "loss": 0.9289, "step": 312190 }, { "epoch": 1.9945568148422628, "grad_norm": 0.993950366973877, "learning_rate": 2.3884779960570147e-09, "loss": 0.9231, "step": 312200 }, { "epoch": 1.9946207019920013, "grad_norm": 0.6404727697372437, "learning_rate": 2.3396854235080425e-09, "loss": 1.0877, "step": 312210 }, { "epoch": 1.9946845891417402, "grad_norm": 1.1700347661972046, "learning_rate": 2.2913963706516772e-09, "loss": 0.972, "step": 312220 }, { "epoch": 1.9947484762914787, "grad_norm": 2.679147481918335, "learning_rate": 2.2436108379764175e-09, "loss": 0.6191, "step": 312230 }, { "epoch": 1.9948123634412176, "grad_norm": 1.6708064079284668, "learning_rate": 2.1963288259596594e-09, "loss": 1.2286, "step": 312240 }, { "epoch": 1.994876250590956, "grad_norm": 0.9976462125778198, "learning_rate": 2.1495503350787983e-09, "loss": 0.686, "step": 312250 }, { "epoch": 1.9949401377406948, "grad_norm": 0.7239921689033508, "learning_rate": 2.1032753658112302e-09, "loss": 0.9027, "step": 312260 }, { "epoch": 1.9950040248904335, "grad_norm": 0.9449382424354553, "learning_rate": 2.057503918612147e-09, "loss": 0.8882, "step": 312270 }, { "epoch": 1.9950679120401722, "grad_norm": 0.9939311146736145, "learning_rate": 2.012235993953393e-09, "loss": 0.9177, "step": 312280 }, { "epoch": 1.995131799189911, "grad_norm": 1.0171376466751099, "learning_rate": 1.9674715922846086e-09, "loss": 0.7605, "step": 312290 }, { "epoch": 1.9951956863396496, "grad_norm": 0.8827845454216003, "learning_rate": 1.9232107140554346e-09, "loss": 0.9845, "step": 312300 }, { "epoch": 1.9952595734893883, "grad_norm": 0.8483384251594543, "learning_rate": 1.8794533597155105e-09, "loss": 0.8771, "step": 312310 }, { "epoch": 1.995323460639127, "grad_norm": 1.1981017589569092, "learning_rate": 1.8361995297033752e-09, "loss": 0.9435, "step": 312320 }, { "epoch": 1.9953873477888657, "grad_norm": 0.6356529593467712, "learning_rate": 1.7934492244575662e-09, "loss": 0.8276, "step": 312330 }, { "epoch": 1.9954512349386044, "grad_norm": 1.1917228698730469, "learning_rate": 1.7512024444055197e-09, "loss": 0.9145, "step": 312340 }, { "epoch": 1.9955151220883431, "grad_norm": 0.7620391845703125, "learning_rate": 1.7094591899691204e-09, "loss": 1.0847, "step": 312350 }, { "epoch": 1.9955790092380818, "grad_norm": 2.1927714347839355, "learning_rate": 1.668219461575804e-09, "loss": 0.5758, "step": 312360 }, { "epoch": 1.9956428963878206, "grad_norm": 0.8946861624717712, "learning_rate": 1.6274832596419043e-09, "loss": 0.8894, "step": 312370 }, { "epoch": 1.9957067835375593, "grad_norm": 0.6378492116928101, "learning_rate": 1.5872505845726525e-09, "loss": 0.8183, "step": 312380 }, { "epoch": 1.995770670687298, "grad_norm": 0.891268253326416, "learning_rate": 1.5475214367732804e-09, "loss": 0.7768, "step": 312390 }, { "epoch": 1.9958345578370367, "grad_norm": 1.2618356943130493, "learning_rate": 1.508295816643468e-09, "loss": 1.0496, "step": 312400 }, { "epoch": 1.9958984449867754, "grad_norm": 0.9742398858070374, "learning_rate": 1.4695737245828955e-09, "loss": 1.0082, "step": 312410 }, { "epoch": 1.995962332136514, "grad_norm": 0.8416606783866882, "learning_rate": 1.43135516097459e-09, "loss": 0.7395, "step": 312420 }, { "epoch": 1.9960262192862528, "grad_norm": 2.4470884799957275, "learning_rate": 1.3936401262126809e-09, "loss": 0.7238, "step": 312430 }, { "epoch": 1.9960901064359915, "grad_norm": 0.7618647813796997, "learning_rate": 1.3564286206690924e-09, "loss": 0.9034, "step": 312440 }, { "epoch": 1.9961539935857302, "grad_norm": 1.1379642486572266, "learning_rate": 1.3197206447213007e-09, "loss": 0.9954, "step": 312450 }, { "epoch": 1.996217880735469, "grad_norm": 1.0726209878921509, "learning_rate": 1.2835161987356792e-09, "loss": 1.0815, "step": 312460 }, { "epoch": 1.9962817678852076, "grad_norm": 1.2896431684494019, "learning_rate": 1.2478152830841528e-09, "loss": 0.8023, "step": 312470 }, { "epoch": 1.9963456550349463, "grad_norm": 1.954415202140808, "learning_rate": 1.2126178981219928e-09, "loss": 1.3741, "step": 312480 }, { "epoch": 1.996409542184685, "grad_norm": 0.8789839744567871, "learning_rate": 1.1779240442044703e-09, "loss": 0.9468, "step": 312490 }, { "epoch": 1.9964734293344235, "grad_norm": 1.0630998611450195, "learning_rate": 1.143733721681306e-09, "loss": 0.8048, "step": 312500 }, { "epoch": 1.9965373164841624, "grad_norm": 1.151191234588623, "learning_rate": 1.1100469308911176e-09, "loss": 0.6205, "step": 312510 }, { "epoch": 1.996601203633901, "grad_norm": 0.8841159343719482, "learning_rate": 1.0768636721836257e-09, "loss": 0.6618, "step": 312520 }, { "epoch": 1.9966650907836399, "grad_norm": 1.1014671325683594, "learning_rate": 1.0441839458807944e-09, "loss": 0.8356, "step": 312530 }, { "epoch": 1.9967289779333783, "grad_norm": 0.8289458155632019, "learning_rate": 1.0120077523212423e-09, "loss": 0.7793, "step": 312540 }, { "epoch": 1.9967928650831173, "grad_norm": 2.142965078353882, "learning_rate": 9.803350918269338e-10, "loss": 0.7983, "step": 312550 }, { "epoch": 1.9968567522328557, "grad_norm": 0.8722677230834961, "learning_rate": 9.491659647198336e-10, "loss": 0.8305, "step": 312560 }, { "epoch": 1.9969206393825947, "grad_norm": 1.0402874946594238, "learning_rate": 9.185003713052531e-10, "loss": 1.0401, "step": 312570 }, { "epoch": 1.9969845265323332, "grad_norm": 1.0141799449920654, "learning_rate": 8.883383118940547e-10, "loss": 0.9602, "step": 312580 }, { "epoch": 1.997048413682072, "grad_norm": 1.4456356763839722, "learning_rate": 8.586797867971008e-10, "loss": 1.01, "step": 312590 }, { "epoch": 1.9971123008318106, "grad_norm": 1.1017581224441528, "learning_rate": 8.295247963086006e-10, "loss": 0.9403, "step": 312600 }, { "epoch": 1.9971761879815495, "grad_norm": 1.3453593254089355, "learning_rate": 8.008733407227631e-10, "loss": 0.7422, "step": 312610 }, { "epoch": 1.997240075131288, "grad_norm": 1.323583722114563, "learning_rate": 7.727254203226952e-10, "loss": 1.041, "step": 312620 }, { "epoch": 1.997303962281027, "grad_norm": 1.0199631452560425, "learning_rate": 7.450810354026061e-10, "loss": 0.9727, "step": 312630 }, { "epoch": 1.9973678494307654, "grad_norm": 1.100943684577942, "learning_rate": 7.179401862289492e-10, "loss": 1.0275, "step": 312640 }, { "epoch": 1.9974317365805043, "grad_norm": 1.1497398614883423, "learning_rate": 6.913028730848315e-10, "loss": 0.8959, "step": 312650 }, { "epoch": 1.9974956237302428, "grad_norm": 1.6988590955734253, "learning_rate": 6.651690962367064e-10, "loss": 0.8686, "step": 312660 }, { "epoch": 1.9975595108799817, "grad_norm": 1.0665082931518555, "learning_rate": 6.395388559454762e-10, "loss": 0.8715, "step": 312670 }, { "epoch": 1.9976233980297202, "grad_norm": 1.0004991292953491, "learning_rate": 6.144121524664925e-10, "loss": 0.6519, "step": 312680 }, { "epoch": 1.9976872851794591, "grad_norm": 0.9688588976860046, "learning_rate": 5.897889860606576e-10, "loss": 0.8501, "step": 312690 }, { "epoch": 1.9977511723291976, "grad_norm": 0.8588424324989319, "learning_rate": 5.656693569666694e-10, "loss": 0.7342, "step": 312700 }, { "epoch": 1.9978150594789366, "grad_norm": 0.8054032325744629, "learning_rate": 5.42053265434328e-10, "loss": 0.7475, "step": 312710 }, { "epoch": 1.997878946628675, "grad_norm": 0.6064567565917969, "learning_rate": 5.189407116967804e-10, "loss": 0.8118, "step": 312720 }, { "epoch": 1.997942833778414, "grad_norm": 1.036947250366211, "learning_rate": 4.963316959927245e-10, "loss": 0.8701, "step": 312730 }, { "epoch": 1.9980067209281525, "grad_norm": 1.018930196762085, "learning_rate": 4.742262185442048e-10, "loss": 0.7014, "step": 312740 }, { "epoch": 1.9980706080778912, "grad_norm": 0.773653507232666, "learning_rate": 4.5262427957881713e-10, "loss": 0.9808, "step": 312750 }, { "epoch": 1.9981344952276299, "grad_norm": 1.2292520999908447, "learning_rate": 4.315258793075039e-10, "loss": 0.7937, "step": 312760 }, { "epoch": 1.9981983823773686, "grad_norm": 1.4209551811218262, "learning_rate": 4.1093101794675847e-10, "loss": 0.7514, "step": 312770 }, { "epoch": 1.9982622695271073, "grad_norm": 0.9260154366493225, "learning_rate": 3.908396957075233e-10, "loss": 0.9435, "step": 312780 }, { "epoch": 1.998326156676846, "grad_norm": 2.0101253986358643, "learning_rate": 3.7125191278408745e-10, "loss": 0.9585, "step": 312790 }, { "epoch": 1.9983900438265847, "grad_norm": 0.8051590323448181, "learning_rate": 3.52167669376291e-10, "loss": 0.9492, "step": 312800 }, { "epoch": 1.9984539309763234, "grad_norm": 0.7497386336326599, "learning_rate": 3.335869656839741e-10, "loss": 0.7522, "step": 312810 }, { "epoch": 1.998517818126062, "grad_norm": 1.0292730331420898, "learning_rate": 3.155098018847724e-10, "loss": 0.866, "step": 312820 }, { "epoch": 1.9985817052758008, "grad_norm": 1.0129666328430176, "learning_rate": 2.979361781674239e-10, "loss": 0.9058, "step": 312830 }, { "epoch": 1.9986455924255395, "grad_norm": 1.1604506969451904, "learning_rate": 2.808660947040131e-10, "loss": 0.8215, "step": 312840 }, { "epoch": 1.9987094795752782, "grad_norm": 1.2080724239349365, "learning_rate": 2.6429955166662466e-10, "loss": 0.8299, "step": 312850 }, { "epoch": 1.998773366725017, "grad_norm": 1.4316158294677734, "learning_rate": 2.4823654922179194e-10, "loss": 0.6791, "step": 312860 }, { "epoch": 1.9988372538747556, "grad_norm": 1.3033369779586792, "learning_rate": 2.3267708753604845e-10, "loss": 0.8213, "step": 312870 }, { "epoch": 1.9989011410244943, "grad_norm": 0.672759473323822, "learning_rate": 2.1762116676482537e-10, "loss": 0.657, "step": 312880 }, { "epoch": 1.998965028174233, "grad_norm": 0.6766613721847534, "learning_rate": 2.0306878705800282e-10, "loss": 0.7861, "step": 312890 }, { "epoch": 1.9990289153239718, "grad_norm": 0.7596450448036194, "learning_rate": 1.890199485599098e-10, "loss": 0.8423, "step": 312900 }, { "epoch": 1.9990928024737105, "grad_norm": 0.7633396983146667, "learning_rate": 1.754746514148753e-10, "loss": 0.583, "step": 312910 }, { "epoch": 1.9991566896234492, "grad_norm": 0.7127503156661987, "learning_rate": 1.6243289576167719e-10, "loss": 0.8471, "step": 312920 }, { "epoch": 1.9992205767731879, "grad_norm": 0.8593207001686096, "learning_rate": 1.4989468172244002e-10, "loss": 0.7892, "step": 312930 }, { "epoch": 1.9992844639229266, "grad_norm": 1.2852826118469238, "learning_rate": 1.3786000943594168e-10, "loss": 1.0036, "step": 312940 }, { "epoch": 1.9993483510726653, "grad_norm": 0.5354711413383484, "learning_rate": 1.2632887901320444e-10, "loss": 0.8968, "step": 312950 }, { "epoch": 1.999412238222404, "grad_norm": 1.204946756362915, "learning_rate": 1.1530129057635286e-10, "loss": 1.0277, "step": 312960 }, { "epoch": 1.9994761253721427, "grad_norm": 0.9072065949440002, "learning_rate": 1.0477724423640923e-10, "loss": 1.0339, "step": 312970 }, { "epoch": 1.9995400125218814, "grad_norm": 1.1976101398468018, "learning_rate": 9.47567400932936e-11, "loss": 0.7641, "step": 312980 }, { "epoch": 1.9996038996716199, "grad_norm": 1.8328163623809814, "learning_rate": 8.523977825247719e-11, "loss": 0.8129, "step": 312990 }, { "epoch": 1.9996677868213588, "grad_norm": 0.9194107055664062, "learning_rate": 7.622635881388007e-11, "loss": 0.7071, "step": 313000 }, { "epoch": 1.9997316739710973, "grad_norm": 1.5391420125961304, "learning_rate": 6.771648186076896e-11, "loss": 0.6495, "step": 313010 }, { "epoch": 1.9997955611208362, "grad_norm": 1.162663221359253, "learning_rate": 5.971014748196169e-11, "loss": 0.8881, "step": 313020 }, { "epoch": 1.9998594482705747, "grad_norm": 2.731156587600708, "learning_rate": 5.220735575517388e-11, "loss": 0.7754, "step": 313030 }, { "epoch": 1.9999233354203136, "grad_norm": 1.8060675859451294, "learning_rate": 4.520810676367227e-11, "loss": 0.9916, "step": 313040 }, { "epoch": 1.9999872225700521, "grad_norm": 1.0147830247879028, "learning_rate": 3.87124005685191e-11, "loss": 0.8296, "step": 313050 } ], "logging_steps": 10, "max_steps": 313052, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1583506541582582e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }